re PR middle-end/23623 (volatile keyword changes bitfield access size from 32bit...
[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2013 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
40
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
45
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx);
56 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT, rtx, int);
59 static rtx mask_rtx (enum machine_mode, int, int, int);
60 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
61 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, int);
63 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
64 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
65 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
66
 67 /* Test whether a value is zero or a power of two.  */
68 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
69 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
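
/* Illustrative sketch, not part of the build: the predicate relies on the
   usual "x & (x - 1)" trick -- clearing the lowest set bit leaves zero
   exactly for powers of two, and the test also accepts zero itself.  */
#if 0
static void
example_exact_power_of_2_or_zero (void)
{
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (0));     /* 0 & all-ones == 0 */
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (8));     /* 8 & 7 == 0 */
  gcc_assert (! EXACT_POWER_OF_2_OR_ZERO_P (12));  /* 12 & 11 == 8 */
}
#endif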
70
71 struct init_expmed_rtl
72 {
73 struct rtx_def reg;
74 struct rtx_def plus;
75 struct rtx_def neg;
76 struct rtx_def mult;
77 struct rtx_def sdiv;
78 struct rtx_def udiv;
79 struct rtx_def sdiv_32;
80 struct rtx_def smod_32;
81 struct rtx_def wide_mult;
82 struct rtx_def wide_lshr;
83 struct rtx_def wide_trunc;
84 struct rtx_def shift;
85 struct rtx_def shift_mult;
86 struct rtx_def shift_add;
87 struct rtx_def shift_sub0;
88 struct rtx_def shift_sub1;
89 struct rtx_def zext;
90 struct rtx_def trunc;
91
92 rtx pow2[MAX_BITS_PER_WORD];
93 rtx cint[MAX_BITS_PER_WORD];
94 };
95
96 static void
97 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
98 enum machine_mode from_mode, bool speed)
99 {
100 int to_size, from_size;
101 rtx which;
102
103 /* We're given no information about the true size of a partial integer,
104 only the size of the "full" integer it requires for storage. For
105 comparison purposes here, reduce the bit size by one in that case. */
106 to_size = (GET_MODE_BITSIZE (to_mode)
107 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
108 from_size = (GET_MODE_BITSIZE (from_mode)
109 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
110
111 /* Assume cost of zero-extend and sign-extend is the same. */
112 which = (to_size < from_size ? &all->trunc : &all->zext);
113
114 PUT_MODE (&all->reg, from_mode);
115 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
116 }
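
/* Illustrative sketch, not part of the build: the partial-int adjustment
   above matters when two modes report the same storage size.  On a target
   where PSImode is stored in 32 bits, its size is reduced to 31 for the
   comparison, so a PSImode -> SImode conversion is costed as an extension
   and SImode -> PSImode as a truncation, rather than both being costed as
   extensions.  */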
117
118 static void
119 init_expmed_one_mode (struct init_expmed_rtl *all,
120 enum machine_mode mode, int speed)
121 {
122 int m, n, mode_bitsize;
123 enum machine_mode mode_from;
124
125 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
126
127 PUT_MODE (&all->reg, mode);
128 PUT_MODE (&all->plus, mode);
129 PUT_MODE (&all->neg, mode);
130 PUT_MODE (&all->mult, mode);
131 PUT_MODE (&all->sdiv, mode);
132 PUT_MODE (&all->udiv, mode);
133 PUT_MODE (&all->sdiv_32, mode);
134 PUT_MODE (&all->smod_32, mode);
135 PUT_MODE (&all->wide_trunc, mode);
136 PUT_MODE (&all->shift, mode);
137 PUT_MODE (&all->shift_mult, mode);
138 PUT_MODE (&all->shift_add, mode);
139 PUT_MODE (&all->shift_sub0, mode);
140 PUT_MODE (&all->shift_sub1, mode);
141 PUT_MODE (&all->zext, mode);
142 PUT_MODE (&all->trunc, mode);
143
144 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
145 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
146 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
147 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
148 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
149
150 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
151 <= 2 * add_cost (speed, mode)));
152 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
153 <= 4 * add_cost (speed, mode)));
154
155 set_shift_cost (speed, mode, 0, 0);
156 {
157 int cost = add_cost (speed, mode);
158 set_shiftadd_cost (speed, mode, 0, cost);
159 set_shiftsub0_cost (speed, mode, 0, cost);
160 set_shiftsub1_cost (speed, mode, 0, cost);
161 }
162
163 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
164 for (m = 1; m < n; m++)
165 {
166 XEXP (&all->shift, 1) = all->cint[m];
167 XEXP (&all->shift_mult, 1) = all->pow2[m];
168
169 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
170 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
171 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
172 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
173 }
174
175 if (SCALAR_INT_MODE_P (mode))
176 {
177 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
178 mode_from = (enum machine_mode)(mode_from + 1))
179 init_expmed_one_conv (all, mode, mode_from, speed);
180 }
181 if (GET_MODE_CLASS (mode) == MODE_INT)
182 {
183 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
184 if (wider_mode != VOIDmode)
185 {
186 PUT_MODE (&all->zext, wider_mode);
187 PUT_MODE (&all->wide_mult, wider_mode);
188 PUT_MODE (&all->wide_lshr, wider_mode);
189 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
190
191 set_mul_widen_cost (speed, wider_mode,
192 set_src_cost (&all->wide_mult, speed));
193 set_mul_highpart_cost (speed, mode,
194 set_src_cost (&all->wide_trunc, speed));
195 }
196 }
197 }
198
199 void
200 init_expmed (void)
201 {
202 struct init_expmed_rtl all;
203 enum machine_mode mode;
204 int m, speed;
205
206 memset (&all, 0, sizeof all);
207 for (m = 1; m < MAX_BITS_PER_WORD; m++)
208 {
209 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
210 all.cint[m] = GEN_INT (m);
211 }
212
213 PUT_CODE (&all.reg, REG);
214 /* Avoid using hard regs in ways which may be unsupported. */
215 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
216
217 PUT_CODE (&all.plus, PLUS);
218 XEXP (&all.plus, 0) = &all.reg;
219 XEXP (&all.plus, 1) = &all.reg;
220
221 PUT_CODE (&all.neg, NEG);
222 XEXP (&all.neg, 0) = &all.reg;
223
224 PUT_CODE (&all.mult, MULT);
225 XEXP (&all.mult, 0) = &all.reg;
226 XEXP (&all.mult, 1) = &all.reg;
227
228 PUT_CODE (&all.sdiv, DIV);
229 XEXP (&all.sdiv, 0) = &all.reg;
230 XEXP (&all.sdiv, 1) = &all.reg;
231
232 PUT_CODE (&all.udiv, UDIV);
233 XEXP (&all.udiv, 0) = &all.reg;
234 XEXP (&all.udiv, 1) = &all.reg;
235
236 PUT_CODE (&all.sdiv_32, DIV);
237 XEXP (&all.sdiv_32, 0) = &all.reg;
238 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
239
240 PUT_CODE (&all.smod_32, MOD);
241 XEXP (&all.smod_32, 0) = &all.reg;
242 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
243
244 PUT_CODE (&all.zext, ZERO_EXTEND);
245 XEXP (&all.zext, 0) = &all.reg;
246
247 PUT_CODE (&all.wide_mult, MULT);
248 XEXP (&all.wide_mult, 0) = &all.zext;
249 XEXP (&all.wide_mult, 1) = &all.zext;
250
251 PUT_CODE (&all.wide_lshr, LSHIFTRT);
252 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
253
254 PUT_CODE (&all.wide_trunc, TRUNCATE);
255 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
256
257 PUT_CODE (&all.shift, ASHIFT);
258 XEXP (&all.shift, 0) = &all.reg;
259
260 PUT_CODE (&all.shift_mult, MULT);
261 XEXP (&all.shift_mult, 0) = &all.reg;
262
263 PUT_CODE (&all.shift_add, PLUS);
264 XEXP (&all.shift_add, 0) = &all.shift_mult;
265 XEXP (&all.shift_add, 1) = &all.reg;
266
267 PUT_CODE (&all.shift_sub0, MINUS);
268 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
269 XEXP (&all.shift_sub0, 1) = &all.reg;
270
271 PUT_CODE (&all.shift_sub1, MINUS);
272 XEXP (&all.shift_sub1, 0) = &all.reg;
273 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
274
275 PUT_CODE (&all.trunc, TRUNCATE);
276 XEXP (&all.trunc, 0) = &all.reg;
277
278 for (speed = 0; speed < 2; speed++)
279 {
280 crtl->maybe_hot_insn_p = speed;
281 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
282
283 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
284 mode = (enum machine_mode)(mode + 1))
285 init_expmed_one_mode (&all, mode, speed);
286
287 if (MIN_MODE_PARTIAL_INT != VOIDmode)
288 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
289 mode = (enum machine_mode)(mode + 1))
290 init_expmed_one_mode (&all, mode, speed);
291
292 if (MIN_MODE_VECTOR_INT != VOIDmode)
293 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
294 mode = (enum machine_mode)(mode + 1))
295 init_expmed_one_mode (&all, mode, speed);
296 }
297
298 if (alg_hash_used_p ())
299 {
300 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
301 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
302 }
303 else
304 set_alg_hash_used_p (true);
305 default_rtl_profile ();
306 }
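
/* Illustrative sketch, not part of the build: the statically allocated rtxes
   above are skeletons re-used for every mode and shift count.  For example,
   with m == 3 the shift_add skeleton is costed as
   (plus:M (mult:M (reg:M) (const_int 8)) (reg:M)), so a target whose cost
   hooks recognize shift-and-add addressing can report it as cheap.  */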
307
308 /* Return an rtx representing minus the value of X.
309 MODE is the intended mode of the result,
310 useful if X is a CONST_INT. */
311
312 rtx
313 negate_rtx (enum machine_mode mode, rtx x)
314 {
315 rtx result = simplify_unary_operation (NEG, mode, x, mode);
316
317 if (result == 0)
318 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
319
320 return result;
321 }
322
323 /* Adjust bitfield memory MEM so that it points to the first unit of mode
324 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
325 If MODE is BLKmode, return a reference to every byte in the bitfield.
326 Set *NEW_BITNUM to the bit position of the field within the new memory. */
327
328 static rtx
329 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
330 unsigned HOST_WIDE_INT bitsize,
331 unsigned HOST_WIDE_INT bitnum,
332 unsigned HOST_WIDE_INT *new_bitnum)
333 {
334 if (mode == BLKmode)
335 {
336 *new_bitnum = bitnum % BITS_PER_UNIT;
337 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
338 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
339 / BITS_PER_UNIT);
340 return adjust_bitfield_address_size (mem, mode, offset, size);
341 }
342 else
343 {
344 unsigned int unit = GET_MODE_BITSIZE (mode);
345 *new_bitnum = bitnum % unit;
346 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
347 return adjust_bitfield_address (mem, mode, offset);
348 }
349 }
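
/* Illustrative sketch, not part of the build: the non-BLKmode case above is
   plain integer arithmetic.  Assuming BITS_PER_UNIT == 8 and a 32-bit MODE,
   a field at bit 70 is re-addressed at byte offset 8 with 6 bits left over.  */
#if 0
static void
example_narrow_bit_field_arithmetic (void)
{
  unsigned HOST_WIDE_INT bitnum = 70, unit = 32;
  unsigned HOST_WIDE_INT new_bitnum = bitnum % unit;          /* 6 */
  unsigned HOST_WIDE_INT offset = (bitnum - new_bitnum) / 8;  /* 8 bytes */
  gcc_assert (new_bitnum == 6 && offset == 8);
}
#endif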
350
351 /* The caller wants to perform insertion or extraction PATTERN on a
352 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
353 BITREGION_START and BITREGION_END are as for store_bit_field
354 and FIELDMODE is the natural mode of the field.
355
356 Search for a mode that is compatible with the memory access
357 restrictions and (where applicable) with a register insertion or
358 extraction. Return the new memory on success, storing the adjusted
359 bit position in *NEW_BITNUM. Return null otherwise. */
360
361 static rtx
362 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
363 rtx op0, HOST_WIDE_INT bitsize,
364 HOST_WIDE_INT bitnum,
365 unsigned HOST_WIDE_INT bitregion_start,
366 unsigned HOST_WIDE_INT bitregion_end,
367 enum machine_mode fieldmode,
368 unsigned HOST_WIDE_INT *new_bitnum)
369 {
370 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
371 bitregion_end, MEM_ALIGN (op0),
372 MEM_VOLATILE_P (op0));
373 enum machine_mode best_mode;
374 if (iter.next_mode (&best_mode))
375 {
376 /* We can use a memory in BEST_MODE. See whether this is true for
377 any wider modes. All other things being equal, we prefer to
378 use the widest mode possible because it tends to expose more
379 CSE opportunities. */
380 if (!iter.prefer_smaller_modes ())
381 {
382 /* Limit the search to the mode required by the corresponding
383 register insertion or extraction instruction, if any. */
384 enum machine_mode limit_mode = word_mode;
385 extraction_insn insn;
386 if (get_best_reg_extraction_insn (&insn, pattern,
387 GET_MODE_BITSIZE (best_mode),
388 fieldmode))
389 limit_mode = insn.field_mode;
390
391 enum machine_mode wider_mode;
392 while (iter.next_mode (&wider_mode)
393 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
394 best_mode = wider_mode;
395 }
396 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
397 new_bitnum);
398 }
399 return NULL_RTX;
400 }
401
402 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
403 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
404 offset is then BITNUM / BITS_PER_UNIT. */
405
406 static bool
407 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
408 unsigned HOST_WIDE_INT bitsize,
409 enum machine_mode struct_mode)
410 {
411 if (BYTES_BIG_ENDIAN)
412 return (bitnum % BITS_PER_UNIT == 0
413 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
414 || (bitnum + bitsize) % BITS_PER_WORD == 0));
415 else
416 return bitnum % BITS_PER_WORD == 0;
417 }
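
/* Illustrative sketch, not part of the build: with 32-bit words and 8-bit
   units, a 16-bit field at bit 32 of a 64-bit structure is a lowpart on a
   little-endian target (32 % BITS_PER_WORD == 0), while on a big-endian
   target the lowpart is the field that ends at bit 64, i.e. the one starting
   at bit 48 (48 % 8 == 0 and 48 + 16 == 64).  */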
418
 419 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
420 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. */
421
422 static bool
423 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
424 unsigned HOST_WIDE_INT bitnum,
425 enum machine_mode fieldmode)
426 {
427 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
428
429 /* -fstrict-volatile-bitfields must be enabled and we must have a
430 volatile MEM. */
431 if (!MEM_P (op0)
432 || !MEM_VOLATILE_P (op0)
433 || flag_strict_volatile_bitfields <= 0)
434 return false;
435
436 /* Non-integral modes likely only happen with packed structures.
437 Punt. */
438 if (!SCALAR_INT_MODE_P (fieldmode))
439 return false;
440
441 /* The bit size must not be larger than the field mode, and
442 the field mode must not be larger than a word. */
443 if (bitsize > modesize || modesize > BITS_PER_WORD)
444 return false;
445
446 /* Check for cases of unaligned fields that must be split. */
447 if (bitnum % BITS_PER_UNIT + bitsize > modesize
448 || (STRICT_ALIGNMENT
449 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
450 return false;
451
452 return true;
453 }
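
/* Illustrative sketch, not part of the build, with made-up names: roughly
   the kind of access the commit subject (PR middle-end/23623) is about.
   With -fstrict-volatile-bitfields, the store below is expected to use a
   single access in the declared field mode (SImode for "unsigned int")
   rather than a narrower or wider one.  */
#if 0
struct example_device_reg { volatile unsigned int status : 7; } example_reg;

static void
example_strict_volatile_store (void)
{
  example_reg.status = 3;  /* one 32-bit read-modify-write */
}
#endif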
454
 455 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
456 bit number BITNUM can be treated as a simple value of mode MODE. */
457
458 static bool
459 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
460 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
461 {
462 return (MEM_P (op0)
463 && bitnum % BITS_PER_UNIT == 0
464 && bitsize == GET_MODE_BITSIZE (mode)
465 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
466 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
467 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
468 }
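
/* Illustrative sketch, not part of the build: assuming 8-bit units, a 32-bit
   SImode field starting at bit 64 of a MEM aligned to at least 32 bits is
   "simple" -- it is byte aligned (64 % 8 == 0), exactly fills the mode and
   meets the alignment test -- so it can be moved with a single SImode load
   or store.  A field starting at bit 65 would fail the first test.  */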
469 \f
470 /* Try to use instruction INSV to store VALUE into a field of OP0.
471 BITSIZE and BITNUM are as for store_bit_field. */
472
473 static bool
474 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
475 unsigned HOST_WIDE_INT bitsize,
476 unsigned HOST_WIDE_INT bitnum, rtx value)
477 {
478 struct expand_operand ops[4];
479 rtx value1;
480 rtx xop0 = op0;
481 rtx last = get_last_insn ();
482 bool copy_back = false;
483
484 enum machine_mode op_mode = insv->field_mode;
485 unsigned int unit = GET_MODE_BITSIZE (op_mode);
486 if (bitsize == 0 || bitsize > unit)
487 return false;
488
489 if (MEM_P (xop0))
490 /* Get a reference to the first byte of the field. */
491 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
492 &bitnum);
493 else
494 {
495 /* Convert from counting within OP0 to counting in OP_MODE. */
496 if (BYTES_BIG_ENDIAN)
497 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
498
499 /* If xop0 is a register, we need it in OP_MODE
500 to make it acceptable to the format of insv. */
501 if (GET_CODE (xop0) == SUBREG)
502 /* We can't just change the mode, because this might clobber op0,
503 and we will need the original value of op0 if insv fails. */
504 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
505 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
506 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
507 }
508
509 /* If the destination is a paradoxical subreg such that we need a
510 truncate to the inner mode, perform the insertion on a temporary and
511 truncate the result to the original destination. Note that we can't
512 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
513 X) 0)) is (reg:N X). */
514 if (GET_CODE (xop0) == SUBREG
515 && REG_P (SUBREG_REG (xop0))
516 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
517 op_mode))
518 {
519 rtx tem = gen_reg_rtx (op_mode);
520 emit_move_insn (tem, xop0);
521 xop0 = tem;
522 copy_back = true;
523 }
524
525 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
526 "backwards" from the size of the unit we are inserting into.
527 Otherwise, we count bits from the most significant on a
528 BYTES/BITS_BIG_ENDIAN machine. */
529
530 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
531 bitnum = unit - bitsize - bitnum;
532
533 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
534 value1 = value;
535 if (GET_MODE (value) != op_mode)
536 {
537 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
538 {
539 /* Optimization: Don't bother really extending VALUE
540 if it has all the bits we will actually use. However,
541 if we must narrow it, be sure we do it correctly. */
542
543 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
544 {
545 rtx tmp;
546
547 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
548 if (! tmp)
549 tmp = simplify_gen_subreg (op_mode,
550 force_reg (GET_MODE (value),
551 value1),
552 GET_MODE (value), 0);
553 value1 = tmp;
554 }
555 else
556 value1 = gen_lowpart (op_mode, value1);
557 }
558 else if (CONST_INT_P (value))
559 value1 = gen_int_mode (INTVAL (value), op_mode);
560 else
561 /* Parse phase is supposed to make VALUE's data type
562 match that of the component reference, which is a type
563 at least as wide as the field; so VALUE should have
564 a mode that corresponds to that type. */
565 gcc_assert (CONSTANT_P (value));
566 }
567
568 create_fixed_operand (&ops[0], xop0);
569 create_integer_operand (&ops[1], bitsize);
570 create_integer_operand (&ops[2], bitnum);
571 create_input_operand (&ops[3], value1, op_mode);
572 if (maybe_expand_insn (insv->icode, 4, ops))
573 {
574 if (copy_back)
575 convert_move (op0, xop0, true);
576 return true;
577 }
578 delete_insns_since (last);
579 return false;
580 }
581
582 /* A subroutine of store_bit_field, with the same arguments. Return true
583 if the operation could be implemented.
584
585 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
586 no other way of implementing the operation. If FALLBACK_P is false,
587 return false instead. */
588
589 static bool
590 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
591 unsigned HOST_WIDE_INT bitnum,
592 unsigned HOST_WIDE_INT bitregion_start,
593 unsigned HOST_WIDE_INT bitregion_end,
594 enum machine_mode fieldmode,
595 rtx value, bool fallback_p)
596 {
597 rtx op0 = str_rtx;
598 rtx orig_value;
599
600 while (GET_CODE (op0) == SUBREG)
601 {
602 /* The following line once was done only if WORDS_BIG_ENDIAN,
603 but I think that is a mistake. WORDS_BIG_ENDIAN is
604 meaningful at a much higher level; when structures are copied
605 between memory and regs, the higher-numbered regs
606 always get higher addresses. */
607 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
608 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
609 int byte_offset = 0;
610
611 /* Paradoxical subregs need special handling on big endian machines. */
612 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
613 {
614 int difference = inner_mode_size - outer_mode_size;
615
616 if (WORDS_BIG_ENDIAN)
617 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
618 if (BYTES_BIG_ENDIAN)
619 byte_offset += difference % UNITS_PER_WORD;
620 }
621 else
622 byte_offset = SUBREG_BYTE (op0);
623
624 bitnum += byte_offset * BITS_PER_UNIT;
625 op0 = SUBREG_REG (op0);
626 }
627
628 /* No action is needed if the target is a register and if the field
629 lies completely outside that register. This can occur if the source
630 code contains an out-of-bounds access to a small array. */
631 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
632 return true;
633
634 /* Use vec_set patterns for inserting parts of vectors whenever
635 available. */
636 if (VECTOR_MODE_P (GET_MODE (op0))
637 && !MEM_P (op0)
638 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
639 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
640 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
641 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
642 {
643 struct expand_operand ops[3];
644 enum machine_mode outermode = GET_MODE (op0);
645 enum machine_mode innermode = GET_MODE_INNER (outermode);
646 enum insn_code icode = optab_handler (vec_set_optab, outermode);
647 int pos = bitnum / GET_MODE_BITSIZE (innermode);
648
649 create_fixed_operand (&ops[0], op0);
650 create_input_operand (&ops[1], value, innermode);
651 create_integer_operand (&ops[2], pos);
652 if (maybe_expand_insn (icode, 3, ops))
653 return true;
654 }
655
 656   /* If the target is a register, then overwriting the entire object or storing
657 a full-word or multi-word field can be done with just a SUBREG. */
658 if (!MEM_P (op0)
659 && bitsize == GET_MODE_BITSIZE (fieldmode)
660 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
661 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
662 {
663 /* Use the subreg machinery either to narrow OP0 to the required
664 words or to cope with mode punning between equal-sized modes.
665 In the latter case, use subreg on the rhs side, not lhs. */
666 rtx sub;
667
668 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
669 {
670 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
671 if (sub)
672 {
673 emit_move_insn (op0, sub);
674 return true;
675 }
676 }
677 else
678 {
679 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
680 bitnum / BITS_PER_UNIT);
681 if (sub)
682 {
683 emit_move_insn (sub, value);
684 return true;
685 }
686 }
687 }
688
689 /* If the target is memory, storing any naturally aligned field can be
690 done with a simple store. For targets that support fast unaligned
691 memory, any naturally sized, unit aligned field can be done directly. */
692 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
693 {
694 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
695 emit_move_insn (op0, value);
696 return true;
697 }
698
699 /* Make sure we are playing with integral modes. Pun with subregs
700 if we aren't. This must come after the entire register case above,
701 since that case is valid for any mode. The following cases are only
702 valid for integral modes. */
703 {
704 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
705 if (imode != GET_MODE (op0))
706 {
707 if (MEM_P (op0))
708 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
709 else
710 {
711 gcc_assert (imode != BLKmode);
712 op0 = gen_lowpart (imode, op0);
713 }
714 }
715 }
716
717 /* Storing an lsb-aligned field in a register
718 can be done with a movstrict instruction. */
719
720 if (!MEM_P (op0)
721 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
722 && bitsize == GET_MODE_BITSIZE (fieldmode)
723 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
724 {
725 struct expand_operand ops[2];
726 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
727 rtx arg0 = op0;
728 unsigned HOST_WIDE_INT subreg_off;
729
730 if (GET_CODE (arg0) == SUBREG)
731 {
732 /* Else we've got some float mode source being extracted into
733 a different float mode destination -- this combination of
734 subregs results in Severe Tire Damage. */
735 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
736 || GET_MODE_CLASS (fieldmode) == MODE_INT
737 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
738 arg0 = SUBREG_REG (arg0);
739 }
740
741 subreg_off = bitnum / BITS_PER_UNIT;
742 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
743 {
744 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
745
746 create_fixed_operand (&ops[0], arg0);
747 /* Shrink the source operand to FIELDMODE. */
748 create_convert_operand_to (&ops[1], value, fieldmode, false);
749 if (maybe_expand_insn (icode, 2, ops))
750 return true;
751 }
752 }
753
754 /* Handle fields bigger than a word. */
755
756 if (bitsize > BITS_PER_WORD)
757 {
758 /* Here we transfer the words of the field
759 in the order least significant first.
760 This is because the most significant word is the one which may
761 be less than full.
762 However, only do that if the value is not BLKmode. */
763
764 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
765 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
766 unsigned int i;
767 rtx last;
768
769 /* This is the mode we must force value to, so that there will be enough
770 subwords to extract. Note that fieldmode will often (always?) be
771 VOIDmode, because that is what store_field uses to indicate that this
772 is a bit field, but passing VOIDmode to operand_subword_force
773 is not allowed. */
774 fieldmode = GET_MODE (value);
775 if (fieldmode == VOIDmode)
776 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
777
778 last = get_last_insn ();
779 for (i = 0; i < nwords; i++)
780 {
781 /* If I is 0, use the low-order word in both field and target;
782 if I is 1, use the next to lowest word; and so on. */
783 unsigned int wordnum = (backwards
784 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
785 - i - 1
786 : i);
787 unsigned int bit_offset = (backwards
788 ? MAX ((int) bitsize - ((int) i + 1)
789 * BITS_PER_WORD,
790 0)
791 : (int) i * BITS_PER_WORD);
792 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
793 unsigned HOST_WIDE_INT new_bitsize =
794 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
795
796 /* If the remaining chunk doesn't have full wordsize we have
797 to make sure that for big endian machines the higher order
798 bits are used. */
799 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
800 value_word = simplify_expand_binop (word_mode, lshr_optab,
801 value_word,
802 GEN_INT (BITS_PER_WORD
803 - new_bitsize),
804 NULL_RTX, true,
805 OPTAB_LIB_WIDEN);
806
807 if (!store_bit_field_1 (op0, new_bitsize,
808 bitnum + bit_offset,
809 bitregion_start, bitregion_end,
810 word_mode,
811 value_word, fallback_p))
812 {
813 delete_insns_since (last);
814 return false;
815 }
816 }
817 return true;
818 }
819
820 /* If VALUE has a floating-point or complex mode, access it as an
821 integer of the corresponding size. This can occur on a machine
822 with 64 bit registers that uses SFmode for float. It can also
823 occur for unaligned float or complex fields. */
824 orig_value = value;
825 if (GET_MODE (value) != VOIDmode
826 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
827 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
828 {
829 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
830 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
831 }
832
833 /* If OP0 is a multi-word register, narrow it to the affected word.
834 If the region spans two words, defer to store_split_bit_field. */
835 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
836 {
837 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
838 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
839 gcc_assert (op0);
840 bitnum %= BITS_PER_WORD;
841 if (bitnum + bitsize > BITS_PER_WORD)
842 {
843 if (!fallback_p)
844 return false;
845
846 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
847 bitregion_end, value);
848 return true;
849 }
850 }
851
852 /* From here on we can assume that the field to be stored in fits
853 within a word. If the destination is a register, it too fits
854 in a word. */
855
856 extraction_insn insv;
857 if (!MEM_P (op0)
858 && get_best_reg_extraction_insn (&insv, EP_insv,
859 GET_MODE_BITSIZE (GET_MODE (op0)),
860 fieldmode)
861 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
862 return true;
863
864 /* If OP0 is a memory, try copying it to a register and seeing if a
865 cheap register alternative is available. */
866 if (MEM_P (op0))
867 {
868 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
869 fieldmode)
870 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
871 return true;
872
873 rtx last = get_last_insn ();
874
875 /* Try loading part of OP0 into a register, inserting the bitfield
876 into that, and then copying the result back to OP0. */
877 unsigned HOST_WIDE_INT bitpos;
878 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
879 bitregion_start, bitregion_end,
880 fieldmode, &bitpos);
881 if (xop0)
882 {
883 rtx tempreg = copy_to_reg (xop0);
884 if (store_bit_field_1 (tempreg, bitsize, bitpos,
885 bitregion_start, bitregion_end,
886 fieldmode, orig_value, false))
887 {
888 emit_move_insn (xop0, tempreg);
889 return true;
890 }
891 delete_insns_since (last);
892 }
893 }
894
895 if (!fallback_p)
896 return false;
897
898 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
899 bitregion_end, value);
900 return true;
901 }
902
903 /* Generate code to store value from rtx VALUE
904 into a bit-field within structure STR_RTX
905 containing BITSIZE bits starting at bit BITNUM.
906
907 BITREGION_START is bitpos of the first bitfield in this region.
908 BITREGION_END is the bitpos of the ending bitfield in this region.
909 These two fields are 0, if the C++ memory model does not apply,
910 or we are not interested in keeping track of bitfield regions.
911
912 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
913
914 void
915 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
916 unsigned HOST_WIDE_INT bitnum,
917 unsigned HOST_WIDE_INT bitregion_start,
918 unsigned HOST_WIDE_INT bitregion_end,
919 enum machine_mode fieldmode,
920 rtx value)
921 {
922 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
923 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
924 {
925
926 /* Storing any naturally aligned field can be done with a simple
927 store. For targets that support fast unaligned memory, any
928 naturally sized, unit aligned field can be done directly. */
929 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
930 {
931 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
932 bitnum / BITS_PER_UNIT);
933 emit_move_insn (str_rtx, value);
934 }
935 else
936 /* Explicitly override the C/C++ memory model; ignore the
937 bit range so that we can do the access in the mode mandated
938 by -fstrict-volatile-bitfields instead. */
939 store_fixed_bit_field (str_rtx, bitsize, bitnum, 0, 0, value);
940 return;
941 }
942
943 /* Under the C++0x memory model, we must not touch bits outside the
944 bit region. Adjust the address to start at the beginning of the
945 bit region. */
946 if (MEM_P (str_rtx) && bitregion_start > 0)
947 {
948 enum machine_mode bestmode;
949 HOST_WIDE_INT offset, size;
950
951 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
952
953 offset = bitregion_start / BITS_PER_UNIT;
954 bitnum -= bitregion_start;
955 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
956 bitregion_end -= bitregion_start;
957 bitregion_start = 0;
958 bestmode = get_best_mode (bitsize, bitnum,
959 bitregion_start, bitregion_end,
960 MEM_ALIGN (str_rtx), VOIDmode,
961 MEM_VOLATILE_P (str_rtx));
962 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
963 }
964
965 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
966 bitregion_start, bitregion_end,
967 fieldmode, value, true))
968 gcc_unreachable ();
969 }
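
/* Illustrative sketch, not part of the build: a worked example of the bit
   region adjustment above, assuming BITS_PER_UNIT == 8.  For a 3-bit field
   at bit 37 whose region starts at bit 32, the MEM is advanced by
   32 / 8 == 4 bytes, BITNUM becomes 37 - 32 == 5, and SIZE is
   (5 + 3 + 8 - 1) / 8 == 1 byte, so no byte outside the region is touched.  */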
970 \f
971 /* Use shifts and boolean operations to store VALUE into a bit field of
972 width BITSIZE in OP0, starting at bit BITNUM. */
973
974 static void
975 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
976 unsigned HOST_WIDE_INT bitnum,
977 unsigned HOST_WIDE_INT bitregion_start,
978 unsigned HOST_WIDE_INT bitregion_end,
979 rtx value)
980 {
981 enum machine_mode mode;
982 rtx temp;
983 int all_zero = 0;
984 int all_one = 0;
985
986 /* There is a case not handled here:
987 a structure with a known alignment of just a halfword
988 and a field split across two aligned halfwords within the structure.
989 Or likewise a structure with a known alignment of just a byte
990 and a field split across two bytes.
991 Such cases are not supposed to be able to occur. */
992
993 if (MEM_P (op0))
994 {
995 mode = GET_MODE (op0);
996 if (GET_MODE_BITSIZE (mode) == 0
997 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
998 mode = word_mode;
999 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1000 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1001
1002 if (mode == VOIDmode)
1003 {
1004 /* The only way this should occur is if the field spans word
1005 boundaries. */
1006 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1007 bitregion_end, value);
1008 return;
1009 }
1010
1011 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1012 }
1013
1014 mode = GET_MODE (op0);
1015 gcc_assert (SCALAR_INT_MODE_P (mode));
1016
1017 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1018 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1019
1020 if (BYTES_BIG_ENDIAN)
1021 /* BITNUM is the distance between our msb
1022 and that of the containing datum.
1023 Convert it to the distance from the lsb. */
1024 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1025
1026 /* Now BITNUM is always the distance between our lsb
1027 and that of OP0. */
1028
1029 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1030 we must first convert its mode to MODE. */
1031
1032 if (CONST_INT_P (value))
1033 {
1034 HOST_WIDE_INT v = INTVAL (value);
1035
1036 if (bitsize < HOST_BITS_PER_WIDE_INT)
1037 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1038
1039 if (v == 0)
1040 all_zero = 1;
1041 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1042 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1043 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1044 all_one = 1;
1045
1046 value = lshift_value (mode, v, bitnum);
1047 }
1048 else
1049 {
1050 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1051 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1052
1053 if (GET_MODE (value) != mode)
1054 value = convert_to_mode (mode, value, 1);
1055
1056 if (must_and)
1057 value = expand_binop (mode, and_optab, value,
1058 mask_rtx (mode, 0, bitsize, 0),
1059 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1060 if (bitnum > 0)
1061 value = expand_shift (LSHIFT_EXPR, mode, value,
1062 bitnum, NULL_RTX, 1);
1063 }
1064
1065 /* Now clear the chosen bits in OP0,
1066 except that if VALUE is -1 we need not bother. */
1067 /* We keep the intermediates in registers to allow CSE to combine
1068 consecutive bitfield assignments. */
1069
1070 temp = force_reg (mode, op0);
1071
1072 if (! all_one)
1073 {
1074 temp = expand_binop (mode, and_optab, temp,
1075 mask_rtx (mode, bitnum, bitsize, 1),
1076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1077 temp = force_reg (mode, temp);
1078 }
1079
1080 /* Now logical-or VALUE into OP0, unless it is zero. */
1081
1082 if (! all_zero)
1083 {
1084 temp = expand_binop (mode, ior_optab, temp, value,
1085 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1086 temp = force_reg (mode, temp);
1087 }
1088
1089 if (op0 != temp)
1090 {
1091 op0 = copy_rtx (op0);
1092 emit_move_insn (op0, temp);
1093 }
1094 }
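
/* Illustrative sketch, not part of the build: the clear/insert sequence
   above written out on plain integers, with little-endian bit numbering and
   assuming 0 < BITSIZE < 32 so the field fits in one 32-bit word.  */
#if 0
static unsigned int
example_store_fixed_bit_field (unsigned int word, unsigned int value,
                               unsigned int bitsize, unsigned int bitnum)
{
  unsigned int mask = ((1U << bitsize) - 1) << bitnum;  /* the chosen bits */
  word &= ~mask;                                         /* clear the field */
  word |= (value << bitnum) & mask;                      /* OR the value in */
  return word;
}
#endif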
1095 \f
1096 /* Store a bit field that is split across multiple accessible memory objects.
1097
1098 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1099 BITSIZE is the field width; BITPOS the position of its first bit
1100 (within the word).
1101 VALUE is the value to store.
1102
1103 This does not yet handle fields wider than BITS_PER_WORD. */
1104
1105 static void
1106 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1107 unsigned HOST_WIDE_INT bitpos,
1108 unsigned HOST_WIDE_INT bitregion_start,
1109 unsigned HOST_WIDE_INT bitregion_end,
1110 rtx value)
1111 {
1112 unsigned int unit;
1113 unsigned int bitsdone = 0;
1114
 1115   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1116 much at a time. */
1117 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1118 unit = BITS_PER_WORD;
1119 else
1120 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1121
1122 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1123 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1124 that VALUE might be a floating-point constant. */
1125 if (CONSTANT_P (value) && !CONST_INT_P (value))
1126 {
1127 rtx word = gen_lowpart_common (word_mode, value);
1128
1129 if (word && (value != word))
1130 value = word;
1131 else
1132 value = gen_lowpart_common (word_mode,
1133 force_reg (GET_MODE (value) != VOIDmode
1134 ? GET_MODE (value)
1135 : word_mode, value));
1136 }
1137
1138 while (bitsdone < bitsize)
1139 {
1140 unsigned HOST_WIDE_INT thissize;
1141 rtx part, word;
1142 unsigned HOST_WIDE_INT thispos;
1143 unsigned HOST_WIDE_INT offset;
1144
1145 offset = (bitpos + bitsdone) / unit;
1146 thispos = (bitpos + bitsdone) % unit;
1147
1148 /* When region of bytes we can touch is restricted, decrease
1149 UNIT close to the end of the region as needed. If op0 is a REG
1150 or SUBREG of REG, don't do this, as there can't be data races
1151 on a register and we can expand shorter code in some cases. */
1152 if (bitregion_end
1153 && unit > BITS_PER_UNIT
1154 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1155 && !REG_P (op0)
1156 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1157 {
1158 unit = unit / 2;
1159 continue;
1160 }
1161
1162 /* THISSIZE must not overrun a word boundary. Otherwise,
1163 store_fixed_bit_field will call us again, and we will mutually
1164 recurse forever. */
1165 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1166 thissize = MIN (thissize, unit - thispos);
1167
1168 if (BYTES_BIG_ENDIAN)
1169 {
1170 /* Fetch successively less significant portions. */
1171 if (CONST_INT_P (value))
1172 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1173 >> (bitsize - bitsdone - thissize))
1174 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1175 else
1176 {
1177 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1178 /* The args are chosen so that the last part includes the
1179 lsb. Give extract_bit_field the value it needs (with
1180 endianness compensation) to fetch the piece we want. */
1181 part = extract_fixed_bit_field (word_mode, value, thissize,
1182 total_bits - bitsize + bitsdone,
1183 NULL_RTX, 1);
1184 }
1185 }
1186 else
1187 {
1188 /* Fetch successively more significant portions. */
1189 if (CONST_INT_P (value))
1190 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1191 >> bitsdone)
1192 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1193 else
1194 part = extract_fixed_bit_field (word_mode, value, thissize,
1195 bitsdone, NULL_RTX, 1);
1196 }
1197
1198 /* If OP0 is a register, then handle OFFSET here.
1199
1200 When handling multiword bitfields, extract_bit_field may pass
1201 down a word_mode SUBREG of a larger REG for a bitfield that actually
1202 crosses a word boundary. Thus, for a SUBREG, we must find
1203 the current word starting from the base register. */
1204 if (GET_CODE (op0) == SUBREG)
1205 {
1206 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1207 + (offset * unit / BITS_PER_WORD);
1208 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1209 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1210 word = word_offset ? const0_rtx : op0;
1211 else
1212 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1213 GET_MODE (SUBREG_REG (op0)));
1214 offset &= BITS_PER_WORD / unit - 1;
1215 }
1216 else if (REG_P (op0))
1217 {
1218 enum machine_mode op0_mode = GET_MODE (op0);
1219 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1220 word = offset ? const0_rtx : op0;
1221 else
1222 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1223 GET_MODE (op0));
1224 offset &= BITS_PER_WORD / unit - 1;
1225 }
1226 else
1227 word = op0;
1228
1229 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1230 it is just an out-of-bounds access. Ignore it. */
1231 if (word != const0_rtx)
1232 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1233 bitregion_start, bitregion_end, part);
1234 bitsdone += thissize;
1235 }
1236 }
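
/* Illustrative sketch, not part of the build: a worked example of the
   splitting loop above.  Storing a 20-bit field at bit position 28 of a
   32-bit aligned MEM (UNIT == 32) takes two iterations: first
   THISSIZE == MIN (20, 32 - 28) == 4 bits at THISPOS 28 of word 0, then the
   remaining 16 bits at THISPOS 0 of word 1 (OFFSET == 1).  */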
1237 \f
1238 /* A subroutine of extract_bit_field_1 that converts return value X
1239 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1240 to extract_bit_field. */
1241
1242 static rtx
1243 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1244 enum machine_mode tmode, bool unsignedp)
1245 {
1246 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1247 return x;
1248
1249 /* If the x mode is not a scalar integral, first convert to the
1250 integer mode of that size and then access it as a floating-point
1251 value via a SUBREG. */
1252 if (!SCALAR_INT_MODE_P (tmode))
1253 {
1254 enum machine_mode smode;
1255
1256 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1257 x = convert_to_mode (smode, x, unsignedp);
1258 x = force_reg (smode, x);
1259 return gen_lowpart (tmode, x);
1260 }
1261
1262 return convert_to_mode (tmode, x, unsignedp);
1263 }
1264
1265 /* Try to use an ext(z)v pattern to extract a field from OP0.
1266 Return the extracted value on success, otherwise return null.
1267 EXT_MODE is the mode of the extraction and the other arguments
1268 are as for extract_bit_field. */
1269
1270 static rtx
1271 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1272 unsigned HOST_WIDE_INT bitsize,
1273 unsigned HOST_WIDE_INT bitnum,
1274 int unsignedp, rtx target,
1275 enum machine_mode mode, enum machine_mode tmode)
1276 {
1277 struct expand_operand ops[4];
1278 rtx spec_target = target;
1279 rtx spec_target_subreg = 0;
1280 enum machine_mode ext_mode = extv->field_mode;
1281 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1282
1283 if (bitsize == 0 || unit < bitsize)
1284 return NULL_RTX;
1285
1286 if (MEM_P (op0))
1287 /* Get a reference to the first byte of the field. */
1288 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1289 &bitnum);
1290 else
1291 {
1292 /* Convert from counting within OP0 to counting in EXT_MODE. */
1293 if (BYTES_BIG_ENDIAN)
1294 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1295
1296 /* If op0 is a register, we need it in EXT_MODE to make it
1297 acceptable to the format of ext(z)v. */
1298 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1299 return NULL_RTX;
1300 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1301 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1302 }
1303
1304 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1305 "backwards" from the size of the unit we are extracting from.
1306 Otherwise, we count bits from the most significant on a
1307 BYTES/BITS_BIG_ENDIAN machine. */
1308
1309 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1310 bitnum = unit - bitsize - bitnum;
1311
1312 if (target == 0)
1313 target = spec_target = gen_reg_rtx (tmode);
1314
1315 if (GET_MODE (target) != ext_mode)
1316 {
1317 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1318 between the mode of the extraction (word_mode) and the target
1319 mode. Instead, create a temporary and use convert_move to set
1320 the target. */
1321 if (REG_P (target)
1322 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1323 {
1324 target = gen_lowpart (ext_mode, target);
1325 if (GET_MODE_PRECISION (ext_mode)
1326 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1327 spec_target_subreg = target;
1328 }
1329 else
1330 target = gen_reg_rtx (ext_mode);
1331 }
1332
1333 create_output_operand (&ops[0], target, ext_mode);
1334 create_fixed_operand (&ops[1], op0);
1335 create_integer_operand (&ops[2], bitsize);
1336 create_integer_operand (&ops[3], bitnum);
1337 if (maybe_expand_insn (extv->icode, 4, ops))
1338 {
1339 target = ops[0].value;
1340 if (target == spec_target)
1341 return target;
1342 if (target == spec_target_subreg)
1343 return spec_target;
1344 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1345 }
1346 return NULL_RTX;
1347 }
1348
1349 /* A subroutine of extract_bit_field, with the same arguments.
1350 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1351 if we can find no other means of implementing the operation.
 1352    If FALLBACK_P is false, return NULL instead.  */
1353
1354 static rtx
1355 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1356 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1357 enum machine_mode mode, enum machine_mode tmode,
1358 bool fallback_p)
1359 {
1360 rtx op0 = str_rtx;
1361 enum machine_mode int_mode;
1362 enum machine_mode mode1;
1363
1364 if (tmode == VOIDmode)
1365 tmode = mode;
1366
1367 while (GET_CODE (op0) == SUBREG)
1368 {
1369 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1370 op0 = SUBREG_REG (op0);
1371 }
1372
1373 /* If we have an out-of-bounds access to a register, just return an
1374 uninitialized register of the required mode. This can occur if the
1375 source code contains an out-of-bounds access to a small array. */
1376 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1377 return gen_reg_rtx (tmode);
1378
1379 if (REG_P (op0)
1380 && mode == GET_MODE (op0)
1381 && bitnum == 0
1382 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1383 {
1384 /* We're trying to extract a full register from itself. */
1385 return op0;
1386 }
1387
1388 /* See if we can get a better vector mode before extracting. */
1389 if (VECTOR_MODE_P (GET_MODE (op0))
1390 && !MEM_P (op0)
1391 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1392 {
1393 enum machine_mode new_mode;
1394
1395 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1396 new_mode = MIN_MODE_VECTOR_FLOAT;
1397 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1398 new_mode = MIN_MODE_VECTOR_FRACT;
1399 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1400 new_mode = MIN_MODE_VECTOR_UFRACT;
1401 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1402 new_mode = MIN_MODE_VECTOR_ACCUM;
1403 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1404 new_mode = MIN_MODE_VECTOR_UACCUM;
1405 else
1406 new_mode = MIN_MODE_VECTOR_INT;
1407
1408 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1409 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1410 && targetm.vector_mode_supported_p (new_mode))
1411 break;
1412 if (new_mode != VOIDmode)
1413 op0 = gen_lowpart (new_mode, op0);
1414 }
1415
1416 /* Use vec_extract patterns for extracting parts of vectors whenever
1417 available. */
1418 if (VECTOR_MODE_P (GET_MODE (op0))
1419 && !MEM_P (op0)
1420 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1421 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1422 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1423 {
1424 struct expand_operand ops[3];
1425 enum machine_mode outermode = GET_MODE (op0);
1426 enum machine_mode innermode = GET_MODE_INNER (outermode);
1427 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1428 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1429
1430 create_output_operand (&ops[0], target, innermode);
1431 create_input_operand (&ops[1], op0, outermode);
1432 create_integer_operand (&ops[2], pos);
1433 if (maybe_expand_insn (icode, 3, ops))
1434 {
1435 target = ops[0].value;
1436 if (GET_MODE (target) != mode)
1437 return gen_lowpart (tmode, target);
1438 return target;
1439 }
1440 }
1441
1442 /* Make sure we are playing with integral modes. Pun with subregs
1443 if we aren't. */
1444 {
1445 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1446 if (imode != GET_MODE (op0))
1447 {
1448 if (MEM_P (op0))
1449 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1450 else if (imode != BLKmode)
1451 {
1452 op0 = gen_lowpart (imode, op0);
1453
1454 /* If we got a SUBREG, force it into a register since we
1455 aren't going to be able to do another SUBREG on it. */
1456 if (GET_CODE (op0) == SUBREG)
1457 op0 = force_reg (imode, op0);
1458 }
1459 else if (REG_P (op0))
1460 {
1461 rtx reg, subreg;
1462 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1463 MODE_INT);
1464 reg = gen_reg_rtx (imode);
1465 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1466 emit_move_insn (subreg, op0);
1467 op0 = reg;
1468 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1469 }
1470 else
1471 {
1472 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1473 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1474 emit_move_insn (mem, op0);
1475 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1476 }
1477 }
1478 }
1479
1480 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1481 If that's wrong, the solution is to test for it and set TARGET to 0
1482 if needed. */
1483
1484 /* Get the mode of the field to use for atomic access or subreg
1485 conversion. */
1486 mode1 = mode;
1487 if (SCALAR_INT_MODE_P (tmode))
1488 {
1489 enum machine_mode try_mode = mode_for_size (bitsize,
1490 GET_MODE_CLASS (tmode), 0);
1491 if (try_mode != BLKmode)
1492 mode1 = try_mode;
1493 }
1494 gcc_assert (mode1 != BLKmode);
1495
1496 /* Extraction of a full MODE1 value can be done with a subreg as long
1497 as the least significant bit of the value is the least significant
1498 bit of either OP0 or a word of OP0. */
1499 if (!MEM_P (op0)
1500 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1501 && bitsize == GET_MODE_BITSIZE (mode1)
1502 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1503 {
1504 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1505 bitnum / BITS_PER_UNIT);
1506 if (sub)
1507 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1508 }
1509
1510 /* Extraction of a full MODE1 value can be done with a load as long as
1511 the field is on a byte boundary and is sufficiently aligned. */
1512 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1513 {
1514 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1515 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1516 }
1517
1518 /* Handle fields bigger than a word. */
1519
1520 if (bitsize > BITS_PER_WORD)
1521 {
1522 /* Here we transfer the words of the field
1523 in the order least significant first.
1524 This is because the most significant word is the one which may
1525 be less than full. */
1526
1527 unsigned int backwards = WORDS_BIG_ENDIAN;
1528 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1529 unsigned int i;
1530 rtx last;
1531
1532 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1533 target = gen_reg_rtx (mode);
1534
1535 /* Indicate for flow that the entire target reg is being set. */
1536 emit_clobber (target);
1537
1538 last = get_last_insn ();
1539 for (i = 0; i < nwords; i++)
1540 {
1541 /* If I is 0, use the low-order word in both field and target;
1542 if I is 1, use the next to lowest word; and so on. */
1543 /* Word number in TARGET to use. */
1544 unsigned int wordnum
1545 = (backwards
1546 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1547 : i);
1548 /* Offset from start of field in OP0. */
1549 unsigned int bit_offset = (backwards
1550 ? MAX ((int) bitsize - ((int) i + 1)
1551 * BITS_PER_WORD,
1552 0)
1553 : (int) i * BITS_PER_WORD);
1554 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1555 rtx result_part
1556 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1557 bitsize - i * BITS_PER_WORD),
1558 bitnum + bit_offset, 1, target_part,
1559 mode, word_mode, fallback_p);
1560
1561 gcc_assert (target_part);
1562 if (!result_part)
1563 {
1564 delete_insns_since (last);
1565 return NULL;
1566 }
1567
1568 if (result_part != target_part)
1569 emit_move_insn (target_part, result_part);
1570 }
1571
1572 if (unsignedp)
1573 {
1574 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1575 need to be zero'd out. */
1576 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1577 {
1578 unsigned int i, total_words;
1579
1580 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1581 for (i = nwords; i < total_words; i++)
1582 emit_move_insn
1583 (operand_subword (target,
1584 backwards ? total_words - i - 1 : i,
1585 1, VOIDmode),
1586 const0_rtx);
1587 }
1588 return target;
1589 }
1590
1591 /* Signed bit field: sign-extend with two arithmetic shifts. */
1592 target = expand_shift (LSHIFT_EXPR, mode, target,
1593 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1594 return expand_shift (RSHIFT_EXPR, mode, target,
1595 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1596 }
1597
1598 /* If OP0 is a multi-word register, narrow it to the affected word.
1599 If the region spans two words, defer to extract_split_bit_field. */
1600 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1601 {
1602 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1603 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1604 bitnum %= BITS_PER_WORD;
1605 if (bitnum + bitsize > BITS_PER_WORD)
1606 {
1607 if (!fallback_p)
1608 return NULL_RTX;
1609 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1610 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1611 }
1612 }
1613
1614 /* From here on we know the desired field is smaller than a word.
1615 If OP0 is a register, it too fits within a word. */
1616 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1617 extraction_insn extv;
1618 if (!MEM_P (op0)
1619 /* ??? We could limit the structure size to the part of OP0 that
1620 contains the field, with appropriate checks for endianness
1621 and TRULY_NOOP_TRUNCATION. */
1622 && get_best_reg_extraction_insn (&extv, pattern,
1623 GET_MODE_BITSIZE (GET_MODE (op0)),
1624 tmode))
1625 {
1626 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1627 unsignedp, target, mode,
1628 tmode);
1629 if (result)
1630 return result;
1631 }
1632
1633 /* If OP0 is a memory, try copying it to a register and seeing if a
1634 cheap register alternative is available. */
1635 if (MEM_P (op0))
1636 {
1637 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1638 tmode))
1639 {
1640 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1641 bitnum, unsignedp,
1642 target, mode,
1643 tmode);
1644 if (result)
1645 return result;
1646 }
1647
1648 rtx last = get_last_insn ();
1649
1650 /* Try loading part of OP0 into a register and extracting the
1651 bitfield from that. */
1652 unsigned HOST_WIDE_INT bitpos;
1653 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1654 0, 0, tmode, &bitpos);
1655 if (xop0)
1656 {
1657 xop0 = copy_to_reg (xop0);
1658 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1659 unsignedp, target,
1660 mode, tmode, false);
1661 if (result)
1662 return result;
1663 delete_insns_since (last);
1664 }
1665 }
1666
1667 if (!fallback_p)
1668 return NULL;
1669
1670 /* Find a correspondingly-sized integer field, so we can apply
1671 shifts and masks to it. */
1672 int_mode = int_mode_for_mode (tmode);
1673 if (int_mode == BLKmode)
1674 int_mode = int_mode_for_mode (mode);
1675 /* Should probably push op0 out to memory and then do a load. */
1676 gcc_assert (int_mode != BLKmode);
1677
1678 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1679 target, unsignedp);
1680 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1681 }
1682
1683 /* Generate code to extract a bit-field from STR_RTX
1684 containing BITSIZE bits, starting at BITNUM,
1685 and put it in TARGET if possible (if TARGET is nonzero).
1686 Regardless of TARGET, we return the rtx for where the value is placed.
1687
1688 STR_RTX is the structure containing the byte (a REG or MEM).
1689 UNSIGNEDP is nonzero if this is an unsigned bit field.
1690 MODE is the natural mode of the field value once extracted.
1691 TMODE is the mode the caller would like the value to have;
1692 but the value may be returned with type MODE instead.
1693
1694 If a TARGET is specified and we can store in it at no extra cost,
1695 we do so, and return TARGET.
1696 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1697 if they are equally easy. */
1698
1699 rtx
1700 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1701 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1702 enum machine_mode mode, enum machine_mode tmode)
1703 {
1704 enum machine_mode mode1;
1705
1706 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1707 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1708 mode1 = GET_MODE (str_rtx);
1709 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1710 mode1 = GET_MODE (target);
1711 else
1712 mode1 = tmode;
1713
1714 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1715 {
1716 rtx result;
1717
1718 /* Extraction of a full MODE1 value can be done with a load as long as
1719 the field is on a byte boundary and is sufficiently aligned. */
1720 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1721 result = adjust_bitfield_address (str_rtx, mode1,
1722 bitnum / BITS_PER_UNIT);
1723 else
1724 result = extract_fixed_bit_field (mode, str_rtx, bitsize, bitnum,
1725 target, unsignedp);
1726 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1727 }
1728
1729 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1730 target, mode, tmode, true);
1731 }
1732 \f
1733 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1734 from bit BITNUM of OP0.
1735
1736 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1737 If TARGET is nonzero, attempts to store the value there
1738 and return TARGET, but this is not guaranteed.
1739 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1740
1741 static rtx
1742 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1743 unsigned HOST_WIDE_INT bitsize,
1744 unsigned HOST_WIDE_INT bitnum, rtx target,
1745 int unsignedp)
1746 {
1747 enum machine_mode mode;
1748
1749 if (MEM_P (op0))
1750 {
1751 /* Get the proper mode to use for this field. We want a mode that
1752 includes the entire field. If such a mode would be larger than
1753 a word, we won't be doing the extraction the normal way. */
1754
1755 mode = GET_MODE (op0);
1756 if (GET_MODE_BITSIZE (mode) == 0
1757 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1758 mode = word_mode;
1759 mode = get_best_mode (bitsize, bitnum, 0, 0,
1760 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1761
1762 if (mode == VOIDmode)
1763 /* The only way this should occur is if the field spans word
1764 boundaries. */
1765 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1766
1767 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1768 }
1769
1770 mode = GET_MODE (op0);
1771 gcc_assert (SCALAR_INT_MODE_P (mode));
1772
1773 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1774 for invalid input, such as extract equivalent of f5 from
1775 gcc.dg/pr48335-2.c. */
1776
1777 if (BYTES_BIG_ENDIAN)
1778 /* BITNUM is the distance between our msb and that of OP0.
1779 Convert it to the distance from the lsb. */
1780 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1781
1782 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1783 We have reduced the big-endian case to the little-endian case. */
1784
1785 if (unsignedp)
1786 {
1787 if (bitnum)
1788 {
1789 /* If the field does not already start at the lsb,
1790 shift it so it does. */
1791 /* Maybe propagate the target for the shift. */
1792 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1793 if (tmode != mode)
1794 subtarget = 0;
1795 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1796 }
1797 /* Convert the value to the desired mode. */
1798 if (mode != tmode)
1799 op0 = convert_to_mode (tmode, op0, 1);
1800
1801 /* Unless the msb of the field used to be the msb when we shifted,
1802 mask out the upper bits. */
1803
1804 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1805 return expand_binop (GET_MODE (op0), and_optab, op0,
1806 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1807 target, 1, OPTAB_LIB_WIDEN);
1808 return op0;
1809 }
1810
1811 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1812 then arithmetic-shift its lsb to the lsb of the word. */
1813 op0 = force_reg (mode, op0);
1814
1815 /* Find the narrowest integer mode that contains the field. */
1816
1817 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1818 mode = GET_MODE_WIDER_MODE (mode))
1819 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1820 {
1821 op0 = convert_to_mode (mode, op0, 0);
1822 break;
1823 }
1824
1825 if (mode != tmode)
1826 target = 0;
1827
1828 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1829 {
1830 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1831 /* Maybe propagate the target for the shift. */
1832 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1833 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1834 }
1835
1836 return expand_shift (RSHIFT_EXPR, mode, op0,
1837 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1838 }
1839 \f
1840 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1841 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1842 complement of that if COMPLEMENT. The mask is truncated if
1843 necessary to the width of mode MODE. The mask is zero-extended if
1844 BITSIZE+BITPOS is too small for MODE. */
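 /* For example, mask_rtx (SImode, 3, 4, 0) yields the SImode constant
 0x78 (binary 1111000), and mask_rtx (SImode, 3, 4, 1) yields its
 complement truncated to SImode, 0xffffff87. */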
1845
1846 static rtx
1847 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1848 {
1849 double_int mask;
1850
1851 mask = double_int::mask (bitsize);
1852 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1853
1854 if (complement)
1855 mask = ~mask;
1856
1857 return immed_double_int_const (mask, mode);
1858 }
1859
1860 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1861 VALUE << BITPOS. */
1862
1863 static rtx
1864 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1865 int bitpos)
1866 {
1867 double_int val;
1868
1869 val = double_int::from_uhwi (value);
1870 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1871
1872 return immed_double_int_const (val, mode);
1873 }
1874 \f
1875 /* Extract a bit field that is split across two words
1876 and return an RTX for the result.
1877
1878 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1879 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1880 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
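 /* For example, on a little-endian 32-bit-word target a 16-bit field
 starting at bit 24 of the first word is assembled from two pieces:
 bits 24..31 of word 0 and bits 0..7 of word 1, each fetched with
 extract_fixed_bit_field and combined below with a shift and an IOR. */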
1881
1882 static rtx
1883 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1884 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1885 {
1886 unsigned int unit;
1887 unsigned int bitsdone = 0;
1888 rtx result = NULL_RTX;
1889 int first = 1;
1890
1891 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1892 much at a time. */
1893 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1894 unit = BITS_PER_WORD;
1895 else
1896 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1897
1898 while (bitsdone < bitsize)
1899 {
1900 unsigned HOST_WIDE_INT thissize;
1901 rtx part, word;
1902 unsigned HOST_WIDE_INT thispos;
1903 unsigned HOST_WIDE_INT offset;
1904
1905 offset = (bitpos + bitsdone) / unit;
1906 thispos = (bitpos + bitsdone) % unit;
1907
1908 /* THISSIZE must not overrun a word boundary. Otherwise,
1909 extract_fixed_bit_field will call us again, and we will mutually
1910 recurse forever. */
1911 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1912 thissize = MIN (thissize, unit - thispos);
1913
1914 /* If OP0 is a register, then handle OFFSET here.
1915
1916 When handling multiword bitfields, extract_bit_field may pass
1917 down a word_mode SUBREG of a larger REG for a bitfield that actually
1918 crosses a word boundary. Thus, for a SUBREG, we must find
1919 the current word starting from the base register. */
1920 if (GET_CODE (op0) == SUBREG)
1921 {
1922 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1923 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1924 GET_MODE (SUBREG_REG (op0)));
1925 offset = 0;
1926 }
1927 else if (REG_P (op0))
1928 {
1929 word = operand_subword_force (op0, offset, GET_MODE (op0));
1930 offset = 0;
1931 }
1932 else
1933 word = op0;
1934
1935 /* Extract the parts in bit-counting order,
1936 whose meaning is determined by BYTES_PER_UNIT.
1937 OFFSET is in UNITs, and UNIT is in bits. */
1938 part = extract_fixed_bit_field (word_mode, word, thissize,
1939 offset * unit + thispos, 0, 1);
1940 bitsdone += thissize;
1941
1942 /* Shift this part into place for the result. */
1943 if (BYTES_BIG_ENDIAN)
1944 {
1945 if (bitsize != bitsdone)
1946 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1947 bitsize - bitsdone, 0, 1);
1948 }
1949 else
1950 {
1951 if (bitsdone != thissize)
1952 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1953 bitsdone - thissize, 0, 1);
1954 }
1955
1956 if (first)
1957 result = part;
1958 else
1959 /* Combine the parts with bitwise or. This works
1960 because we extracted each part as an unsigned bit field. */
1961 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1962 OPTAB_LIB_WIDEN);
1963
1964 first = 0;
1965 }
1966
1967 /* Unsigned bit field: we are done. */
1968 if (unsignedp)
1969 return result;
1970 /* Signed bit field: sign-extend with two arithmetic shifts. */
1971 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1972 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1973 return expand_shift (RSHIFT_EXPR, word_mode, result,
1974 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1975 }
1976 \f
1977 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1978 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1979 MODE, fill the upper bits with zeros. Fail if the layout of either
1980 mode is unknown (as for CC modes) or if the extraction would involve
1981 unprofitable mode punning. Return the value on success, otherwise
1982 return null.
1983
1984 This is different from gen_lowpart* in these respects:
1985
1986 - the returned value must always be considered an rvalue
1987
1988 - when MODE is wider than SRC_MODE, the extraction involves
1989 a zero extension
1990
1991 - when MODE is smaller than SRC_MODE, the extraction involves
1992 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1993
1994 In other words, this routine performs a computation, whereas the
1995 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1996 operations. */
1997
1998 rtx
1999 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2000 {
2001 enum machine_mode int_mode, src_int_mode;
2002
2003 if (mode == src_mode)
2004 return src;
2005
2006 if (CONSTANT_P (src))
2007 {
2008 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2009 fails, it will happily create (subreg (symbol_ref)) or similar
2010 invalid SUBREGs. */
2011 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2012 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2013 if (ret)
2014 return ret;
2015
2016 if (GET_MODE (src) == VOIDmode
2017 || !validate_subreg (mode, src_mode, src, byte))
2018 return NULL_RTX;
2019
2020 src = force_reg (GET_MODE (src), src);
2021 return gen_rtx_SUBREG (mode, src, byte);
2022 }
2023
2024 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2025 return NULL_RTX;
2026
2027 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2028 && MODES_TIEABLE_P (mode, src_mode))
2029 {
2030 rtx x = gen_lowpart_common (mode, src);
2031 if (x)
2032 return x;
2033 }
2034
2035 src_int_mode = int_mode_for_mode (src_mode);
2036 int_mode = int_mode_for_mode (mode);
2037 if (src_int_mode == BLKmode || int_mode == BLKmode)
2038 return NULL_RTX;
2039
2040 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2041 return NULL_RTX;
2042 if (!MODES_TIEABLE_P (int_mode, mode))
2043 return NULL_RTX;
2044
2045 src = gen_lowpart (src_int_mode, src);
2046 src = convert_modes (int_mode, src_int_mode, src, true);
2047 src = gen_lowpart (mode, src);
2048 return src;
2049 }
2050 \f
2051 /* Add INC into TARGET. */
2052
2053 void
2054 expand_inc (rtx target, rtx inc)
2055 {
2056 rtx value = expand_binop (GET_MODE (target), add_optab,
2057 target, inc,
2058 target, 0, OPTAB_LIB_WIDEN);
2059 if (value != target)
2060 emit_move_insn (target, value);
2061 }
2062
2063 /* Subtract DEC from TARGET. */
2064
2065 void
2066 expand_dec (rtx target, rtx dec)
2067 {
2068 rtx value = expand_binop (GET_MODE (target), sub_optab,
2069 target, dec,
2070 target, 0, OPTAB_LIB_WIDEN);
2071 if (value != target)
2072 emit_move_insn (target, value);
2073 }
2074 \f
2075 /* Output a shift instruction for expression code CODE,
2076 with SHIFTED being the rtx for the value to shift,
2077 and AMOUNT the rtx for the amount to shift by.
2078 Store the result in the rtx TARGET, if that is convenient.
2079 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2080 Return the rtx for where the value is. */
2081
2082 static rtx
2083 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2084 rtx amount, rtx target, int unsignedp)
2085 {
2086 rtx op1, temp = 0;
2087 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2088 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2089 optab lshift_optab = ashl_optab;
2090 optab rshift_arith_optab = ashr_optab;
2091 optab rshift_uns_optab = lshr_optab;
2092 optab lrotate_optab = rotl_optab;
2093 optab rrotate_optab = rotr_optab;
2094 enum machine_mode op1_mode;
2095 int attempt;
2096 bool speed = optimize_insn_for_speed_p ();
2097
2098 op1 = amount;
2099 op1_mode = GET_MODE (op1);
2100
2101 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2102 shift amount is a vector, use the vector/vector shift patterns. */
2103 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2104 {
2105 lshift_optab = vashl_optab;
2106 rshift_arith_optab = vashr_optab;
2107 rshift_uns_optab = vlshr_optab;
2108 lrotate_optab = vrotl_optab;
2109 rrotate_optab = vrotr_optab;
2110 }
2111
2112 /* We previously detected shift counts computed by NEGATE_EXPR
2113 and shifted in the other direction; but that does not work
2114 on all machines. */
2115
2116 if (SHIFT_COUNT_TRUNCATED)
2117 {
2118 if (CONST_INT_P (op1)
2119 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2120 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2121 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2122 % GET_MODE_BITSIZE (mode));
2123 else if (GET_CODE (op1) == SUBREG
2124 && subreg_lowpart_p (op1)
2125 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2126 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2127 op1 = SUBREG_REG (op1);
2128 }
2129
2130 /* Canonicalize rotates by a constant amount. If op1 is bitsize / 2,
2131 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2132 bitsize - 1, use the other direction of rotate with a 1 .. bitsize / 2 - 1
2133 amount instead. */
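 /* For example, in SImode a left rotate by 30 is rewritten as a right
 rotate by 2, and a right rotate by 17 as a left rotate by 15. */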
2134 if (rotate
2135 && CONST_INT_P (op1)
2136 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2137 GET_MODE_BITSIZE (mode) - 1))
2138 {
2139 op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2140 left = !left;
2141 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2142 }
2143
2144 if (op1 == const0_rtx)
2145 return shifted;
2146
2147 /* Check whether it's cheaper to implement a left shift by a constant
2148 bit count by a sequence of additions. */
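 /* For example, on a target where shifts are expensive, x << 2 can be
 emitted as two doublings: t = x + x; t = t + t. */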
2149 if (code == LSHIFT_EXPR
2150 && CONST_INT_P (op1)
2151 && INTVAL (op1) > 0
2152 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2153 && INTVAL (op1) < MAX_BITS_PER_WORD
2154 && (shift_cost (speed, mode, INTVAL (op1))
2155 > INTVAL (op1) * add_cost (speed, mode))
2156 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2157 {
2158 int i;
2159 for (i = 0; i < INTVAL (op1); i++)
2160 {
2161 temp = force_reg (mode, shifted);
2162 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2163 unsignedp, OPTAB_LIB_WIDEN);
2164 }
2165 return shifted;
2166 }
2167
2168 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2169 {
2170 enum optab_methods methods;
2171
2172 if (attempt == 0)
2173 methods = OPTAB_DIRECT;
2174 else if (attempt == 1)
2175 methods = OPTAB_WIDEN;
2176 else
2177 methods = OPTAB_LIB_WIDEN;
2178
2179 if (rotate)
2180 {
2181 /* Widening does not work for rotation. */
2182 if (methods == OPTAB_WIDEN)
2183 continue;
2184 else if (methods == OPTAB_LIB_WIDEN)
2185 {
2186 /* If we have been unable to open-code this by a rotation,
2187 do it as the IOR of two shifts. I.e., to rotate A
2188 by N bits, compute
2189 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2190 where C is the bitsize of A.
2191
2192 It is theoretically possible that the target machine might
2193 not be able to perform either shift and hence we would
2194 be making two libcalls rather than just the one for the
2195 shift (similarly if IOR could not be done). We will allow
2196 this extremely unlikely lossage to avoid complicating the
2197 code below. */
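 /* For example, in SImode with a variable shift count N this emits
 (A << N) | ((unsigned) A >> ((-N) & 31)); masking the negated
 count keeps the right-shift amount in the range 0..31 even when
 N is zero. */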
2198
2199 rtx subtarget = target == shifted ? 0 : target;
2200 rtx new_amount, other_amount;
2201 rtx temp1;
2202
2203 new_amount = op1;
2204 if (op1 == const0_rtx)
2205 return shifted;
2206 else if (CONST_INT_P (op1))
2207 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2208 - INTVAL (op1));
2209 else
2210 {
2211 other_amount
2212 = simplify_gen_unary (NEG, GET_MODE (op1),
2213 op1, GET_MODE (op1));
2214 HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2215 other_amount
2216 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2217 gen_int_mode (mask, GET_MODE (op1)));
2218 }
2219
2220 shifted = force_reg (mode, shifted);
2221
2222 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2223 mode, shifted, new_amount, 0, 1);
2224 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2225 mode, shifted, other_amount,
2226 subtarget, 1);
2227 return expand_binop (mode, ior_optab, temp, temp1, target,
2228 unsignedp, methods);
2229 }
2230
2231 temp = expand_binop (mode,
2232 left ? lrotate_optab : rrotate_optab,
2233 shifted, op1, target, unsignedp, methods);
2234 }
2235 else if (unsignedp)
2236 temp = expand_binop (mode,
2237 left ? lshift_optab : rshift_uns_optab,
2238 shifted, op1, target, unsignedp, methods);
2239
2240 /* Do arithmetic shifts.
2241 Also, if we are going to widen the operand, we can just as well
2242 use an arithmetic right-shift instead of a logical one. */
2243 if (temp == 0 && ! rotate
2244 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2245 {
2246 enum optab_methods methods1 = methods;
2247
2248 /* If trying to widen a log shift to an arithmetic shift,
2249 don't accept an arithmetic shift of the same size. */
2250 if (unsignedp)
2251 methods1 = OPTAB_MUST_WIDEN;
2252
2253 /* Arithmetic shift */
2254
2255 temp = expand_binop (mode,
2256 left ? lshift_optab : rshift_arith_optab,
2257 shifted, op1, target, unsignedp, methods1);
2258 }
2259
2260 /* We used to try extzv here for logical right shifts, but that was
2261 only useful for one machine, the VAX, and caused poor code
2262 generation there for lshrdi3, so the code was deleted and a
2263 define_expand for lshrsi3 was added to vax.md. */
2264 }
2265
2266 gcc_assert (temp);
2267 return temp;
2268 }
2269
2270 /* Output a shift instruction for expression code CODE,
2271 with SHIFTED being the rtx for the value to shift,
2272 and AMOUNT the amount to shift by.
2273 Store the result in the rtx TARGET, if that is convenient.
2274 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2275 Return the rtx for where the value is. */
2276
2277 rtx
2278 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2279 int amount, rtx target, int unsignedp)
2280 {
2281 return expand_shift_1 (code, mode,
2282 shifted, GEN_INT (amount), target, unsignedp);
2283 }
2284
2285 /* Output a shift instruction for expression code CODE,
2286 with SHIFTED being the rtx for the value to shift,
2287 and AMOUNT the tree for the amount to shift by.
2288 Store the result in the rtx TARGET, if that is convenient.
2289 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2290 Return the rtx for where the value is. */
2291
2292 rtx
2293 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2294 tree amount, rtx target, int unsignedp)
2295 {
2296 return expand_shift_1 (code, mode,
2297 shifted, expand_normal (amount), target, unsignedp);
2298 }
2299
2300 \f
2301 /* Indicates the type of fixup needed after a constant multiplication.
2302 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2303 the result should be negated, and ADD_VARIANT means that the
2304 multiplicand should be added to the result. */
2305 enum mult_variant {basic_variant, negate_variant, add_variant};
2306
2307 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2308 const struct mult_cost *, enum machine_mode mode);
2309 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2310 struct algorithm *, enum mult_variant *, int);
2311 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2312 const struct algorithm *, enum mult_variant);
2313 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2314 static rtx extract_high_half (enum machine_mode, rtx);
2315 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2316 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2317 int, int);
2318 /* Compute and return the best algorithm for multiplying by T.
2319 The algorithm must cost less than COST_LIMIT.
2320 If retval.cost >= COST_LIMIT, no algorithm was found and all
2321 other fields of the returned struct are undefined.
2322 MODE is the machine mode of the multiplication. */
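 /* For example, the multiplier 7 may be synthesized as (x << 3) - x and
 the multiplier 10 as ((x << 2) + x) << 1, when such sequences are
 cheaper than the target's multiply instruction. */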
2323
2324 static void
2325 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2326 const struct mult_cost *cost_limit, enum machine_mode mode)
2327 {
2328 int m;
2329 struct algorithm *alg_in, *best_alg;
2330 struct mult_cost best_cost;
2331 struct mult_cost new_limit;
2332 int op_cost, op_latency;
2333 unsigned HOST_WIDE_INT orig_t = t;
2334 unsigned HOST_WIDE_INT q;
2335 int maxm, hash_index;
2336 bool cache_hit = false;
2337 enum alg_code cache_alg = alg_zero;
2338 bool speed = optimize_insn_for_speed_p ();
2339 enum machine_mode imode;
2340 struct alg_hash_entry *entry_ptr;
2341
2342 /* Indicate that no algorithm is yet found. If no algorithm
2343 is found, this value will be returned and indicate failure. */
2344 alg_out->cost.cost = cost_limit->cost + 1;
2345 alg_out->cost.latency = cost_limit->latency + 1;
2346
2347 if (cost_limit->cost < 0
2348 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2349 return;
2350
2351 /* Be prepared for vector modes. */
2352 imode = GET_MODE_INNER (mode);
2353 if (imode == VOIDmode)
2354 imode = mode;
2355
2356 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2357
2358 /* Restrict the bits of "t" to the multiplication's mode. */
2359 t &= GET_MODE_MASK (imode);
2360
2361 /* t == 1 can be done at zero cost. */
2362 if (t == 1)
2363 {
2364 alg_out->ops = 1;
2365 alg_out->cost.cost = 0;
2366 alg_out->cost.latency = 0;
2367 alg_out->op[0] = alg_m;
2368 return;
2369 }
2370
2371 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2372 fail now. */
2373 if (t == 0)
2374 {
2375 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2376 return;
2377 else
2378 {
2379 alg_out->ops = 1;
2380 alg_out->cost.cost = zero_cost (speed);
2381 alg_out->cost.latency = zero_cost (speed);
2382 alg_out->op[0] = alg_zero;
2383 return;
2384 }
2385 }
2386
2387 /* We'll be needing a couple extra algorithm structures now. */
2388
2389 alg_in = XALLOCA (struct algorithm);
2390 best_alg = XALLOCA (struct algorithm);
2391 best_cost = *cost_limit;
2392
2393 /* Compute the hash index. */
2394 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2395
2396 /* See if we already know what to do for T. */
2397 entry_ptr = alg_hash_entry_ptr (hash_index);
2398 if (entry_ptr->t == t
2399 && entry_ptr->mode == mode
2401 && entry_ptr->speed == speed
2402 && entry_ptr->alg != alg_unknown)
2403 {
2404 cache_alg = entry_ptr->alg;
2405
2406 if (cache_alg == alg_impossible)
2407 {
2408 /* The cache tells us that it's impossible to synthesize
2409 multiplication by T within entry_ptr->cost. */
2410 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2411 /* COST_LIMIT is at least as restrictive as the one
2412 recorded in the hash table, in which case we have no
2413 hope of synthesizing a multiplication. Just
2414 return. */
2415 return;
2416
2417 /* If we get here, COST_LIMIT is less restrictive than the
2418 one recorded in the hash table, so we may be able to
2419 synthesize a multiplication. Proceed as if we didn't
2420 have the cache entry. */
2421 }
2422 else
2423 {
2424 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2425 /* The cached algorithm shows that this multiplication
2426 requires more cost than COST_LIMIT. Just return. This
2427 way, we don't clobber this cache entry with
2428 alg_impossible but retain useful information. */
2429 return;
2430
2431 cache_hit = true;
2432
2433 switch (cache_alg)
2434 {
2435 case alg_shift:
2436 goto do_alg_shift;
2437
2438 case alg_add_t_m2:
2439 case alg_sub_t_m2:
2440 goto do_alg_addsub_t_m2;
2441
2442 case alg_add_factor:
2443 case alg_sub_factor:
2444 goto do_alg_addsub_factor;
2445
2446 case alg_add_t2_m:
2447 goto do_alg_add_t2_m;
2448
2449 case alg_sub_t2_m:
2450 goto do_alg_sub_t2_m;
2451
2452 default:
2453 gcc_unreachable ();
2454 }
2455 }
2456 }
2457
2458 /* If we have a group of zero bits at the low-order part of T, try
2459 multiplying by the remaining bits and then doing a shift. */
2460
2461 if ((t & 1) == 0)
2462 {
2463 do_alg_shift:
2464 m = floor_log2 (t & -t); /* m = number of low zero bits */
2465 if (m < maxm)
2466 {
2467 q = t >> m;
2468 /* The function expand_shift will choose between a shift and
2469 a sequence of additions, so the observed cost is given as
2470 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2471 op_cost = m * add_cost (speed, mode);
2472 if (shift_cost (speed, mode, m) < op_cost)
2473 op_cost = shift_cost (speed, mode, m);
2474 new_limit.cost = best_cost.cost - op_cost;
2475 new_limit.latency = best_cost.latency - op_cost;
2476 synth_mult (alg_in, q, &new_limit, mode);
2477
2478 alg_in->cost.cost += op_cost;
2479 alg_in->cost.latency += op_cost;
2480 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2481 {
2482 struct algorithm *x;
2483 best_cost = alg_in->cost;
2484 x = alg_in, alg_in = best_alg, best_alg = x;
2485 best_alg->log[best_alg->ops] = m;
2486 best_alg->op[best_alg->ops] = alg_shift;
2487 }
2488
2489 /* See if treating ORIG_T as a signed number yields a better
2490 sequence. Try this sequence only for a negative ORIG_T
2491 as it would be useless for a non-negative ORIG_T. */
2492 if ((HOST_WIDE_INT) orig_t < 0)
2493 {
2494 /* Shift ORIG_T as follows because a right shift of a
2495 negative-valued signed type is implementation
2496 defined. */
2497 q = ~(~orig_t >> m);
2498 /* The function expand_shift will choose between a shift
2499 and a sequence of additions, so the observed cost is
2500 given as MIN (m * add_cost(speed, mode),
2501 shift_cost(speed, mode, m)). */
2502 op_cost = m * add_cost (speed, mode);
2503 if (shift_cost (speed, mode, m) < op_cost)
2504 op_cost = shift_cost (speed, mode, m);
2505 new_limit.cost = best_cost.cost - op_cost;
2506 new_limit.latency = best_cost.latency - op_cost;
2507 synth_mult (alg_in, q, &new_limit, mode);
2508
2509 alg_in->cost.cost += op_cost;
2510 alg_in->cost.latency += op_cost;
2511 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512 {
2513 struct algorithm *x;
2514 best_cost = alg_in->cost;
2515 x = alg_in, alg_in = best_alg, best_alg = x;
2516 best_alg->log[best_alg->ops] = m;
2517 best_alg->op[best_alg->ops] = alg_shift;
2518 }
2519 }
2520 }
2521 if (cache_hit)
2522 goto done;
2523 }
2524
2525 /* If we have an odd number, add or subtract one. */
2526 if ((t & 1) != 0)
2527 {
2528 unsigned HOST_WIDE_INT w;
2529
2530 do_alg_addsub_t_m2:
2531 for (w = 1; (w & t) != 0; w <<= 1)
2532 ;
2533 /* If T was -1, then W will be zero after the loop. This is another
2534 case where T ends with ...111. Handling this by multiplying by (T + 1)
2535 and subtracting 1 produces slightly better code and results in much
2536 faster algorithm selection than treating it like the ...0111 case
2537 below. */
2538 if (w == 0
2539 || (w > 2
2540 /* Reject the case where t is 3.
2541 Thus we prefer addition in that case. */
2542 && t != 3))
2543 {
2544 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2545
2546 op_cost = add_cost (speed, mode);
2547 new_limit.cost = best_cost.cost - op_cost;
2548 new_limit.latency = best_cost.latency - op_cost;
2549 synth_mult (alg_in, t + 1, &new_limit, mode);
2550
2551 alg_in->cost.cost += op_cost;
2552 alg_in->cost.latency += op_cost;
2553 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2554 {
2555 struct algorithm *x;
2556 best_cost = alg_in->cost;
2557 x = alg_in, alg_in = best_alg, best_alg = x;
2558 best_alg->log[best_alg->ops] = 0;
2559 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2560 }
2561 }
2562 else
2563 {
2564 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2565
2566 op_cost = add_cost (speed, mode);
2567 new_limit.cost = best_cost.cost - op_cost;
2568 new_limit.latency = best_cost.latency - op_cost;
2569 synth_mult (alg_in, t - 1, &new_limit, mode);
2570
2571 alg_in->cost.cost += op_cost;
2572 alg_in->cost.latency += op_cost;
2573 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2574 {
2575 struct algorithm *x;
2576 best_cost = alg_in->cost;
2577 x = alg_in, alg_in = best_alg, best_alg = x;
2578 best_alg->log[best_alg->ops] = 0;
2579 best_alg->op[best_alg->ops] = alg_add_t_m2;
2580 }
2581 }
2582
2583 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2584 quickly with a - a * n for some appropriate constant n. */
2585 m = exact_log2 (-orig_t + 1);
2586 if (m >= 0 && m < maxm)
2587 {
2588 op_cost = shiftsub1_cost (speed, mode, m);
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2592 &new_limit, mode);
2593
2594 alg_in->cost.cost += op_cost;
2595 alg_in->cost.latency += op_cost;
2596 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597 {
2598 struct algorithm *x;
2599 best_cost = alg_in->cost;
2600 x = alg_in, alg_in = best_alg, best_alg = x;
2601 best_alg->log[best_alg->ops] = m;
2602 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2603 }
2604 }
2605
2606 if (cache_hit)
2607 goto done;
2608 }
2609
2610 /* Look for factors of t of the form
2611 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2612 If we find such a factor, we can multiply by t using an algorithm that
2613 multiplies by q, shift the result by m and add/subtract it to itself.
2614
2615 We search for large factors first and loop down, even if large factors
2616 are less probable than small; if we find a large factor we will find a
2617 good sequence quickly, and therefore be able to prune (by decreasing
2618 COST_LIMIT) the search. */
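 /* For example, t = 45 factors as (2**3 + 1) * (2**2 + 1), so x * 45
 can be computed as t1 = (x << 2) + x; result = (t1 << 3) + t1. */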
2619
2620 do_alg_addsub_factor:
2621 for (m = floor_log2 (t - 1); m >= 2; m--)
2622 {
2623 unsigned HOST_WIDE_INT d;
2624
2625 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2626 if (t % d == 0 && t > d && m < maxm
2627 && (!cache_hit || cache_alg == alg_add_factor))
2628 {
2629 /* If the target has a cheap shift-and-add instruction use
2630 that in preference to a shift insn followed by an add insn.
2631 Assume that the shift-and-add is "atomic" with a latency
2632 equal to its cost, otherwise assume that on superscalar
2633 hardware the shift may be executed concurrently with the
2634 earlier steps in the algorithm. */
2635 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2636 if (shiftadd_cost (speed, mode, m) < op_cost)
2637 {
2638 op_cost = shiftadd_cost (speed, mode, m);
2639 op_latency = op_cost;
2640 }
2641 else
2642 op_latency = add_cost (speed, mode);
2643
2644 new_limit.cost = best_cost.cost - op_cost;
2645 new_limit.latency = best_cost.latency - op_latency;
2646 synth_mult (alg_in, t / d, &new_limit, mode);
2647
2648 alg_in->cost.cost += op_cost;
2649 alg_in->cost.latency += op_latency;
2650 if (alg_in->cost.latency < op_cost)
2651 alg_in->cost.latency = op_cost;
2652 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2653 {
2654 struct algorithm *x;
2655 best_cost = alg_in->cost;
2656 x = alg_in, alg_in = best_alg, best_alg = x;
2657 best_alg->log[best_alg->ops] = m;
2658 best_alg->op[best_alg->ops] = alg_add_factor;
2659 }
2660 /* Other factors will have been taken care of in the recursion. */
2661 break;
2662 }
2663
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_sub_factor))
2667 {
2668 /* If the target has a cheap shift-and-subtract insn use
2669 that in preference to a shift insn followed by a sub insn.
2670 Assume that the shift-and-sub is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2675 if (shiftsub0_cost (speed, mode, m) < op_cost)
2676 {
2677 op_cost = shiftsub0_cost (speed, mode, m);
2678 op_latency = op_cost;
2679 }
2680 else
2681 op_latency = add_cost (speed, mode);
2682
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2686
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692 {
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_sub_factor;
2698 }
2699 break;
2700 }
2701 }
2702 if (cache_hit)
2703 goto done;
2704
2705 /* Try shift-and-add (load effective address) instructions,
2706 i.e. do a*3, a*5, a*9. */
2707 if ((t & 1) != 0)
2708 {
2709 do_alg_add_t2_m:
2710 q = t - 1;
2711 q = q & -q;
2712 m = exact_log2 (q);
2713 if (m >= 0 && m < maxm)
2714 {
2715 op_cost = shiftadd_cost (speed, mode, m);
2716 new_limit.cost = best_cost.cost - op_cost;
2717 new_limit.latency = best_cost.latency - op_cost;
2718 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2719
2720 alg_in->cost.cost += op_cost;
2721 alg_in->cost.latency += op_cost;
2722 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2723 {
2724 struct algorithm *x;
2725 best_cost = alg_in->cost;
2726 x = alg_in, alg_in = best_alg, best_alg = x;
2727 best_alg->log[best_alg->ops] = m;
2728 best_alg->op[best_alg->ops] = alg_add_t2_m;
2729 }
2730 }
2731 if (cache_hit)
2732 goto done;
2733
2734 do_alg_sub_t2_m:
2735 q = t + 1;
2736 q = q & -q;
2737 m = exact_log2 (q);
2738 if (m >= 0 && m < maxm)
2739 {
2740 op_cost = shiftsub0_cost (speed, mode, m);
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2744
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2748 {
2749 struct algorithm *x;
2750 best_cost = alg_in->cost;
2751 x = alg_in, alg_in = best_alg, best_alg = x;
2752 best_alg->log[best_alg->ops] = m;
2753 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2754 }
2755 }
2756 if (cache_hit)
2757 goto done;
2758 }
2759
2760 done:
2761 /* If best_cost has not decreased, we have not found any algorithm. */
2762 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2763 {
2764 /* We failed to find an algorithm. Record alg_impossible for
2765 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2766 we are asked to find an algorithm for T within the same or
2767 lower COST_LIMIT, we can immediately return to the
2768 caller. */
2769 entry_ptr->t = t;
2770 entry_ptr->mode = mode;
2771 entry_ptr->speed = speed;
2772 entry_ptr->alg = alg_impossible;
2773 entry_ptr->cost = *cost_limit;
2774 return;
2775 }
2776
2777 /* Cache the result. */
2778 if (!cache_hit)
2779 {
2780 entry_ptr->t = t;
2781 entry_ptr->mode = mode;
2782 entry_ptr->speed = speed;
2783 entry_ptr->alg = best_alg->op[best_alg->ops];
2784 entry_ptr->cost.cost = best_cost.cost;
2785 entry_ptr->cost.latency = best_cost.latency;
2786 }
2787
2788 /* If we are getting too long a sequence for `struct algorithm'
2789 to record, make this search fail. */
2790 if (best_alg->ops == MAX_BITS_PER_WORD)
2791 return;
2792
2793 /* Copy the algorithm from temporary space to the space at alg_out.
2794 We avoid using structure assignment because the majority of
2795 best_alg is normally undefined, and this is a critical function. */
2796 alg_out->ops = best_alg->ops + 1;
2797 alg_out->cost = best_cost;
2798 memcpy (alg_out->op, best_alg->op,
2799 alg_out->ops * sizeof *alg_out->op);
2800 memcpy (alg_out->log, best_alg->log,
2801 alg_out->ops * sizeof *alg_out->log);
2802 }
2803 \f
2804 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2805 Try three variations:
2806
2807 - a shift/add sequence based on VAL itself
2808 - a shift/add sequence based on -VAL, followed by a negation
2809 - a shift/add sequence based on VAL - 1, followed by an addition.
2810
2811 Return true if the cheapest of these cost less than MULT_COST,
2812 describing the algorithm in *ALG and final fixup in *VARIANT. */
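 /* For example, a multiplication by -7 may be cheapest as the negate
 variant: compute x * 7 as (x << 3) - x and then negate the result. */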
2813
2814 static bool
2815 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2816 struct algorithm *alg, enum mult_variant *variant,
2817 int mult_cost)
2818 {
2819 struct algorithm alg2;
2820 struct mult_cost limit;
2821 int op_cost;
2822 bool speed = optimize_insn_for_speed_p ();
2823
2824 /* Fail quickly for impossible bounds. */
2825 if (mult_cost < 0)
2826 return false;
2827
2828 /* Ensure that mult_cost provides a reasonable upper bound.
2829 Any constant multiplication can be performed with less
2830 than 2 * bits additions. */
2831 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2832 if (mult_cost > op_cost)
2833 mult_cost = op_cost;
2834
2835 *variant = basic_variant;
2836 limit.cost = mult_cost;
2837 limit.latency = mult_cost;
2838 synth_mult (alg, val, &limit, mode);
2839
2840 /* This works only if the inverted value actually fits in an
2841 `unsigned int'. */
2842 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2843 {
2844 op_cost = neg_cost (speed, mode);
2845 if (MULT_COST_LESS (&alg->cost, mult_cost))
2846 {
2847 limit.cost = alg->cost.cost - op_cost;
2848 limit.latency = alg->cost.latency - op_cost;
2849 }
2850 else
2851 {
2852 limit.cost = mult_cost - op_cost;
2853 limit.latency = mult_cost - op_cost;
2854 }
2855
2856 synth_mult (&alg2, -val, &limit, mode);
2857 alg2.cost.cost += op_cost;
2858 alg2.cost.latency += op_cost;
2859 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2860 *alg = alg2, *variant = negate_variant;
2861 }
2862
2863 /* This proves very useful for division-by-constant. */
2864 op_cost = add_cost (speed, mode);
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2866 {
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2869 }
2870 else
2871 {
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2874 }
2875
2876 synth_mult (&alg2, val - 1, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = add_variant;
2881
2882 return MULT_COST_LESS (&alg->cost, mult_cost);
2883 }
2884
2885 /* A subroutine of expand_mult, used for constant multiplications.
2886 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2887 convenient. Use the shift/add sequence described by ALG and apply
2888 the final fixup specified by VARIANT. */
2889
2890 static rtx
2891 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2892 rtx target, const struct algorithm *alg,
2893 enum mult_variant variant)
2894 {
2895 HOST_WIDE_INT val_so_far;
2896 rtx insn, accum, tem;
2897 int opno;
2898 enum machine_mode nmode;
2899
2900 /* Avoid referencing memory over and over and invalid sharing
2901 on SUBREGs. */
2902 op0 = force_reg (mode, op0);
2903
2904 /* ACCUM starts out either as OP0 or as a zero, depending on
2905 the first operation. */
2906
2907 if (alg->op[0] == alg_zero)
2908 {
2909 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2910 val_so_far = 0;
2911 }
2912 else if (alg->op[0] == alg_m)
2913 {
2914 accum = copy_to_mode_reg (mode, op0);
2915 val_so_far = 1;
2916 }
2917 else
2918 gcc_unreachable ();
2919
2920 for (opno = 1; opno < alg->ops; opno++)
2921 {
2922 int log = alg->log[opno];
2923 rtx shift_subtarget = optimize ? 0 : accum;
2924 rtx add_target
2925 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2926 && !optimize)
2927 ? target : 0;
2928 rtx accum_target = optimize ? 0 : accum;
2929 rtx accum_inner;
2930
2931 switch (alg->op[opno])
2932 {
2933 case alg_shift:
2934 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2935 /* REG_EQUAL note will be attached to the following insn. */
2936 emit_move_insn (accum, tem);
2937 val_so_far <<= log;
2938 break;
2939
2940 case alg_add_t_m2:
2941 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2942 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2943 add_target ? add_target : accum_target);
2944 val_so_far += (HOST_WIDE_INT) 1 << log;
2945 break;
2946
2947 case alg_sub_t_m2:
2948 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2949 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2950 add_target ? add_target : accum_target);
2951 val_so_far -= (HOST_WIDE_INT) 1 << log;
2952 break;
2953
2954 case alg_add_t2_m:
2955 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2956 log, shift_subtarget, 0);
2957 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2958 add_target ? add_target : accum_target);
2959 val_so_far = (val_so_far << log) + 1;
2960 break;
2961
2962 case alg_sub_t2_m:
2963 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2964 log, shift_subtarget, 0);
2965 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2966 add_target ? add_target : accum_target);
2967 val_so_far = (val_so_far << log) - 1;
2968 break;
2969
2970 case alg_add_factor:
2971 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2972 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2973 add_target ? add_target : accum_target);
2974 val_so_far += val_so_far << log;
2975 break;
2976
2977 case alg_sub_factor:
2978 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2979 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2980 (add_target
2981 ? add_target : (optimize ? 0 : tem)));
2982 val_so_far = (val_so_far << log) - val_so_far;
2983 break;
2984
2985 default:
2986 gcc_unreachable ();
2987 }
2988
2989 if (SCALAR_INT_MODE_P (mode))
2990 {
2991 /* Write a REG_EQUAL note on the last insn so that we can cse
2992 multiplication sequences. Note that if ACCUM is a SUBREG,
2993 we've set the inner register and must properly indicate that. */
2994 tem = op0, nmode = mode;
2995 accum_inner = accum;
2996 if (GET_CODE (accum) == SUBREG)
2997 {
2998 accum_inner = SUBREG_REG (accum);
2999 nmode = GET_MODE (accum_inner);
3000 tem = gen_lowpart (nmode, op0);
3001 }
3002
3003 insn = get_last_insn ();
3004 set_dst_reg_note (insn, REG_EQUAL,
3005 gen_rtx_MULT (nmode, tem,
3006 gen_int_mode (val_so_far, nmode)),
3007 accum_inner);
3008 }
3009 }
3010
3011 if (variant == negate_variant)
3012 {
3013 val_so_far = -val_so_far;
3014 accum = expand_unop (mode, neg_optab, accum, target, 0);
3015 }
3016 else if (variant == add_variant)
3017 {
3018 val_so_far = val_so_far + 1;
3019 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3020 }
3021
3022 /* Compare only the bits of val and val_so_far that are significant
3023 in the result mode, to avoid sign-/zero-extension confusion. */
3024 nmode = GET_MODE_INNER (mode);
3025 if (nmode == VOIDmode)
3026 nmode = mode;
3027 val &= GET_MODE_MASK (nmode);
3028 val_so_far &= GET_MODE_MASK (nmode);
3029 gcc_assert (val == val_so_far);
3030
3031 return accum;
3032 }
3033
3034 /* Perform a multiplication and return an rtx for the result.
3035 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3036 TARGET is a suggestion for where to store the result (an rtx).
3037
3038 We check specially for a constant integer as OP1.
3039 If you want this check for OP0 as well, then before calling
3040 you should swap the two operands if OP0 would be constant. */
3041
3042 rtx
3043 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3044 int unsignedp)
3045 {
3046 enum mult_variant variant;
3047 struct algorithm algorithm;
3048 rtx scalar_op1;
3049 int max_cost;
3050 bool speed = optimize_insn_for_speed_p ();
3051 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3052
3053 if (CONSTANT_P (op0))
3054 {
3055 rtx temp = op0;
3056 op0 = op1;
3057 op1 = temp;
3058 }
3059
3060 /* For vectors, there are several simplifications that can be made if
3061 all elements of the vector constant are identical. */
3062 scalar_op1 = op1;
3063 if (GET_CODE (op1) == CONST_VECTOR)
3064 {
3065 int i, n = CONST_VECTOR_NUNITS (op1);
3066 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3067 for (i = 1; i < n; ++i)
3068 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3069 goto skip_scalar;
3070 }
3071
3072 if (INTEGRAL_MODE_P (mode))
3073 {
3074 rtx fake_reg;
3075 HOST_WIDE_INT coeff;
3076 bool is_neg;
3077 int mode_bitsize;
3078
3079 if (op1 == CONST0_RTX (mode))
3080 return op1;
3081 if (op1 == CONST1_RTX (mode))
3082 return op0;
3083 if (op1 == CONSTM1_RTX (mode))
3084 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3085 op0, target, 0);
3086
3087 if (do_trapv)
3088 goto skip_synth;
3089
3090 /* These are the operations that are potentially turned into
3091 a sequence of shifts and additions. */
3092 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3093
3094 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3095 less than or equal in size to `unsigned int' this doesn't matter.
3096 If the mode is larger than `unsigned int', then synth_mult works
3097 only if the constant value exactly fits in an `unsigned int' without
3098 any truncation. This means that multiplying by negative values does
3099 not work; results are off by 2^32 on a 32 bit machine. */
3100
3101 if (CONST_INT_P (scalar_op1))
3102 {
3103 coeff = INTVAL (scalar_op1);
3104 is_neg = coeff < 0;
3105 }
3106 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3107 {
3108 /* If we are multiplying in DImode, it may still be a win
3109 to try to work with shifts and adds. */
3110 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3111 && (CONST_DOUBLE_LOW (scalar_op1) > 0
3112 || (CONST_DOUBLE_LOW (scalar_op1) < 0
3113 && EXACT_POWER_OF_2_OR_ZERO_P
3114 (CONST_DOUBLE_LOW (scalar_op1)))))
3115 {
3116 coeff = CONST_DOUBLE_LOW (scalar_op1);
3117 is_neg = false;
3118 }
3119 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3120 {
3121 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3122 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3123 {
3124 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3125 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3126 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3127 return expand_shift (LSHIFT_EXPR, mode, op0,
3128 shift, target, unsignedp);
3129 }
3130 goto skip_synth;
3131 }
3132 else
3133 goto skip_synth;
3134 }
3135 else
3136 goto skip_synth;
3137
3138 /* We used to test optimize here, on the grounds that it's better to
3139 produce a smaller program when -O is not used. But this causes
3140 such a terrible slowdown sometimes that it seems better to always
3141 use synth_mult. */
3142
3143 /* Special case powers of two. */
3144 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3145 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3146 return expand_shift (LSHIFT_EXPR, mode, op0,
3147 floor_log2 (coeff), target, unsignedp);
3148
3149 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3150
3151 /* Attempt to handle multiplication of DImode values by negative
3152 coefficients, by performing the multiplication by a positive
3153 multiplier and then negating the result. */
3154 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3155 {
3156 /* It's safe to use -coeff even for INT_MIN, as the
3157 result is interpreted as an unsigned coefficient.
3158 Exclude cost of op0 from max_cost to match the cost
3159 calculation of the synth_mult. */
3160 coeff = -(unsigned HOST_WIDE_INT) coeff;
3161 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3162 - neg_cost (speed, mode));
3163 if (max_cost <= 0)
3164 goto skip_synth;
3165
3166 /* Special case powers of two. */
3167 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3168 {
3169 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3170 floor_log2 (coeff), target, unsignedp);
3171 return expand_unop (mode, neg_optab, temp, target, 0);
3172 }
3173
3174 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3175 max_cost))
3176 {
3177 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3178 &algorithm, variant);
3179 return expand_unop (mode, neg_optab, temp, target, 0);
3180 }
3181 goto skip_synth;
3182 }
3183
3184 /* Exclude cost of op0 from max_cost to match the cost
3185 calculation of the synth_mult. */
3186 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3187 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3188 return expand_mult_const (mode, op0, coeff, target,
3189 &algorithm, variant);
3190 }
3191 skip_synth:
3192
3193 /* Expand x*2.0 as x+x. */
3194 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3195 {
3196 REAL_VALUE_TYPE d;
3197 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3198
3199 if (REAL_VALUES_EQUAL (d, dconst2))
3200 {
3201 op0 = force_reg (GET_MODE (op0), op0);
3202 return expand_binop (mode, add_optab, op0, op0,
3203 target, unsignedp, OPTAB_LIB_WIDEN);
3204 }
3205 }
3206 skip_scalar:
3207
3208 /* This used to use umul_optab if unsigned, but for non-widening multiply
3209 there is no difference between signed and unsigned. */
3210 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3211 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3212 gcc_assert (op0);
3213 return op0;
3214 }
3215
3216 /* Return a cost estimate for multiplying a register by the given
3217 COEFFicient in the given MODE and SPEED. */
3218
3219 int
3220 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3221 {
3222 int max_cost;
3223 struct algorithm algorithm;
3224 enum mult_variant variant;
3225
3226 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3227 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3228 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3229 return algorithm.cost.cost;
3230 else
3231 return max_cost;
3232 }
3233
3234 /* Perform a widening multiplication and return an rtx for the result.
3235 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3236 TARGET is a suggestion for where to store the result (an rtx).
3237 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3238 or smul_widen_optab.
3239
3240 We check specially for a constant integer as OP1, comparing the
3241 cost of a widening multiply against the cost of a sequence of shifts
3242 and adds. */
3243
3244 rtx
3245 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3246 int unsignedp, optab this_optab)
3247 {
3248 bool speed = optimize_insn_for_speed_p ();
3249 rtx cop1;
3250
3251 if (CONST_INT_P (op1)
3252 && GET_MODE (op0) != VOIDmode
3253 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3254 this_optab == umul_widen_optab))
3255 && CONST_INT_P (cop1)
3256 && (INTVAL (cop1) >= 0
3257 || HWI_COMPUTABLE_MODE_P (mode)))
3258 {
3259 HOST_WIDE_INT coeff = INTVAL (cop1);
3260 int max_cost;
3261 enum mult_variant variant;
3262 struct algorithm algorithm;
3263
3264 /* Special case powers of two. */
3265 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3266 {
3267 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3268 return expand_shift (LSHIFT_EXPR, mode, op0,
3269 floor_log2 (coeff), target, unsignedp);
3270 }
3271
3272 /* Exclude cost of op0 from max_cost to match the cost
3273 calculation of the synth_mult. */
3274 max_cost = mul_widen_cost (speed, mode);
3275 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3276 max_cost))
3277 {
3278 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3279 return expand_mult_const (mode, op0, coeff, target,
3280 &algorithm, variant);
3281 }
3282 }
3283 return expand_binop (mode, this_optab, op0, op1, target,
3284 unsignedp, OPTAB_LIB_WIDEN);
3285 }
3286 \f
3287 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3288 replace division by D, and put the least significant N bits of the result
3289 in *MULTIPLIER_PTR and return the most significant bit.
3290
3291 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3292 needed precision is in PRECISION (should be <= N).
3293
3294 PRECISION should be as small as possible so this function can choose
3295 multiplier more freely.
3296
3297 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3298 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3299
3300 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3301 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
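 /* For example, with D = 5 and N = PRECISION = 32 this chooses the
 multiplier 0xCCCCCCCD with a post shift of 2, so for a 32-bit
 unsigned x (with the product formed in a wide enough mode)
 x / 5 == (x * 0xCCCCCCCD) >> 34. */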
3302
3303 unsigned HOST_WIDE_INT
3304 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3305 unsigned HOST_WIDE_INT *multiplier_ptr,
3306 int *post_shift_ptr, int *lgup_ptr)
3307 {
3308 double_int mhigh, mlow;
3309 int lgup, post_shift;
3310 int pow, pow2;
3311
3312 /* lgup = ceil(log2(divisor)); */
3313 lgup = ceil_log2 (d);
3314
3315 gcc_assert (lgup <= n);
3316
3317 pow = n + lgup;
3318 pow2 = n + lgup - precision;
3319
3320 /* We could handle this with some effort, but this case is much
3321 better handled directly with a scc insn, so rely on caller using
3322 that. */
3323 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3324
3325 /* mlow = 2^(N + lgup)/d */
3326 double_int val = double_int_zero.set_bit (pow);
3327 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3328
3329 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3330 val |= double_int_zero.set_bit (pow2);
3331 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3332
3333 gcc_assert (!mhigh.high || val.high - d < d);
3334 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3335 /* Assert that mlow < mhigh. */
3336 gcc_assert (mlow.ult (mhigh));
3337
3338 /* If precision == N, then mlow, mhigh exceed 2^N
3339 (but they do not exceed 2^(N+1)). */
3340
3341 /* Reduce to lowest terms. */
3342 for (post_shift = lgup; post_shift > 0; post_shift--)
3343 {
3344 int shft = HOST_BITS_PER_WIDE_INT - 1;
3345 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3346 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3347 if (ml_lo >= mh_lo)
3348 break;
3349
3350 mlow = double_int::from_uhwi (ml_lo);
3351 mhigh = double_int::from_uhwi (mh_lo);
3352 }
3353
3354 *post_shift_ptr = post_shift;
3355 *lgup_ptr = lgup;
3356 if (n < HOST_BITS_PER_WIDE_INT)
3357 {
3358 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3359 *multiplier_ptr = mhigh.low & mask;
3360 return mhigh.low >= mask;
3361 }
3362 else
3363 {
3364 *multiplier_ptr = mhigh.low;
3365 return mhigh.high;
3366 }
3367 }
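/* Illustrative sketch (editorial addition, not part of this file): for
   32-bit unsigned division by 3 the routine above effectively yields the
   multiplier 0xAAAAAAAB with a post-shift of 1, so x / 3 becomes the high
   half of a 32x32->64 multiply followed by one more right shift.  The
   helper names udiv3_by_mult and check_udiv3 are hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
udiv3_by_mult (uint32_t x)
{
  /* 0xAAAAAAAB == ceil (2^33 / 3); take the high 32 bits of the product
     and shift right once more (post-shift of 1).  */
  uint64_t prod = (uint64_t) x * 0xAAAAAAABu;
  return (uint32_t) (prod >> 33);
}

static void
check_udiv3 (void)
{
  for (uint32_t x = 0; x < 1000000; x++)
    assert (udiv3_by_mult (x) == x / 3);
}
#endif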
3368
3369 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3370 congruent to 1 (mod 2**N). */
3371
3372 static unsigned HOST_WIDE_INT
3373 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3374 {
3375 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3376
3377 /* The algorithm notes that the choice y = x satisfies
3378 x*y == 1 mod 2^3, since x is assumed odd.
3379 Each iteration doubles the number of bits of significance in y. */
3380
3381 unsigned HOST_WIDE_INT mask;
3382 unsigned HOST_WIDE_INT y = x;
3383 int nbit = 3;
3384
3385 mask = (n == HOST_BITS_PER_WIDE_INT
3386 ? ~(unsigned HOST_WIDE_INT) 0
3387 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3388
3389 while (nbit < n)
3390 {
3391 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3392 nbit *= 2;
3393 }
3394 return y;
3395 }
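/* Illustrative sketch (editorial addition, not part of this file): a
   stand-alone version of the Newton iteration above, using uint64_t in
   place of unsigned HOST_WIDE_INT, plus a spot check that the inverse of
   3 mod 2^32 is 0xAAAAAAAB (since 3 * 0xAAAAAAAB == 2^33 + 1).  The names
   invert_mod2n_sketch and check_invert are hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t
invert_mod2n_sketch (uint64_t x, int n)
{
  uint64_t mask = (n == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << n) - 1);
  uint64_t y = x;		/* Already correct to 3 bits: x*x == 1 mod 8.  */
  int nbit = 3;

  while (nbit < n)
    {
      y = y * (2 - x * y) & mask;	/* Doubles the bits of significance.  */
      nbit *= 2;
    }
  return y;
}

static void
check_invert (void)
{
  assert (invert_mod2n_sketch (3, 32) == 0xAAAAAAABu);
  for (uint64_t x = 1; x < 1001; x += 2)
    assert (((x * invert_mod2n_sketch (x, 32)) & 0xFFFFFFFFu) == 1);
}
#endif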
3396
3397 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3398    done with the wrong signedness flavor.  ADJ_OPERAND is already the high
3399    half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3400    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3401    product to become signed.
3402
3403 The result is put in TARGET if that is convenient.
3404
3405 MODE is the mode of operation. */
3406
3407 rtx
3408 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3409 rtx op1, rtx target, int unsignedp)
3410 {
3411 rtx tem;
3412 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3413
3414 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3415 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3416 tem = expand_and (mode, tem, op1, NULL_RTX);
3417 adj_operand
3418 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 adj_operand);
3420
3421 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3422 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3423 tem = expand_and (mode, tem, op0, NULL_RTX);
3424 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3425 target);
3426
3427 return target;
3428 }
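/* Illustrative note (editorial addition, not part of this file): the
   adjustment above relies on the identity

     high_signed (a, b) = high_unsigned (a, b) - (a < 0 ? b : 0)
                                               - (b < 0 ? a : 0)   (mod 2^N)

   (and the same with additions in the other direction), which follows
   from a_unsigned = a + (a < 0) * 2^N.  A stand-alone 32-bit check, with
   hypothetical helpers high_s, high_u and check_adjust, assuming the
   usual two's complement arithmetic right shift:  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
high_s (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * b) >> 32);	/* signed high half */
}

static uint32_t
high_u (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);	/* unsigned high half */
}

static void
check_adjust (int32_t a, int32_t b)
{
  /* (a >> 31) & b mirrors the shift-and-AND sequence emitted above.  */
  uint32_t adj = high_u ((uint32_t) a, (uint32_t) b)
		 - (uint32_t) ((a >> 31) & b)
		 - (uint32_t) ((b >> 31) & a);
  assert ((int32_t) adj == high_s (a, b));
}
#endif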
3429
3430 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3431
3432 static rtx
3433 extract_high_half (enum machine_mode mode, rtx op)
3434 {
3435 enum machine_mode wider_mode;
3436
3437 if (mode == word_mode)
3438 return gen_highpart (mode, op);
3439
3440 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3441
3442 wider_mode = GET_MODE_WIDER_MODE (mode);
3443 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3444 GET_MODE_BITSIZE (mode), 0, 1);
3445 return convert_modes (mode, wider_mode, op, 0);
3446 }
3447
3448 /* Like expmed_mult_highpart, but only consider using a multiplication
3449 optab. OP1 is an rtx for the constant operand. */
3450
3451 static rtx
3452 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3453 rtx target, int unsignedp, int max_cost)
3454 {
3455 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3456 enum machine_mode wider_mode;
3457 optab moptab;
3458 rtx tem;
3459 int size;
3460 bool speed = optimize_insn_for_speed_p ();
3461
3462 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3463
3464 wider_mode = GET_MODE_WIDER_MODE (mode);
3465 size = GET_MODE_BITSIZE (mode);
3466
3467 /* Firstly, try using a multiplication insn that only generates the needed
3468 high part of the product, and in the sign flavor of unsignedp. */
3469 if (mul_highpart_cost (speed, mode) < max_cost)
3470 {
3471 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3472 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3473 unsignedp, OPTAB_DIRECT);
3474 if (tem)
3475 return tem;
3476 }
3477
3478 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3479 Need to adjust the result after the multiplication. */
3480 if (size - 1 < BITS_PER_WORD
3481 && (mul_highpart_cost (speed, mode)
3482 + 2 * shift_cost (speed, mode, size-1)
3483 + 4 * add_cost (speed, mode) < max_cost))
3484 {
3485 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3486 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3487 unsignedp, OPTAB_DIRECT);
3488 if (tem)
3489 /* We used the wrong signedness. Adjust the result. */
3490 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3491 tem, unsignedp);
3492 }
3493
3494 /* Try widening multiplication. */
3495 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3496 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3497 && mul_widen_cost (speed, wider_mode) < max_cost)
3498 {
3499 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3500 unsignedp, OPTAB_WIDEN);
3501 if (tem)
3502 return extract_high_half (mode, tem);
3503 }
3504
3505   /* Try widening the mode and performing a non-widening multiplication.  */
3506 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3507 && size - 1 < BITS_PER_WORD
3508 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3509 < max_cost))
3510 {
3511 rtx insns, wop0, wop1;
3512
3513 /* We need to widen the operands, for example to ensure the
3514 constant multiplier is correctly sign or zero extended.
3515 	 Use a sequence to clean up any instructions emitted by
3516 the conversions if things don't work out. */
3517 start_sequence ();
3518 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3519 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3520 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3521 unsignedp, OPTAB_WIDEN);
3522 insns = get_insns ();
3523 end_sequence ();
3524
3525 if (tem)
3526 {
3527 emit_insn (insns);
3528 return extract_high_half (mode, tem);
3529 }
3530 }
3531
3532 /* Try widening multiplication of opposite signedness, and adjust. */
3533 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3534 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3535 && size - 1 < BITS_PER_WORD
3536 && (mul_widen_cost (speed, wider_mode)
3537 + 2 * shift_cost (speed, mode, size-1)
3538 + 4 * add_cost (speed, mode) < max_cost))
3539 {
3540 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3541 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3542 if (tem != 0)
3543 {
3544 tem = extract_high_half (mode, tem);
3545 /* We used the wrong signedness. Adjust the result. */
3546 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3547 target, unsignedp);
3548 }
3549 }
3550
3551 return 0;
3552 }
3553
3554 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3555 putting the high half of the result in TARGET if that is convenient,
3556    and return where the result is.  If the operation cannot be performed,
3557 0 is returned.
3558
3559 MODE is the mode of operation and result.
3560
3561 UNSIGNEDP nonzero means unsigned multiply.
3562
3563 MAX_COST is the total allowed cost for the expanded RTL. */
3564
3565 static rtx
3566 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3567 rtx target, int unsignedp, int max_cost)
3568 {
3569 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3570 unsigned HOST_WIDE_INT cnst1;
3571 int extra_cost;
3572 bool sign_adjust = false;
3573 enum mult_variant variant;
3574 struct algorithm alg;
3575 rtx tem;
3576 bool speed = optimize_insn_for_speed_p ();
3577
3578 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3579   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3580 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3581
3582 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3583
3584 /* We can't optimize modes wider than BITS_PER_WORD.
3585 ??? We might be able to perform double-word arithmetic if
3586      mode == word_mode; however, all the cost calculations in
3587 synth_mult etc. assume single-word operations. */
3588 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3589 return expmed_mult_highpart_optab (mode, op0, op1, target,
3590 unsignedp, max_cost);
3591
3592 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3593
3594 /* Check whether we try to multiply by a negative constant. */
3595 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3596 {
3597 sign_adjust = true;
3598 extra_cost += add_cost (speed, mode);
3599 }
3600
3601 /* See whether shift/add multiplication is cheap enough. */
3602 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3603 max_cost - extra_cost))
3604 {
3605 /* See whether the specialized multiplication optabs are
3606 cheaper than the shift/add version. */
3607 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3608 alg.cost.cost + extra_cost);
3609 if (tem)
3610 return tem;
3611
3612 tem = convert_to_mode (wider_mode, op0, unsignedp);
3613 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3614 tem = extract_high_half (mode, tem);
3615
3616 /* Adjust result for signedness. */
3617 if (sign_adjust)
3618 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3619
3620 return tem;
3621 }
3622 return expmed_mult_highpart_optab (mode, op0, op1, target,
3623 unsignedp, max_cost);
3624 }
3625
3626
3627 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3628
3629 static rtx
3630 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3631 {
3632 unsigned HOST_WIDE_INT masklow, maskhigh;
3633 rtx result, temp, shift, label;
3634 int logd;
3635
3636 logd = floor_log2 (d);
3637 result = gen_reg_rtx (mode);
3638
3639 /* Avoid conditional branches when they're expensive. */
3640 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3641 && optimize_insn_for_speed_p ())
3642 {
3643 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3644 mode, 0, -1);
3645 if (signmask)
3646 {
3647 signmask = force_reg (mode, signmask);
3648 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3649 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3650
3651 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3652 which instruction sequence to use. If logical right shifts
3653 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3654 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3655
3656 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3657 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3658 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3659 > COSTS_N_INSNS (2)))
3660 {
3661 temp = expand_binop (mode, xor_optab, op0, signmask,
3662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663 temp = expand_binop (mode, sub_optab, temp, signmask,
3664 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 temp = expand_binop (mode, and_optab, temp,
3666 gen_int_mode (masklow, mode),
3667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 temp = expand_binop (mode, xor_optab, temp, signmask,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 }
3673 else
3674 {
3675 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3676 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677 signmask = force_reg (mode, signmask);
3678
3679 temp = expand_binop (mode, add_optab, op0, signmask,
3680 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 temp = expand_binop (mode, and_optab, temp,
3682 gen_int_mode (masklow, mode),
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, sub_optab, temp, signmask,
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 }
3687 return temp;
3688 }
3689 }
3690
3691 /* Mask contains the mode's signbit and the significant bits of the
3692 modulus. By including the signbit in the operation, many targets
3693 can avoid an explicit compare operation in the following comparison
3694 against zero. */
3695
3696 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3697 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3698 {
3699 masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3700 maskhigh = -1;
3701 }
3702 else
3703 maskhigh = HOST_WIDE_INT_M1U
3704 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3705
3706 temp = expand_binop (mode, and_optab, op0,
3707 immed_double_const (masklow, maskhigh, mode),
3708 result, 1, OPTAB_LIB_WIDEN);
3709 if (temp != result)
3710 emit_move_insn (result, temp);
3711
3712 label = gen_label_rtx ();
3713 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3714
3715 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3716 0, OPTAB_LIB_WIDEN);
3717 masklow = HOST_WIDE_INT_M1U << logd;
3718 maskhigh = -1;
3719 temp = expand_binop (mode, ior_optab, temp,
3720 immed_double_const (masklow, maskhigh, mode),
3721 result, 1, OPTAB_LIB_WIDEN);
3722 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3723 0, OPTAB_LIB_WIDEN);
3724 if (temp != result)
3725 emit_move_insn (result, temp);
3726 emit_label (label);
3727 return result;
3728 }
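/* Illustrative sketch (editorial addition, not part of this file): the
   shift-based branch-free variant above, specialized to x % 4 on a
   32-bit two's complement int.  The names smod4_sketch and check_smod4
   are hypothetical.  */
#if 0
#include <assert.h>

static int
smod4_sketch (int x)
{
  int signmask = -(x < 0);			     /* 0 or -1 */
  unsigned bias = (unsigned) signmask >> (32 - 2);   /* 0 or 3 (== d - 1) */

  /* Add the bias, mask to the low bits, then remove the bias again,
     mirroring the add/and/sub sequence above.  */
  return ((x + (int) bias) & 3) - (int) bias;
}

static void
check_smod4 (void)
{
  for (int x = -1000; x <= 1000; x++)
    assert (smod4_sketch (x) == x % 4);
}
#endif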
3729
3730 /* Expand signed division of OP0 by a power of two D in mode MODE.
3731 This routine is only called for positive values of D. */
3732
3733 static rtx
3734 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3735 {
3736 rtx temp, label;
3737 int logd;
3738
3739 logd = floor_log2 (d);
3740
3741 if (d == 2
3742 && BRANCH_COST (optimize_insn_for_speed_p (),
3743 false) >= 1)
3744 {
3745 temp = gen_reg_rtx (mode);
3746 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3747 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3748 0, OPTAB_LIB_WIDEN);
3749 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3750 }
3751
3752 #ifdef HAVE_conditional_move
3753 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3754 >= 2)
3755 {
3756 rtx temp2;
3757
3758 start_sequence ();
3759 temp2 = copy_to_mode_reg (mode, op0);
3760 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3761 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3762 temp = force_reg (mode, temp);
3763
3764 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3765 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3766 mode, temp, temp2, mode, 0);
3767 if (temp2)
3768 {
3769 rtx seq = get_insns ();
3770 end_sequence ();
3771 emit_insn (seq);
3772 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3773 }
3774 end_sequence ();
3775 }
3776 #endif
3777
3778 if (BRANCH_COST (optimize_insn_for_speed_p (),
3779 false) >= 2)
3780 {
3781 int ushift = GET_MODE_BITSIZE (mode) - logd;
3782
3783 temp = gen_reg_rtx (mode);
3784 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3785 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3786 > COSTS_N_INSNS (1))
3787 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3788 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3789 else
3790 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3791 ushift, NULL_RTX, 1);
3792 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793 0, OPTAB_LIB_WIDEN);
3794 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 }
3796
3797 label = gen_label_rtx ();
3798 temp = copy_to_mode_reg (mode, op0);
3799 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3800 expand_inc (temp, gen_int_mode (d - 1, mode));
3801 emit_label (label);
3802 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3803 }
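/* Illustrative sketch (editorial addition, not part of this file): the
   branch-free store-flag variant above, specialized to x / 8 on a 32-bit
   two's complement int with an arithmetic right shift.  The names
   sdiv8_sketch and check_sdiv8 are hypothetical.  */
#if 0
#include <assert.h>

static int
sdiv8_sketch (int x)
{
  /* 7 if x < 0, else 0: the sign mask shifted logically by size - logd.  */
  unsigned bias = (unsigned) (x >> 31) >> (32 - 3);

  /* Adding d - 1 to negative operands before the arithmetic shift turns
     floor rounding into the truncation that signed division requires.  */
  return (x + (int) bias) >> 3;
}

static void
check_sdiv8 (void)
{
  for (int x = -1000; x <= 1000; x++)
    assert (sdiv8_sketch (x) == x / 8);
}
#endif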
3804 \f
3805 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3806 if that is convenient, and returning where the result is.
3807 You may request either the quotient or the remainder as the result;
3808 specify REM_FLAG nonzero to get the remainder.
3809
3810 CODE is the expression code for which kind of division this is;
3811 it controls how rounding is done. MODE is the machine mode to use.
3812 UNSIGNEDP nonzero means do unsigned division. */
3813
3814 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3815 and then correct it by or'ing in missing high bits
3816 if result of ANDI is nonzero.
3817 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3818 This could optimize to a bfexts instruction.
3819 But C doesn't use these operations, so their optimizations are
3820 left for later. */
3821 /* ??? For modulo, we don't actually need the highpart of the first product,
3822 the low part will do nicely. And for small divisors, the second multiply
3823 can also be a low-part only multiply or even be completely left out.
3824 E.g. to calculate the remainder of a division by 3 with a 32 bit
3825 multiply, multiply with 0x55555556 and extract the upper two bits;
3826 the result is exact for inputs up to 0x1fffffff.
3827 The input range can be reduced by using cross-sum rules.
3828 For odd divisors >= 3, the following table gives right shift counts
3829 so that if a number is shifted by an integer multiple of the given
3830 amount, the remainder stays the same:
3831 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3832 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3833 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3834 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3835 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3836
3837 Cross-sum rules for even numbers can be derived by leaving as many bits
3838 to the right alone as the divisor has zeros to the right.
3839 E.g. if x is an unsigned 32 bit number:
3840 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3841 */
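/* Illustrative note (editorial addition, not part of this file): the
   nonzero entries in the table above appear to be the multiplicative
   order of 2 modulo the odd divisor, i.e. the smallest k with
   2^k == 1 (mod d); a zero seems to mark divisors with no usefully small
   k.  Because 2^k == 1 (mod d), shifting left by a multiple of k does not
   change the residue, and x can be reduced by summing its base-2^k digits
   (a cross-sum).  A small spot check, with the hypothetical helper
   check_shift_table:  */
#if 0
#include <assert.h>

static void
check_shift_table (void)
{
  assert ((1u << 2) % 3 == 1);		/* d = 3,  k = 2  */
  assert ((1u << 4) % 5 == 1);		/* d = 5,  k = 4  */
  assert ((1u << 3) % 7 == 1);		/* d = 7,  k = 3  */

  /* Cross-sum reduction for d = 3, k = 2: sum the base-4 digits.  */
  unsigned x = 123456u;
  unsigned s = 0;
  for (unsigned t = x; t != 0; t >>= 2)
    s += t & 3;
  assert (s % 3 == x % 3);
}
#endif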
3842
3843 rtx
3844 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3845 rtx op0, rtx op1, rtx target, int unsignedp)
3846 {
3847 enum machine_mode compute_mode;
3848 rtx tquotient;
3849 rtx quotient = 0, remainder = 0;
3850 rtx last;
3851 int size;
3852 rtx insn;
3853 optab optab1, optab2;
3854 int op1_is_constant, op1_is_pow2 = 0;
3855 int max_cost, extra_cost;
3856 static HOST_WIDE_INT last_div_const = 0;
3857 bool speed = optimize_insn_for_speed_p ();
3858
3859 op1_is_constant = CONST_INT_P (op1);
3860 if (op1_is_constant)
3861 {
3862 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3863 if (unsignedp)
3864 ext_op1 &= GET_MODE_MASK (mode);
3865 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3866 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3867 }
3868
3869 /*
3870 This is the structure of expand_divmod:
3871
3872 First comes code to fix up the operands so we can perform the operations
3873 correctly and efficiently.
3874
3875 Second comes a switch statement with code specific for each rounding mode.
3876 For some special operands this code emits all RTL for the desired
3877 operation, for other cases, it generates only a quotient and stores it in
3878 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3879 to indicate that it has not done anything.
3880
3881 Last comes code that finishes the operation. If QUOTIENT is set and
3882 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3883 QUOTIENT is not set, it is computed using trunc rounding.
3884
3885 We try to generate special code for division and remainder when OP1 is a
3886 constant. If |OP1| = 2**n we can use shifts and some other fast
3887 operations. For other values of OP1, we compute a carefully selected
3888 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3889 by m.
3890
3891 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3892 half of the product. Different strategies for generating the product are
3893 implemented in expmed_mult_highpart.
3894
3895 If what we actually want is the remainder, we generate that by another
3896 by-constant multiplication and a subtraction. */
3897
3898 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3899 code below will malfunction if we are, so check here and handle
3900 the special case if so. */
3901 if (op1 == const1_rtx)
3902 return rem_flag ? const0_rtx : op0;
3903
3904 /* When dividing by -1, we could get an overflow.
3905 negv_optab can handle overflows. */
3906 if (! unsignedp && op1 == constm1_rtx)
3907 {
3908 if (rem_flag)
3909 return const0_rtx;
3910 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3911 ? negv_optab : neg_optab, op0, target, 0);
3912 }
3913
3914 if (target
3915 /* Don't use the function value register as a target
3916 since we have to read it as well as write it,
3917 and function-inlining gets confused by this. */
3918 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3919 /* Don't clobber an operand while doing a multi-step calculation. */
3920 || ((rem_flag || op1_is_constant)
3921 && (reg_mentioned_p (target, op0)
3922 || (MEM_P (op0) && MEM_P (target))))
3923 || reg_mentioned_p (target, op1)
3924 || (MEM_P (op1) && MEM_P (target))))
3925 target = 0;
3926
3927 /* Get the mode in which to perform this computation. Normally it will
3928 be MODE, but sometimes we can't do the desired operation in MODE.
3929 If so, pick a wider mode in which we can do the operation. Convert
3930 to that mode at the start to avoid repeated conversions.
3931
3932 First see what operations we need. These depend on the expression
3933 we are evaluating. (We assume that divxx3 insns exist under the
3934    same conditions as modxx3 insns do and that these insns don't normally
3935 fail. If these assumptions are not correct, we may generate less
3936 efficient code in some cases.)
3937
3938 Then see if we find a mode in which we can open-code that operation
3939 (either a division, modulus, or shift). Finally, check for the smallest
3940 mode for which we can do the operation with a library call. */
3941
3942 /* We might want to refine this now that we have division-by-constant
3943 optimization. Since expmed_mult_highpart tries so many variants, it is
3944 not straightforward to generalize this. Maybe we should make an array
3945 of possible modes in init_expmed? Save this for GCC 2.7. */
3946
3947 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3948 ? (unsignedp ? lshr_optab : ashr_optab)
3949 : (unsignedp ? udiv_optab : sdiv_optab));
3950 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3951 ? optab1
3952 : (unsignedp ? udivmod_optab : sdivmod_optab));
3953
3954 for (compute_mode = mode; compute_mode != VOIDmode;
3955 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3956 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3957 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3958 break;
3959
3960 if (compute_mode == VOIDmode)
3961 for (compute_mode = mode; compute_mode != VOIDmode;
3962 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3963 if (optab_libfunc (optab1, compute_mode)
3964 || optab_libfunc (optab2, compute_mode))
3965 break;
3966
3967 /* If we still couldn't find a mode, use MODE, but expand_binop will
3968 probably die. */
3969 if (compute_mode == VOIDmode)
3970 compute_mode = mode;
3971
3972 if (target && GET_MODE (target) == compute_mode)
3973 tquotient = target;
3974 else
3975 tquotient = gen_reg_rtx (compute_mode);
3976
3977 size = GET_MODE_BITSIZE (compute_mode);
3978 #if 0
3979 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3980 (mode), and thereby get better code when OP1 is a constant. Do that
3981 later. It will require going over all usages of SIZE below. */
3982 size = GET_MODE_BITSIZE (mode);
3983 #endif
3984
3985 /* Only deduct something for a REM if the last divide done was
3986 for a different constant. Then set the constant of the last
3987 divide. */
3988 max_cost = (unsignedp
3989 ? udiv_cost (speed, compute_mode)
3990 : sdiv_cost (speed, compute_mode));
3991 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3992 && INTVAL (op1) == last_div_const))
3993 max_cost -= (mul_cost (speed, compute_mode)
3994 + add_cost (speed, compute_mode));
3995
3996 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3997
3998 /* Now convert to the best mode to use. */
3999 if (compute_mode != mode)
4000 {
4001 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4002 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4003
4004 /* convert_modes may have placed op1 into a register, so we
4005 must recompute the following. */
4006 op1_is_constant = CONST_INT_P (op1);
4007 op1_is_pow2 = (op1_is_constant
4008 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4009 || (! unsignedp
4010 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4011 }
4012
4013 /* If one of the operands is a volatile MEM, copy it into a register. */
4014
4015 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4016 op0 = force_reg (compute_mode, op0);
4017 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4018 op1 = force_reg (compute_mode, op1);
4019
4020 /* If we need the remainder or if OP1 is constant, we need to
4021 put OP0 in a register in case it has any queued subexpressions. */
4022 if (rem_flag || op1_is_constant)
4023 op0 = force_reg (compute_mode, op0);
4024
4025 last = get_last_insn ();
4026
4027 /* Promote floor rounding to trunc rounding for unsigned operations. */
4028 if (unsignedp)
4029 {
4030 if (code == FLOOR_DIV_EXPR)
4031 code = TRUNC_DIV_EXPR;
4032 if (code == FLOOR_MOD_EXPR)
4033 code = TRUNC_MOD_EXPR;
4034 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4035 code = TRUNC_DIV_EXPR;
4036 }
4037
4038 if (op1 != const0_rtx)
4039 switch (code)
4040 {
4041 case TRUNC_MOD_EXPR:
4042 case TRUNC_DIV_EXPR:
4043 if (op1_is_constant)
4044 {
4045 if (unsignedp)
4046 {
4047 unsigned HOST_WIDE_INT mh, ml;
4048 int pre_shift, post_shift;
4049 int dummy;
4050 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4051 & GET_MODE_MASK (compute_mode));
4052
4053 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4054 {
4055 pre_shift = floor_log2 (d);
4056 if (rem_flag)
4057 {
4058 unsigned HOST_WIDE_INT mask
4059 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4060 remainder
4061 = expand_binop (compute_mode, and_optab, op0,
4062 gen_int_mode (mask, compute_mode),
4063 remainder, 1,
4064 OPTAB_LIB_WIDEN);
4065 if (remainder)
4066 return gen_lowpart (mode, remainder);
4067 }
4068 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4069 pre_shift, tquotient, 1);
4070 }
4071 else if (size <= HOST_BITS_PER_WIDE_INT)
4072 {
4073 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4074 {
4075 /* Most significant bit of divisor is set; emit an scc
4076 insn. */
4077 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4078 compute_mode, 1, 1);
4079 }
4080 else
4081 {
4082 /* Find a suitable multiplier and right shift count
4083 instead of multiplying with D. */
4084
4085 mh = choose_multiplier (d, size, size,
4086 &ml, &post_shift, &dummy);
4087
4088 /* If the suggested multiplier is more than SIZE bits,
4089 we can do better for even divisors, using an
4090 initial right shift. */
4091 if (mh != 0 && (d & 1) == 0)
4092 {
4093 pre_shift = floor_log2 (d & -d);
4094 mh = choose_multiplier (d >> pre_shift, size,
4095 size - pre_shift,
4096 &ml, &post_shift, &dummy);
4097 gcc_assert (!mh);
4098 }
4099 else
4100 pre_shift = 0;
4101
4102 if (mh != 0)
4103 {
4104 rtx t1, t2, t3, t4;
4105
4106 if (post_shift - 1 >= BITS_PER_WORD)
4107 goto fail1;
4108
4109 extra_cost
4110 = (shift_cost (speed, compute_mode, post_shift - 1)
4111 + shift_cost (speed, compute_mode, 1)
4112 + 2 * add_cost (speed, compute_mode));
4113 t1 = expmed_mult_highpart
4114 (compute_mode, op0,
4115 gen_int_mode (ml, compute_mode),
4116 NULL_RTX, 1, max_cost - extra_cost);
4117 if (t1 == 0)
4118 goto fail1;
4119 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4120 op0, t1),
4121 NULL_RTX);
4122 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4123 t2, 1, NULL_RTX, 1);
4124 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4125 t1, t3),
4126 NULL_RTX);
4127 quotient = expand_shift
4128 (RSHIFT_EXPR, compute_mode, t4,
4129 post_shift - 1, tquotient, 1);
4130 }
4131 else
4132 {
4133 rtx t1, t2;
4134
4135 if (pre_shift >= BITS_PER_WORD
4136 || post_shift >= BITS_PER_WORD)
4137 goto fail1;
4138
4139 t1 = expand_shift
4140 (RSHIFT_EXPR, compute_mode, op0,
4141 pre_shift, NULL_RTX, 1);
4142 extra_cost
4143 = (shift_cost (speed, compute_mode, pre_shift)
4144 + shift_cost (speed, compute_mode, post_shift));
4145 t2 = expmed_mult_highpart
4146 (compute_mode, t1,
4147 gen_int_mode (ml, compute_mode),
4148 NULL_RTX, 1, max_cost - extra_cost);
4149 if (t2 == 0)
4150 goto fail1;
4151 quotient = expand_shift
4152 (RSHIFT_EXPR, compute_mode, t2,
4153 post_shift, tquotient, 1);
4154 }
4155 }
4156 }
4157 	    else /* Mode too wide to use the tricky code */
4158 break;
4159
4160 insn = get_last_insn ();
4161 if (insn != last)
4162 set_dst_reg_note (insn, REG_EQUAL,
4163 gen_rtx_UDIV (compute_mode, op0, op1),
4164 quotient);
4165 }
4166 else /* TRUNC_DIV, signed */
4167 {
4168 unsigned HOST_WIDE_INT ml;
4169 int lgup, post_shift;
4170 rtx mlr;
4171 HOST_WIDE_INT d = INTVAL (op1);
4172 unsigned HOST_WIDE_INT abs_d;
4173
4174 /* Since d might be INT_MIN, we have to cast to
4175 unsigned HOST_WIDE_INT before negating to avoid
4176 undefined signed overflow. */
4177 abs_d = (d >= 0
4178 ? (unsigned HOST_WIDE_INT) d
4179 : - (unsigned HOST_WIDE_INT) d);
4180
4181 /* n rem d = n rem -d */
4182 if (rem_flag && d < 0)
4183 {
4184 d = abs_d;
4185 op1 = gen_int_mode (abs_d, compute_mode);
4186 }
4187
4188 if (d == 1)
4189 quotient = op0;
4190 else if (d == -1)
4191 quotient = expand_unop (compute_mode, neg_optab, op0,
4192 tquotient, 0);
4193 else if (HOST_BITS_PER_WIDE_INT >= size
4194 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4195 {
4196 /* This case is not handled correctly below. */
4197 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4198 compute_mode, 1, 1);
4199 if (quotient == 0)
4200 goto fail1;
4201 }
4202 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4203 && (rem_flag
4204 ? smod_pow2_cheap (speed, compute_mode)
4205 : sdiv_pow2_cheap (speed, compute_mode))
4206 		   /* We assume that the cheap metric is true if the
4207 optab has an expander for this mode. */
4208 && ((optab_handler ((rem_flag ? smod_optab
4209 : sdiv_optab),
4210 compute_mode)
4211 != CODE_FOR_nothing)
4212 || (optab_handler (sdivmod_optab,
4213 compute_mode)
4214 != CODE_FOR_nothing)))
4215 ;
4216 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4217 {
4218 if (rem_flag)
4219 {
4220 remainder = expand_smod_pow2 (compute_mode, op0, d);
4221 if (remainder)
4222 return gen_lowpart (mode, remainder);
4223 }
4224
4225 if (sdiv_pow2_cheap (speed, compute_mode)
4226 && ((optab_handler (sdiv_optab, compute_mode)
4227 != CODE_FOR_nothing)
4228 || (optab_handler (sdivmod_optab, compute_mode)
4229 != CODE_FOR_nothing)))
4230 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4231 compute_mode, op0,
4232 gen_int_mode (abs_d,
4233 compute_mode),
4234 NULL_RTX, 0);
4235 else
4236 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4237
4238 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4239 negate the quotient. */
4240 if (d < 0)
4241 {
4242 insn = get_last_insn ();
4243 if (insn != last
4244 && abs_d < ((unsigned HOST_WIDE_INT) 1
4245 << (HOST_BITS_PER_WIDE_INT - 1)))
4246 set_dst_reg_note (insn, REG_EQUAL,
4247 gen_rtx_DIV (compute_mode, op0,
4248 gen_int_mode
4249 (abs_d,
4250 compute_mode)),
4251 quotient);
4252
4253 quotient = expand_unop (compute_mode, neg_optab,
4254 quotient, quotient, 0);
4255 }
4256 }
4257 else if (size <= HOST_BITS_PER_WIDE_INT)
4258 {
4259 choose_multiplier (abs_d, size, size - 1,
4260 &ml, &post_shift, &lgup);
4261 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4262 {
4263 rtx t1, t2, t3;
4264
4265 if (post_shift >= BITS_PER_WORD
4266 || size - 1 >= BITS_PER_WORD)
4267 goto fail1;
4268
4269 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4270 + shift_cost (speed, compute_mode, size - 1)
4271 + add_cost (speed, compute_mode));
4272 t1 = expmed_mult_highpart
4273 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4274 NULL_RTX, 0, max_cost - extra_cost);
4275 if (t1 == 0)
4276 goto fail1;
4277 t2 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t1,
4279 post_shift, NULL_RTX, 0);
4280 t3 = expand_shift
4281 (RSHIFT_EXPR, compute_mode, op0,
4282 size - 1, NULL_RTX, 0);
4283 if (d < 0)
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t3, t2),
4287 tquotient);
4288 else
4289 quotient
4290 = force_operand (gen_rtx_MINUS (compute_mode,
4291 t2, t3),
4292 tquotient);
4293 }
4294 else
4295 {
4296 rtx t1, t2, t3, t4;
4297
4298 if (post_shift >= BITS_PER_WORD
4299 || size - 1 >= BITS_PER_WORD)
4300 goto fail1;
4301
4302 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4303 mlr = gen_int_mode (ml, compute_mode);
4304 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4305 + shift_cost (speed, compute_mode, size - 1)
4306 + 2 * add_cost (speed, compute_mode));
4307 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4308 NULL_RTX, 0,
4309 max_cost - extra_cost);
4310 if (t1 == 0)
4311 goto fail1;
4312 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4313 t1, op0),
4314 NULL_RTX);
4315 t3 = expand_shift
4316 (RSHIFT_EXPR, compute_mode, t2,
4317 post_shift, NULL_RTX, 0);
4318 t4 = expand_shift
4319 (RSHIFT_EXPR, compute_mode, op0,
4320 size - 1, NULL_RTX, 0);
4321 if (d < 0)
4322 quotient
4323 = force_operand (gen_rtx_MINUS (compute_mode,
4324 t4, t3),
4325 tquotient);
4326 else
4327 quotient
4328 = force_operand (gen_rtx_MINUS (compute_mode,
4329 t3, t4),
4330 tquotient);
4331 }
4332 }
4333 else /* Too wide mode to use tricky code */
4334 	    else /* Mode too wide to use the tricky code */
4335
4336 insn = get_last_insn ();
4337 if (insn != last)
4338 set_dst_reg_note (insn, REG_EQUAL,
4339 gen_rtx_DIV (compute_mode, op0, op1),
4340 quotient);
4341 }
4342 break;
4343 }
4344 fail1:
4345 delete_insns_since (last);
4346 break;
4347
4348 case FLOOR_DIV_EXPR:
4349 case FLOOR_MOD_EXPR:
4350 /* We will come here only for signed operations. */
4351 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4352 {
4353 unsigned HOST_WIDE_INT mh, ml;
4354 int pre_shift, lgup, post_shift;
4355 HOST_WIDE_INT d = INTVAL (op1);
4356
4357 if (d > 0)
4358 {
4359 /* We could just as easily deal with negative constants here,
4360 but it does not seem worth the trouble for GCC 2.6. */
4361 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4362 {
4363 pre_shift = floor_log2 (d);
4364 if (rem_flag)
4365 {
4366 unsigned HOST_WIDE_INT mask
4367 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4368 remainder = expand_binop
4369 (compute_mode, and_optab, op0,
4370 gen_int_mode (mask, compute_mode),
4371 remainder, 0, OPTAB_LIB_WIDEN);
4372 if (remainder)
4373 return gen_lowpart (mode, remainder);
4374 }
4375 quotient = expand_shift
4376 (RSHIFT_EXPR, compute_mode, op0,
4377 pre_shift, tquotient, 0);
4378 }
4379 else
4380 {
4381 rtx t1, t2, t3, t4;
4382
4383 mh = choose_multiplier (d, size, size - 1,
4384 &ml, &post_shift, &lgup);
4385 gcc_assert (!mh);
4386
4387 if (post_shift < BITS_PER_WORD
4388 && size - 1 < BITS_PER_WORD)
4389 {
4390 t1 = expand_shift
4391 (RSHIFT_EXPR, compute_mode, op0,
4392 size - 1, NULL_RTX, 0);
4393 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4394 NULL_RTX, 0, OPTAB_WIDEN);
4395 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4396 + shift_cost (speed, compute_mode, size - 1)
4397 + 2 * add_cost (speed, compute_mode));
4398 t3 = expmed_mult_highpart
4399 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4400 NULL_RTX, 1, max_cost - extra_cost);
4401 if (t3 != 0)
4402 {
4403 t4 = expand_shift
4404 (RSHIFT_EXPR, compute_mode, t3,
4405 post_shift, NULL_RTX, 1);
4406 quotient = expand_binop (compute_mode, xor_optab,
4407 t4, t1, tquotient, 0,
4408 OPTAB_WIDEN);
4409 }
4410 }
4411 }
4412 }
4413 else
4414 {
4415 rtx nsign, t1, t2, t3, t4;
4416 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4417 op0, constm1_rtx), NULL_RTX);
4418 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4419 0, OPTAB_WIDEN);
4420 nsign = expand_shift
4421 (RSHIFT_EXPR, compute_mode, t2,
4422 size - 1, NULL_RTX, 0);
4423 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4424 NULL_RTX);
4425 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4426 NULL_RTX, 0);
4427 if (t4)
4428 {
4429 rtx t5;
4430 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4431 NULL_RTX, 0);
4432 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4433 t4, t5),
4434 tquotient);
4435 }
4436 }
4437 }
4438
4439 if (quotient != 0)
4440 break;
4441 delete_insns_since (last);
4442
4443 /* Try using an instruction that produces both the quotient and
4444 remainder, using truncation. We can easily compensate the quotient
4445 or remainder to get floor rounding, once we have the remainder.
4446 	 Notice that we also compute the final remainder value here,
4447 and return the result right away. */
4448 if (target == 0 || GET_MODE (target) != compute_mode)
4449 target = gen_reg_rtx (compute_mode);
4450
4451 if (rem_flag)
4452 {
4453 remainder
4454 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4455 quotient = gen_reg_rtx (compute_mode);
4456 }
4457 else
4458 {
4459 quotient
4460 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4461 remainder = gen_reg_rtx (compute_mode);
4462 }
4463
4464 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4465 quotient, remainder, 0))
4466 {
4467 /* This could be computed with a branch-less sequence.
4468 Save that for later. */
4469 rtx tem;
4470 rtx label = gen_label_rtx ();
4471 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4472 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4473 NULL_RTX, 0, OPTAB_WIDEN);
4474 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4475 expand_dec (quotient, const1_rtx);
4476 expand_inc (remainder, op1);
4477 emit_label (label);
4478 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4479 }
4480
4481 /* No luck with division elimination or divmod. Have to do it
4482 by conditionally adjusting op0 *and* the result. */
4483 {
4484 rtx label1, label2, label3, label4, label5;
4485 rtx adjusted_op0;
4486 rtx tem;
4487
4488 quotient = gen_reg_rtx (compute_mode);
4489 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4490 label1 = gen_label_rtx ();
4491 label2 = gen_label_rtx ();
4492 label3 = gen_label_rtx ();
4493 label4 = gen_label_rtx ();
4494 label5 = gen_label_rtx ();
4495 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4496 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4497 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4498 quotient, 0, OPTAB_LIB_WIDEN);
4499 if (tem != quotient)
4500 emit_move_insn (quotient, tem);
4501 emit_jump_insn (gen_jump (label5));
4502 emit_barrier ();
4503 emit_label (label1);
4504 expand_inc (adjusted_op0, const1_rtx);
4505 emit_jump_insn (gen_jump (label4));
4506 emit_barrier ();
4507 emit_label (label2);
4508 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4509 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4510 quotient, 0, OPTAB_LIB_WIDEN);
4511 if (tem != quotient)
4512 emit_move_insn (quotient, tem);
4513 emit_jump_insn (gen_jump (label5));
4514 emit_barrier ();
4515 emit_label (label3);
4516 expand_dec (adjusted_op0, const1_rtx);
4517 emit_label (label4);
4518 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4519 quotient, 0, OPTAB_LIB_WIDEN);
4520 if (tem != quotient)
4521 emit_move_insn (quotient, tem);
4522 expand_dec (quotient, const1_rtx);
4523 emit_label (label5);
4524 }
4525 break;
4526
4527 case CEIL_DIV_EXPR:
4528 case CEIL_MOD_EXPR:
4529 if (unsignedp)
4530 {
4531 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4532 {
4533 rtx t1, t2, t3;
4534 unsigned HOST_WIDE_INT d = INTVAL (op1);
4535 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4536 floor_log2 (d), tquotient, 1);
4537 t2 = expand_binop (compute_mode, and_optab, op0,
4538 gen_int_mode (d - 1, compute_mode),
4539 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4540 t3 = gen_reg_rtx (compute_mode);
4541 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4542 compute_mode, 1, 1);
4543 if (t3 == 0)
4544 {
4545 rtx lab;
4546 lab = gen_label_rtx ();
4547 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4548 expand_inc (t1, const1_rtx);
4549 emit_label (lab);
4550 quotient = t1;
4551 }
4552 else
4553 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4554 t1, t3),
4555 tquotient);
4556 break;
4557 }
4558
4559 /* Try using an instruction that produces both the quotient and
4560 remainder, using truncation. We can easily compensate the
4561 quotient or remainder to get ceiling rounding, once we have the
4562 	     remainder.  Notice that we also compute the final remainder
4563 value here, and return the result right away. */
4564 if (target == 0 || GET_MODE (target) != compute_mode)
4565 target = gen_reg_rtx (compute_mode);
4566
4567 if (rem_flag)
4568 {
4569 remainder = (REG_P (target)
4570 ? target : gen_reg_rtx (compute_mode));
4571 quotient = gen_reg_rtx (compute_mode);
4572 }
4573 else
4574 {
4575 quotient = (REG_P (target)
4576 ? target : gen_reg_rtx (compute_mode));
4577 remainder = gen_reg_rtx (compute_mode);
4578 }
4579
4580 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4581 remainder, 1))
4582 {
4583 /* This could be computed with a branch-less sequence.
4584 Save that for later. */
4585 rtx label = gen_label_rtx ();
4586 do_cmp_and_jump (remainder, const0_rtx, EQ,
4587 compute_mode, label);
4588 expand_inc (quotient, const1_rtx);
4589 expand_dec (remainder, op1);
4590 emit_label (label);
4591 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4592 }
4593
4594 /* No luck with division elimination or divmod. Have to do it
4595 by conditionally adjusting op0 *and* the result. */
4596 {
4597 rtx label1, label2;
4598 rtx adjusted_op0, tem;
4599
4600 quotient = gen_reg_rtx (compute_mode);
4601 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4602 label1 = gen_label_rtx ();
4603 label2 = gen_label_rtx ();
4604 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4605 compute_mode, label1);
4606 emit_move_insn (quotient, const0_rtx);
4607 emit_jump_insn (gen_jump (label2));
4608 emit_barrier ();
4609 emit_label (label1);
4610 expand_dec (adjusted_op0, const1_rtx);
4611 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4612 quotient, 1, OPTAB_LIB_WIDEN);
4613 if (tem != quotient)
4614 emit_move_insn (quotient, tem);
4615 expand_inc (quotient, const1_rtx);
4616 emit_label (label2);
4617 }
4618 }
4619 else /* signed */
4620 {
4621 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4622 && INTVAL (op1) >= 0)
4623 {
4624 /* This is extremely similar to the code for the unsigned case
4625 above. For 2.7 we should merge these variants, but for
4626 2.6.1 I don't want to touch the code for unsigned since that
4627 get used in C. The signed case will only be used by other
4628 		 gets used in C.  The signed case will only be used by other
4629
4630 rtx t1, t2, t3;
4631 unsigned HOST_WIDE_INT d = INTVAL (op1);
4632 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4633 floor_log2 (d), tquotient, 0);
4634 t2 = expand_binop (compute_mode, and_optab, op0,
4635 gen_int_mode (d - 1, compute_mode),
4636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4637 t3 = gen_reg_rtx (compute_mode);
4638 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4639 compute_mode, 1, 1);
4640 if (t3 == 0)
4641 {
4642 rtx lab;
4643 lab = gen_label_rtx ();
4644 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4645 expand_inc (t1, const1_rtx);
4646 emit_label (lab);
4647 quotient = t1;
4648 }
4649 else
4650 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4651 t1, t3),
4652 tquotient);
4653 break;
4654 }
4655
4656 /* Try using an instruction that produces both the quotient and
4657 remainder, using truncation. We can easily compensate the
4658 quotient or remainder to get ceiling rounding, once we have the
4659 	     remainder.  Notice that we also compute the final remainder
4660 value here, and return the result right away. */
4661 if (target == 0 || GET_MODE (target) != compute_mode)
4662 target = gen_reg_rtx (compute_mode);
4663 if (rem_flag)
4664 {
4665 	      remainder = (REG_P (target)
4666 ? target : gen_reg_rtx (compute_mode));
4667 quotient = gen_reg_rtx (compute_mode);
4668 }
4669 else
4670 {
4671 quotient = (REG_P (target)
4672 ? target : gen_reg_rtx (compute_mode));
4673 remainder = gen_reg_rtx (compute_mode);
4674 }
4675
4676 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4677 remainder, 0))
4678 {
4679 /* This could be computed with a branch-less sequence.
4680 Save that for later. */
4681 rtx tem;
4682 rtx label = gen_label_rtx ();
4683 do_cmp_and_jump (remainder, const0_rtx, EQ,
4684 compute_mode, label);
4685 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4686 NULL_RTX, 0, OPTAB_WIDEN);
4687 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4688 expand_inc (quotient, const1_rtx);
4689 expand_dec (remainder, op1);
4690 emit_label (label);
4691 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4692 }
4693
4694 /* No luck with division elimination or divmod. Have to do it
4695 by conditionally adjusting op0 *and* the result. */
4696 {
4697 rtx label1, label2, label3, label4, label5;
4698 rtx adjusted_op0;
4699 rtx tem;
4700
4701 quotient = gen_reg_rtx (compute_mode);
4702 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4703 label1 = gen_label_rtx ();
4704 label2 = gen_label_rtx ();
4705 label3 = gen_label_rtx ();
4706 label4 = gen_label_rtx ();
4707 label5 = gen_label_rtx ();
4708 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4709 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4710 compute_mode, label1);
4711 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4712 quotient, 0, OPTAB_LIB_WIDEN);
4713 if (tem != quotient)
4714 emit_move_insn (quotient, tem);
4715 emit_jump_insn (gen_jump (label5));
4716 emit_barrier ();
4717 emit_label (label1);
4718 expand_dec (adjusted_op0, const1_rtx);
4719 emit_jump_insn (gen_jump (label4));
4720 emit_barrier ();
4721 emit_label (label2);
4722 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4723 compute_mode, label3);
4724 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4725 quotient, 0, OPTAB_LIB_WIDEN);
4726 if (tem != quotient)
4727 emit_move_insn (quotient, tem);
4728 emit_jump_insn (gen_jump (label5));
4729 emit_barrier ();
4730 emit_label (label3);
4731 expand_inc (adjusted_op0, const1_rtx);
4732 emit_label (label4);
4733 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4734 quotient, 0, OPTAB_LIB_WIDEN);
4735 if (tem != quotient)
4736 emit_move_insn (quotient, tem);
4737 expand_inc (quotient, const1_rtx);
4738 emit_label (label5);
4739 }
4740 }
4741 break;
4742
4743 case EXACT_DIV_EXPR:
4744 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4745 {
4746 HOST_WIDE_INT d = INTVAL (op1);
4747 unsigned HOST_WIDE_INT ml;
4748 int pre_shift;
4749 rtx t1;
4750
4751 pre_shift = floor_log2 (d & -d);
4752 ml = invert_mod2n (d >> pre_shift, size);
4753 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4754 pre_shift, NULL_RTX, unsignedp);
4755 quotient = expand_mult (compute_mode, t1,
4756 gen_int_mode (ml, compute_mode),
4757 NULL_RTX, 1);
4758
4759 insn = get_last_insn ();
4760 set_dst_reg_note (insn, REG_EQUAL,
4761 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4762 compute_mode, op0, op1),
4763 quotient);
4764 }
4765 break;
4766
4767 case ROUND_DIV_EXPR:
4768 case ROUND_MOD_EXPR:
4769 if (unsignedp)
4770 {
4771 rtx tem;
4772 rtx label;
4773 label = gen_label_rtx ();
4774 quotient = gen_reg_rtx (compute_mode);
4775 remainder = gen_reg_rtx (compute_mode);
4776 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4777 {
4778 rtx tem;
4779 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4780 quotient, 1, OPTAB_LIB_WIDEN);
4781 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4782 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4783 remainder, 1, OPTAB_LIB_WIDEN);
4784 }
4785 tem = plus_constant (compute_mode, op1, -1);
4786 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4787 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4788 expand_inc (quotient, const1_rtx);
4789 expand_dec (remainder, op1);
4790 emit_label (label);
4791 }
4792 else
4793 {
4794 rtx abs_rem, abs_op1, tem, mask;
4795 rtx label;
4796 label = gen_label_rtx ();
4797 quotient = gen_reg_rtx (compute_mode);
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4800 {
4801 rtx tem;
4802 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4803 quotient, 0, OPTAB_LIB_WIDEN);
4804 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4805 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806 remainder, 0, OPTAB_LIB_WIDEN);
4807 }
4808 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4809 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4810 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4811 1, NULL_RTX, 1);
4812 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4813 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4814 NULL_RTX, 0, OPTAB_WIDEN);
4815 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4816 size - 1, NULL_RTX, 0);
4817 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4818 NULL_RTX, 0, OPTAB_WIDEN);
4819 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4820 NULL_RTX, 0, OPTAB_WIDEN);
4821 expand_inc (quotient, tem);
4822 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4823 NULL_RTX, 0, OPTAB_WIDEN);
4824 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4825 NULL_RTX, 0, OPTAB_WIDEN);
4826 expand_dec (remainder, tem);
4827 emit_label (label);
4828 }
4829 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4830
4831 default:
4832 gcc_unreachable ();
4833 }
4834
4835 if (quotient == 0)
4836 {
4837 if (target && GET_MODE (target) != compute_mode)
4838 target = 0;
4839
4840 if (rem_flag)
4841 {
4842 /* Try to produce the remainder without producing the quotient.
4843 If we seem to have a divmod pattern that does not require widening,
4844 don't try widening here. We should really have a WIDEN argument
4845 to expand_twoval_binop, since what we'd really like to do here is
4846 1) try a mod insn in compute_mode
4847 2) try a divmod insn in compute_mode
4848 3) try a div insn in compute_mode and multiply-subtract to get
4849 remainder
4850 4) try the same things with widening allowed. */
4851 remainder
4852 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4853 op0, op1, target,
4854 unsignedp,
4855 ((optab_handler (optab2, compute_mode)
4856 != CODE_FOR_nothing)
4857 ? OPTAB_DIRECT : OPTAB_WIDEN));
4858 if (remainder == 0)
4859 {
4860 /* No luck there. Can we do remainder and divide at once
4861 without a library call? */
4862 remainder = gen_reg_rtx (compute_mode);
4863 if (! expand_twoval_binop ((unsignedp
4864 ? udivmod_optab
4865 : sdivmod_optab),
4866 op0, op1,
4867 NULL_RTX, remainder, unsignedp))
4868 remainder = 0;
4869 }
4870
4871 if (remainder)
4872 return gen_lowpart (mode, remainder);
4873 }
4874
4875 /* Produce the quotient. Try a quotient insn, but not a library call.
4876 If we have a divmod in this mode, use it in preference to widening
4877 the div (for this test we assume it will not fail). Note that optab2
4878 	 is set to whichever of the two optabs the call below will use.  */
4879 quotient
4880 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4881 op0, op1, rem_flag ? NULL_RTX : target,
4882 unsignedp,
4883 ((optab_handler (optab2, compute_mode)
4884 != CODE_FOR_nothing)
4885 ? OPTAB_DIRECT : OPTAB_WIDEN));
4886
4887 if (quotient == 0)
4888 {
4889 /* No luck there. Try a quotient-and-remainder insn,
4890 keeping the quotient alone. */
4891 quotient = gen_reg_rtx (compute_mode);
4892 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4893 op0, op1,
4894 quotient, NULL_RTX, unsignedp))
4895 {
4896 quotient = 0;
4897 if (! rem_flag)
4898 /* Still no luck. If we are not computing the remainder,
4899 use a library call for the quotient. */
4900 quotient = sign_expand_binop (compute_mode,
4901 udiv_optab, sdiv_optab,
4902 op0, op1, target,
4903 unsignedp, OPTAB_LIB_WIDEN);
4904 }
4905 }
4906 }
4907
4908 if (rem_flag)
4909 {
4910 if (target && GET_MODE (target) != compute_mode)
4911 target = 0;
4912
4913 if (quotient == 0)
4914 {
4915 /* No divide instruction either. Use library for remainder. */
4916 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4917 op0, op1, target,
4918 unsignedp, OPTAB_LIB_WIDEN);
4919 /* No remainder function. Try a quotient-and-remainder
4920 function, keeping the remainder. */
4921 if (!remainder)
4922 {
4923 remainder = gen_reg_rtx (compute_mode);
4924 if (!expand_twoval_binop_libfunc
4925 (unsignedp ? udivmod_optab : sdivmod_optab,
4926 op0, op1,
4927 NULL_RTX, remainder,
4928 unsignedp ? UMOD : MOD))
4929 remainder = NULL_RTX;
4930 }
4931 }
4932 else
4933 {
4934 /* We divided. Now finish doing X - Y * (X / Y). */
4935 remainder = expand_mult (compute_mode, quotient, op1,
4936 NULL_RTX, unsignedp);
4937 remainder = expand_binop (compute_mode, sub_optab, op0,
4938 remainder, target, unsignedp,
4939 OPTAB_LIB_WIDEN);
4940 }
4941 }
4942
4943 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4944 }
4945 \f
4946 /* Return a tree node with data type TYPE, describing the value of X.
4947    Usually this is a VAR_DECL, if there is no obvious better choice.
4948    X may be an expression; however, we only support those expressions
4949 generated by loop.c. */
4950
4951 tree
4952 make_tree (tree type, rtx x)
4953 {
4954 tree t;
4955
4956 switch (GET_CODE (x))
4957 {
4958 case CONST_INT:
4959 {
4960 HOST_WIDE_INT hi = 0;
4961
4962 if (INTVAL (x) < 0
4963 && !(TYPE_UNSIGNED (type)
4964 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4965 < HOST_BITS_PER_WIDE_INT)))
4966 hi = -1;
4967
4968 t = build_int_cst_wide (type, INTVAL (x), hi);
4969
4970 return t;
4971 }
4972
4973 case CONST_DOUBLE:
4974 if (GET_MODE (x) == VOIDmode)
4975 t = build_int_cst_wide (type,
4976 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4977 else
4978 {
4979 REAL_VALUE_TYPE d;
4980
4981 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4982 t = build_real (type, d);
4983 }
4984
4985 return t;
4986
4987 case CONST_VECTOR:
4988 {
4989 int units = CONST_VECTOR_NUNITS (x);
4990 tree itype = TREE_TYPE (type);
4991 tree *elts;
4992 int i;
4993
4994 /* Build a tree with vector elements. */
4995 elts = XALLOCAVEC (tree, units);
4996 for (i = units - 1; i >= 0; --i)
4997 {
4998 rtx elt = CONST_VECTOR_ELT (x, i);
4999 elts[i] = make_tree (itype, elt);
5000 }
5001
5002 return build_vector (type, elts);
5003 }
5004
5005 case PLUS:
5006 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5007 make_tree (type, XEXP (x, 1)));
5008
5009 case MINUS:
5010 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1)));
5012
5013 case NEG:
5014 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5015
5016 case MULT:
5017 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5018 make_tree (type, XEXP (x, 1)));
5019
5020 case ASHIFT:
5021 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case LSHIFTRT:
5025 t = unsigned_type_for (type);
5026 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5027 make_tree (t, XEXP (x, 0)),
5028 make_tree (type, XEXP (x, 1))));
5029
5030 case ASHIFTRT:
5031 t = signed_type_for (type);
5032 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5033 make_tree (t, XEXP (x, 0)),
5034 make_tree (type, XEXP (x, 1))));
5035
5036 case DIV:
5037 if (TREE_CODE (type) != REAL_TYPE)
5038 t = signed_type_for (type);
5039 else
5040 t = type;
5041
5042 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5043 make_tree (t, XEXP (x, 0)),
5044 make_tree (t, XEXP (x, 1))));
5045 case UDIV:
5046 t = unsigned_type_for (type);
5047 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5048 make_tree (t, XEXP (x, 0)),
5049 make_tree (t, XEXP (x, 1))));
5050
5051 case SIGN_EXTEND:
5052 case ZERO_EXTEND:
5053 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5054 GET_CODE (x) == ZERO_EXTEND);
5055 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5056
5057 case CONST:
5058 return make_tree (type, XEXP (x, 0));
5059
5060 case SYMBOL_REF:
5061 t = SYMBOL_REF_DECL (x);
5062 if (t)
5063 return fold_convert (type, build_fold_addr_expr (t));
5064 /* else fall through. */
5065
5066 default:
5067 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5068
5069 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5070 address mode to pointer mode. */
5071 if (POINTER_TYPE_P (type))
5072 x = convert_memory_address_addr_space
5073 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5074
5075 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5076 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5077 t->decl_with_rtl.rtl = x;
5078
5079 return t;
5080 }
5081 }
5082 \f
5083 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5084 and returning TARGET.
5085
5086 If TARGET is 0, a pseudo-register or constant is returned. */
5087
5088 rtx
5089 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5090 {
5091 rtx tem = 0;
5092
5093 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5094 tem = simplify_binary_operation (AND, mode, op0, op1);
5095 if (tem == 0)
5096 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5097
5098 if (target == 0)
5099 target = tem;
5100 else if (tem != target)
5101 emit_move_insn (target, tem);
5102 return target;
5103 }
5104
5105 /* Helper function for emit_store_flag. */
5106 static rtx
5107 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5108 enum machine_mode mode, enum machine_mode compare_mode,
5109 int unsignedp, rtx x, rtx y, int normalizep,
5110 enum machine_mode target_mode)
5111 {
5112 struct expand_operand ops[4];
5113 rtx op0, last, comparison, subtarget;
5114 enum machine_mode result_mode = targetm.cstore_mode (icode);
5115
5116 last = get_last_insn ();
5117 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5118 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5119 if (!x || !y)
5120 {
5121 delete_insns_since (last);
5122 return NULL_RTX;
5123 }
5124
5125 if (target_mode == VOIDmode)
5126 target_mode = result_mode;
5127 if (!target)
5128 target = gen_reg_rtx (target_mode);
5129
5130 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5131
5132 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5133 create_fixed_operand (&ops[1], comparison);
5134 create_fixed_operand (&ops[2], x);
5135 create_fixed_operand (&ops[3], y);
5136 if (!maybe_expand_insn (icode, 4, ops))
5137 {
5138 delete_insns_since (last);
5139 return NULL_RTX;
5140 }
5141 subtarget = ops[0].value;
5142
5143 /* If we are converting to a wider mode, first convert to
5144 TARGET_MODE, then normalize. This produces better combining
5145 opportunities on machines that have a SIGN_EXTRACT when we are
5146 testing a single bit. This mostly benefits the 68k.
5147
5148 If STORE_FLAG_VALUE does not have the sign bit set when
5149 interpreted in MODE, we can do this conversion as unsigned, which
5150 is usually more efficient. */
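/* For instance, if STORE_FLAG_VALUE is 1, a narrow (say QImode) result of
   1 keeps its value under zero-extension to a wider mode such as SImode,
   so the unsigned conversion is safe and usually cheaper. If instead
   STORE_FLAG_VALUE had its sign bit set, a sign-extension would be needed
   so that the widened value still equals STORE_FLAG_VALUE.
   (QImode/SImode above are illustrative only.) */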
5151 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5152 {
5153 convert_move (target, subtarget,
5154 val_signbit_known_clear_p (result_mode,
5155 STORE_FLAG_VALUE));
5156 op0 = target;
5157 result_mode = target_mode;
5158 }
5159 else
5160 op0 = subtarget;
5161
5162 /* If we want to keep subexpressions around, don't reuse our last
5163 target. */
5164 if (optimize)
5165 subtarget = 0;
5166
5167 /* Now normalize to the proper value in MODE. Sometimes we don't
5168 have to do anything. */
5169 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5170 ;
5171 /* STORE_FLAG_VALUE might be the most negative number, so write
5172 the comparison this way to avoid a compile-time warning. */
5173 else if (- normalizep == STORE_FLAG_VALUE)
5174 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5175
5176 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5177 it hard to use a value of just the sign bit due to ANSI integer
5178 constant typing rules. */
5179 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5180 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5181 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5182 normalizep == 1);
5183 else
5184 {
5185 gcc_assert (STORE_FLAG_VALUE & 1);
5186
5187 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5188 if (normalizep == -1)
5189 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5190 }
5191
5192 /* If we were converting to a smaller mode, do the conversion now. */
5193 if (target_mode != result_mode)
5194 {
5195 convert_move (target, op0, 0);
5196 return target;
5197 }
5198 else
5199 return op0;
5200 }
5201
5202
5203 /* A subroutine of emit_store_flag only including "tricks" that do not
5204 need a recursive call. These are kept separate to avoid infinite
5205 loops. */
5206
5207 static rtx
5208 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5209 enum machine_mode mode, int unsignedp, int normalizep,
5210 enum machine_mode target_mode)
5211 {
5212 rtx subtarget;
5213 enum insn_code icode;
5214 enum machine_mode compare_mode;
5215 enum mode_class mclass;
5216 enum rtx_code scode;
5217 rtx tem;
5218
5219 if (unsignedp)
5220 code = unsigned_condition (code);
5221 scode = swap_condition (code);
5222
5223 /* If one operand is constant, make it the second one. Only do this
5224 if the other operand is not constant as well. */
5225
5226 if (swap_commutative_operands_p (op0, op1))
5227 {
5228 tem = op0;
5229 op0 = op1;
5230 op1 = tem;
5231 code = swap_condition (code);
5232 }
5233
5234 if (mode == VOIDmode)
5235 mode = GET_MODE (op0);
5236
5237 /* For some comparisons with 1 and -1, we can convert this to
5238 comparisons with zero. This will often produce more opportunities for
5239 store-flag insns. */
5240
5241 switch (code)
5242 {
5243 case LT:
5244 if (op1 == const1_rtx)
5245 op1 = const0_rtx, code = LE;
5246 break;
5247 case LE:
5248 if (op1 == constm1_rtx)
5249 op1 = const0_rtx, code = LT;
5250 break;
5251 case GE:
5252 if (op1 == const1_rtx)
5253 op1 = const0_rtx, code = GT;
5254 break;
5255 case GT:
5256 if (op1 == constm1_rtx)
5257 op1 = const0_rtx, code = GE;
5258 break;
5259 case GEU:
5260 if (op1 == const1_rtx)
5261 op1 = const0_rtx, code = NE;
5262 break;
5263 case LTU:
5264 if (op1 == const1_rtx)
5265 op1 = const0_rtx, code = EQ;
5266 break;
5267 default:
5268 break;
5269 }
5270
5271 /* If we are comparing a double-word integer with zero or -1, we can
5272 convert the comparison into one involving a single word. */
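/* For example, with 32-bit words and a 64-bit operand X split into word
   halves LO and HI: X == 0 exactly when (LO | HI) == 0, X == -1 exactly
   when (LO & HI) == -1, and X < 0 exactly when HI < 0, so a single-word
   comparison suffices in each case. (Word width is illustrative.) */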
5273 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5274 && GET_MODE_CLASS (mode) == MODE_INT
5275 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5276 {
5277 if ((code == EQ || code == NE)
5278 && (op1 == const0_rtx || op1 == constm1_rtx))
5279 {
5280 rtx op00, op01;
5281
5282 /* Do a logical OR or AND of the two words and compare the
5283 result. */
5284 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5285 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5286 tem = expand_binop (word_mode,
5287 op1 == const0_rtx ? ior_optab : and_optab,
5288 op00, op01, NULL_RTX, unsignedp,
5289 OPTAB_DIRECT);
5290
5291 if (tem != 0)
5292 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5293 unsignedp, normalizep);
5294 }
5295 else if ((code == LT || code == GE) && op1 == const0_rtx)
5296 {
5297 rtx op0h;
5298
5299 /* If testing the sign bit, we can just test the high word. */
5300 op0h = simplify_gen_subreg (word_mode, op0, mode,
5301 subreg_highpart_offset (word_mode,
5302 mode));
5303 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5304 unsignedp, normalizep);
5305 }
5306 else
5307 tem = NULL_RTX;
5308
5309 if (tem)
5310 {
5311 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5312 return tem;
5313 if (!target)
5314 target = gen_reg_rtx (target_mode);
5315
5316 convert_move (target, tem,
5317 !val_signbit_known_set_p (word_mode,
5318 (normalizep ? normalizep
5319 : STORE_FLAG_VALUE)));
5320 return target;
5321 }
5322 }
5323
5324 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5325 complement of A (for GE) and shifting the sign bit to the low bit. */
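/* For instance, with a 32-bit A: "A < 0" is ((unsigned) A) >> 31, i.e. 1
   exactly when the sign bit is set, and "A >= 0" is ((unsigned) ~A) >> 31.
   Using an arithmetic rather than a logical shift gives -1/0 instead of
   1/0, which is what the normalizep == -1 case below wants. (The 32-bit
   width is illustrative.) */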
5326 if (op1 == const0_rtx && (code == LT || code == GE)
5327 && GET_MODE_CLASS (mode) == MODE_INT
5328 && (normalizep || STORE_FLAG_VALUE == 1
5329 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5330 {
5331 subtarget = target;
5332
5333 if (!target)
5334 target_mode = mode;
5335
5336 /* If the result is to be wider than OP0, it is best to convert it
5337 first. If it is to be narrower, it is *incorrect* to convert it
5338 first. */
5339 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5340 {
5341 op0 = convert_modes (target_mode, mode, op0, 0);
5342 mode = target_mode;
5343 }
5344
5345 if (target_mode != mode)
5346 subtarget = 0;
5347
5348 if (code == GE)
5349 op0 = expand_unop (mode, one_cmpl_optab, op0,
5350 ((STORE_FLAG_VALUE == 1 || normalizep)
5351 ? 0 : subtarget), 0);
5352
5353 if (STORE_FLAG_VALUE == 1 || normalizep)
5354 /* If we are supposed to produce a 0/1 value, we want to do
5355 a logical shift from the sign bit to the low-order bit; for
5356 a -1/0 value, we do an arithmetic shift. */
5357 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5358 GET_MODE_BITSIZE (mode) - 1,
5359 subtarget, normalizep != -1);
5360
5361 if (mode != target_mode)
5362 op0 = convert_modes (target_mode, mode, op0, 0);
5363
5364 return op0;
5365 }
5366
5367 mclass = GET_MODE_CLASS (mode);
5368 for (compare_mode = mode; compare_mode != VOIDmode;
5369 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5370 {
5371 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5372 icode = optab_handler (cstore_optab, optab_mode);
5373 if (icode != CODE_FOR_nothing)
5374 {
5375 do_pending_stack_adjust ();
5376 tem = emit_cstore (target, icode, code, mode, compare_mode,
5377 unsignedp, op0, op1, normalizep, target_mode);
5378 if (tem)
5379 return tem;
5380
5381 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5382 {
5383 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5384 unsignedp, op1, op0, normalizep, target_mode);
5385 if (tem)
5386 return tem;
5387 }
5388 break;
5389 }
5390 }
5391
5392 return 0;
5393 }
5394
5395 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5396 and storing in TARGET. Normally return TARGET.
5397 Return 0 if that cannot be done.
5398
5399 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5400 it is VOIDmode, they cannot both be CONST_INT.
5401
5402 UNSIGNEDP is for the case where we have to widen the operands
5403 to perform the operation. It says to use zero-extension.
5404
5405 NORMALIZEP is 1 if we should convert the result to be either zero
5406 or one. NORMALIZEP is -1 if we should convert the result to be
5407 either zero or -1. If NORMALIZEP is zero, the result will be left
5408 "raw" out of the scc insn. */
5409
5410 rtx
5411 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5412 enum machine_mode mode, int unsignedp, int normalizep)
5413 {
5414 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5415 enum rtx_code rcode;
5416 rtx subtarget;
5417 rtx tem, last, trueval;
5418
5419 /* If we compare constants, we shouldn't use a store-flag operation,
5420 but a constant load. We can get there via the vanilla route that
5421 usually generates a compare-branch sequence, but will in this case
5422 fold the comparison to a constant, and thus elide the branch. */
5423 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5424 return NULL_RTX;
5425
5426 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5427 target_mode);
5428 if (tem)
5429 return tem;
5430
5431 /* If we reached here, we can't do this with a scc insn, however there
5432 are some comparisons that can be done in other ways. Don't do any
5433 of these cases if branches are very cheap. */
5434 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5435 return 0;
5436
5437 /* See what we need to return. We can only return a 1, -1, or the
5438 sign bit. */
5439
5440 if (normalizep == 0)
5441 {
5442 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5443 normalizep = STORE_FLAG_VALUE;
5444
5445 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5446 ;
5447 else
5448 return 0;
5449 }
5450
5451 last = get_last_insn ();
5452
5453 /* If optimizing, use different pseudo registers for each insn, instead
5454 of reusing the same pseudo. This leads to better CSE, but slows
5455 down the compiler, since there are more pseudos. */
5456 subtarget = (!optimize
5457 && (target_mode == mode)) ? target : NULL_RTX;
5458 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5459
5460 /* For floating-point comparisons, try the reverse comparison or try
5461 changing the "orderedness" of the comparison. */
5462 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5463 {
5464 enum rtx_code first_code;
5465 bool and_them;
5466
5467 rcode = reverse_condition_maybe_unordered (code);
5468 if (can_compare_p (rcode, mode, ccp_store_flag)
5469 && (code == ORDERED || code == UNORDERED
5470 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5471 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5472 {
5473 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5474 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5475
5476 /* For the reverse comparison, use either an addition or an XOR. */
5477 if (want_add
5478 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5479 optimize_insn_for_speed_p ()) == 0)
5480 {
5481 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5482 STORE_FLAG_VALUE, target_mode);
5483 if (tem)
5484 return expand_binop (target_mode, add_optab, tem,
5485 gen_int_mode (normalizep, target_mode),
5486 target, 0, OPTAB_WIDEN);
5487 }
5488 else if (!want_add
5489 && rtx_cost (trueval, XOR, 1,
5490 optimize_insn_for_speed_p ()) == 0)
5491 {
5492 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5493 normalizep, target_mode);
5494 if (tem)
5495 return expand_binop (target_mode, xor_optab, tem, trueval,
5496 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5497 }
5498 }
5499
5500 delete_insns_since (last);
5501
5502 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5503 if (code == ORDERED || code == UNORDERED)
5504 return 0;
5505
5506 and_them = split_comparison (code, mode, &first_code, &code);
5507
5508 /* If there are no NaNs, the first comparison should always fall through.
5509 Effectively change the comparison to the other one. */
5510 if (!HONOR_NANS (mode))
5511 {
5512 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5513 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5514 target_mode);
5515 }
5516
5517 #ifdef HAVE_conditional_move
5518 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5519 conditional move. */
5520 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5521 normalizep, target_mode);
5522 if (tem == 0)
5523 return 0;
5524
5525 if (and_them)
5526 tem = emit_conditional_move (target, code, op0, op1, mode,
5527 tem, const0_rtx, GET_MODE (tem), 0);
5528 else
5529 tem = emit_conditional_move (target, code, op0, op1, mode,
5530 trueval, tem, GET_MODE (tem), 0);
5531
5532 if (tem == 0)
5533 delete_insns_since (last);
5534 return tem;
5535 #else
5536 return 0;
5537 #endif
5538 }
5539
5540 /* The remaining tricks only apply to integer comparisons. */
5541
5542 if (GET_MODE_CLASS (mode) != MODE_INT)
5543 return 0;
5544
5545 /* If this is an equality comparison of integers, we can try to exclusive-or
5546 (or subtract) the two operands and use a recursive call to try the
5547 comparison with zero. Don't do any of these cases if branches are
5548 very cheap. */
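/* For example, A == B exactly when (A ^ B) == 0, and likewise exactly when
   (A - B) == 0, so either operation reduces the test to a comparison
   against zero. */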
5549
5550 if ((code == EQ || code == NE) && op1 != const0_rtx)
5551 {
5552 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5553 OPTAB_WIDEN);
5554
5555 if (tem == 0)
5556 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5557 OPTAB_WIDEN);
5558 if (tem != 0)
5559 tem = emit_store_flag (target, code, tem, const0_rtx,
5560 mode, unsignedp, normalizep);
5561 if (tem != 0)
5562 return tem;
5563
5564 delete_insns_since (last);
5565 }
5566
5567 /* For integer comparisons, try the reverse comparison. However, for
5568 small X, and if we would have to extend it anyway, implementing "X != 0"
5569 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5570 rcode = reverse_condition (code);
5571 if (can_compare_p (rcode, mode, ccp_store_flag)
5572 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5573 && code == NE
5574 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5575 && op1 == const0_rtx))
5576 {
5577 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5578 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5579
5580 /* Again, for the reverse comparison, use either an addition or an XOR. */
5581 if (want_add
5582 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5583 optimize_insn_for_speed_p ()) == 0)
5584 {
5585 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5586 STORE_FLAG_VALUE, target_mode);
5587 if (tem != 0)
5588 tem = expand_binop (target_mode, add_optab, tem,
5589 gen_int_mode (normalizep, target_mode),
5590 target, 0, OPTAB_WIDEN);
5591 }
5592 else if (!want_add
5593 && rtx_cost (trueval, XOR, 1,
5594 optimize_insn_for_speed_p ()) == 0)
5595 {
5596 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5597 normalizep, target_mode);
5598 if (tem != 0)
5599 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5600 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5601 }
5602
5603 if (tem != 0)
5604 return tem;
5605 delete_insns_since (last);
5606 }
5607
5608 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5609 the constant zero. Reject all other comparisons at this point. Only
5610 do LE and GT if branches are expensive since they are expensive on
5611 2-operand machines. */
5612
5613 if (op1 != const0_rtx
5614 || (code != EQ && code != NE
5615 && (BRANCH_COST (optimize_insn_for_speed_p (),
5616 false) <= 1 || (code != LE && code != GT))))
5617 return 0;
5618
5619 /* Try to put the result of the comparison in the sign bit. Assume we can't
5620 do the necessary operation below. */
5621
5622 tem = 0;
5623
5624 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5625 the sign bit set. */
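/* For example, with 8-bit values: A = 5 gives 0x05 | 0x04 = 0x05 (sign bit
   clear), A = 0 gives 0x00 | 0xff = 0xff, and A = -3 gives 0xfd | 0xfc =
   0xfd (sign bit set). If A > 0 both A and A - 1 are non-negative, while
   A <= 0 makes at least one of them negative. (The 8-bit width is
   illustrative.) */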
5626
5627 if (code == LE)
5628 {
5629 /* This is destructive, so SUBTARGET can't be OP0. */
5630 if (rtx_equal_p (subtarget, op0))
5631 subtarget = 0;
5632
5633 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5634 OPTAB_WIDEN);
5635 if (tem)
5636 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5637 OPTAB_WIDEN);
5638 }
5639
5640 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5641 number of bits in the mode of OP0, minus one. */
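/* For example, with 32-bit A and BITS = 31: the arithmetic shift yields 0
   for A >= 0 and -1 for A < 0. Subtracting A then gives -A (negative) when
   A > 0, 0 when A == 0, and -1 - A (non-negative) when A < 0, so the sign
   bit of the result is set exactly when A > 0. (The 32-bit width is
   illustrative.) */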
5642
5643 if (code == GT)
5644 {
5645 if (rtx_equal_p (subtarget, op0))
5646 subtarget = 0;
5647
5648 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5649 GET_MODE_BITSIZE (mode) - 1,
5650 subtarget, 0);
5651 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5652 OPTAB_WIDEN);
5653 }
5654
5655 if (code == EQ || code == NE)
5656 {
5657 /* For EQ or NE, one way to do the comparison is to apply an operation
5658 that converts the operand into a positive number if it is nonzero
5659 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5660 for NE we negate. This puts the result in the sign bit. Then we
5661 normalize with a shift, if needed.
5662
5663 Two operations that can do the above actions are ABS and FFS, so try
5664 them. If that doesn't work, and MODE is smaller than a full word,
5665 we can use zero-extension to the wider mode (an unsigned conversion)
5666 as the operation. */
5667
5668 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5669 that is compensated by the subsequent overflow when subtracting
5670 one / negating. */
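/* For example, using ABS: for any A other than the most negative value,
   |A| is zero only when A is zero, so |A| - 1 has the sign bit set exactly
   when A == 0 (the EQ case) and -|A| has it set exactly when A != 0 (the
   NE case); the final shift below then extracts that bit. For the most
   negative A, the overflow noted above still yields the correct sign
   bit. */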
5671
5672 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5673 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5674 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5675 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5676 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5677 {
5678 tem = convert_modes (word_mode, mode, op0, 1);
5679 mode = word_mode;
5680 }
5681
5682 if (tem != 0)
5683 {
5684 if (code == EQ)
5685 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5686 0, OPTAB_WIDEN);
5687 else
5688 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5689 }
5690
5691 /* If we couldn't do it that way, for NE we can "or" the two's complement
5692 of the value with itself. For EQ, we take the one's complement of
5693 that "or", which is an extra insn, so we only handle EQ if branches
5694 are expensive. */
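/* For example, with 8-bit values: A = 3 gives -3 | 3 = 0xfd | 0x03 = 0xff
   (sign bit set), while A = 0 gives 0 | 0 = 0. If A is nonzero, at least
   one of A and -A is negative, so the sign bit of (-A | A) is set exactly
   when A != 0; the one's complement flips that for EQ. (The 8-bit width is
   illustrative.) */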
5695
5696 if (tem == 0
5697 && (code == NE
5698 || BRANCH_COST (optimize_insn_for_speed_p (),
5699 false) > 1))
5700 {
5701 if (rtx_equal_p (subtarget, op0))
5702 subtarget = 0;
5703
5704 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5705 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5706 OPTAB_WIDEN);
5707
5708 if (tem && code == EQ)
5709 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5710 }
5711 }
5712
5713 if (tem && normalizep)
5714 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5715 GET_MODE_BITSIZE (mode) - 1,
5716 subtarget, normalizep == 1);
5717
5718 if (tem)
5719 {
5720 if (!target)
5721 ;
5722 else if (GET_MODE (tem) != target_mode)
5723 {
5724 convert_move (target, tem, 0);
5725 tem = target;
5726 }
5727 else if (!subtarget)
5728 {
5729 emit_move_insn (target, tem);
5730 tem = target;
5731 }
5732 }
5733 else
5734 delete_insns_since (last);
5735
5736 return tem;
5737 }
5738
5739 /* Like emit_store_flag, but always succeeds. */
5740
5741 rtx
5742 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5743 enum machine_mode mode, int unsignedp, int normalizep)
5744 {
5745 rtx tem, label;
5746 rtx trueval, falseval;
5747
5748 /* First see if emit_store_flag can do the job. */
5749 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5750 if (tem != 0)
5751 return tem;
5752
5753 if (!target)
5754 target = gen_reg_rtx (word_mode);
5755
5756 /* If this failed, we have to do this with set/compare/jump/set code.
5757 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
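/* Roughly, the special case below emits
     if (target == 0) goto label;
     target = trueval;
   label:
   i.e. TARGET already holds the correct (zero) result when the comparison
   is false, so only the nonzero case needs a store. */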
5758 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5759 if (code == NE
5760 && GET_MODE_CLASS (mode) == MODE_INT
5761 && REG_P (target)
5762 && op0 == target
5763 && op1 == const0_rtx)
5764 {
5765 label = gen_label_rtx ();
5766 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5767 mode, NULL_RTX, NULL_RTX, label, -1);
5768 emit_move_insn (target, trueval);
5769 emit_label (label);
5770 return target;
5771 }
5772
5773 if (!REG_P (target)
5774 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5775 target = gen_reg_rtx (GET_MODE (target));
5776
5777 /* Jump in the right direction if the target cannot implement CODE
5778 but can jump on its reverse condition. */
5779 falseval = const0_rtx;
5780 if (! can_compare_p (code, mode, ccp_jump)
5781 && (! FLOAT_MODE_P (mode)
5782 || code == ORDERED || code == UNORDERED
5783 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5784 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5785 {
5786 enum rtx_code rcode;
5787 if (FLOAT_MODE_P (mode))
5788 rcode = reverse_condition_maybe_unordered (code);
5789 else
5790 rcode = reverse_condition (code);
5791
5792 /* Canonicalize to UNORDERED for the libcall. */
5793 if (can_compare_p (rcode, mode, ccp_jump)
5794 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5795 {
5796 falseval = trueval;
5797 trueval = const0_rtx;
5798 code = rcode;
5799 }
5800 }
5801
5802 emit_move_insn (target, trueval);
5803 label = gen_label_rtx ();
5804 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5805 NULL_RTX, label, -1);
5806
5807 emit_move_insn (target, falseval);
5808 emit_label (label);
5809
5810 return target;
5811 }
5812 \f
5813 /* Perform possibly multi-word comparison and conditional jump to LABEL
5814 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5815 now a thin wrapper around do_compare_rtx_and_jump. */
5816
5817 static void
5818 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5819 rtx label)
5820 {
5821 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5822 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5823 NULL_RTX, NULL_RTX, label, -1);
5824 }