1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
 69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
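/* A power of two has exactly one bit set, so clearing its lowest set bit
   with x & (x - 1) yields zero; e.g. 8 & 7 == 0 and 0 & -1 == 0, while
   6 & 5 == 4 shows a non-power-of-two failing the test.  */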
71
72 struct init_expmed_rtl
73 {
74 struct rtx_def reg; rtunion reg_fld[2];
75 struct rtx_def plus; rtunion plus_fld1;
76 struct rtx_def neg;
77 struct rtx_def mult; rtunion mult_fld1;
78 struct rtx_def sdiv; rtunion sdiv_fld1;
79 struct rtx_def udiv; rtunion udiv_fld1;
80 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
81 struct rtx_def smod_32; rtunion smod_32_fld1;
82 struct rtx_def wide_mult; rtunion wide_mult_fld1;
83 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
84 struct rtx_def wide_trunc;
85 struct rtx_def shift; rtunion shift_fld1;
86 struct rtx_def shift_mult; rtunion shift_mult_fld1;
87 struct rtx_def shift_add; rtunion shift_add_fld1;
88 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
89 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
90 struct rtx_def zext;
91 struct rtx_def trunc;
92
93 rtx pow2[MAX_BITS_PER_WORD];
94 rtx cint[MAX_BITS_PER_WORD];
95 };
96
97 static void
98 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
99 enum machine_mode from_mode, bool speed)
100 {
101 int to_size, from_size;
102 rtx which;
103
104 /* We're given no information about the true size of a partial integer,
105 only the size of the "full" integer it requires for storage. For
106 comparison purposes here, reduce the bit size by one in that case. */
107 to_size = (GET_MODE_BITSIZE (to_mode)
108 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
109 from_size = (GET_MODE_BITSIZE (from_mode)
110 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
111
112 /* Assume cost of zero-extend and sign-extend is the same. */
113 which = (to_size < from_size ? &all->trunc : &all->zext);
114
115 PUT_MODE (&all->reg, from_mode);
116 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
117 }
118
119 static void
120 init_expmed_one_mode (struct init_expmed_rtl *all,
121 enum machine_mode mode, int speed)
122 {
123 int m, n, mode_bitsize;
124 enum machine_mode mode_from;
125
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
127
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142 PUT_MODE (&all->zext, mode);
143 PUT_MODE (&all->trunc, mode);
144
145 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
146 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
147 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
148 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
149 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
150
151 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
152 <= 2 * add_cost (speed, mode)));
153 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
154 <= 4 * add_cost (speed, mode)));
155
156 set_shift_cost (speed, mode, 0, 0);
157 {
158 int cost = add_cost (speed, mode);
159 set_shiftadd_cost (speed, mode, 0, cost);
160 set_shiftsub0_cost (speed, mode, 0, cost);
161 set_shiftsub1_cost (speed, mode, 0, cost);
162 }
163
164 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
165 for (m = 1; m < n; m++)
166 {
167 XEXP (&all->shift, 1) = all->cint[m];
168 XEXP (&all->shift_mult, 1) = all->pow2[m];
169
170 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
171 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
172 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
173 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
174 }
175
176 if (SCALAR_INT_MODE_P (mode))
177 {
178 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
179 mode_from = (enum machine_mode)(mode_from + 1))
180 init_expmed_one_conv (all, mode, mode_from, speed);
181 }
182 if (GET_MODE_CLASS (mode) == MODE_INT)
183 {
184 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
185 if (wider_mode != VOIDmode)
186 {
187 PUT_MODE (&all->zext, wider_mode);
188 PUT_MODE (&all->wide_mult, wider_mode);
189 PUT_MODE (&all->wide_lshr, wider_mode);
190 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
191
192 set_mul_widen_cost (speed, wider_mode,
193 set_src_cost (&all->wide_mult, speed));
194 set_mul_highpart_cost (speed, mode,
195 set_src_cost (&all->wide_trunc, speed));
196 }
197 }
198 }
199
200 void
201 init_expmed (void)
202 {
203 struct init_expmed_rtl all;
204 enum machine_mode mode;
205 int m, speed;
206
207 memset (&all, 0, sizeof all);
208 for (m = 1; m < MAX_BITS_PER_WORD; m++)
209 {
210 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
211 all.cint[m] = GEN_INT (m);
212 }
213
214 PUT_CODE (&all.reg, REG);
215 /* Avoid using hard regs in ways which may be unsupported. */
216 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
217
218 PUT_CODE (&all.plus, PLUS);
219 XEXP (&all.plus, 0) = &all.reg;
220 XEXP (&all.plus, 1) = &all.reg;
221
222 PUT_CODE (&all.neg, NEG);
223 XEXP (&all.neg, 0) = &all.reg;
224
225 PUT_CODE (&all.mult, MULT);
226 XEXP (&all.mult, 0) = &all.reg;
227 XEXP (&all.mult, 1) = &all.reg;
228
229 PUT_CODE (&all.sdiv, DIV);
230 XEXP (&all.sdiv, 0) = &all.reg;
231 XEXP (&all.sdiv, 1) = &all.reg;
232
233 PUT_CODE (&all.udiv, UDIV);
234 XEXP (&all.udiv, 0) = &all.reg;
235 XEXP (&all.udiv, 1) = &all.reg;
236
237 PUT_CODE (&all.sdiv_32, DIV);
238 XEXP (&all.sdiv_32, 0) = &all.reg;
239 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
240
241 PUT_CODE (&all.smod_32, MOD);
242 XEXP (&all.smod_32, 0) = &all.reg;
243 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
244
245 PUT_CODE (&all.zext, ZERO_EXTEND);
246 XEXP (&all.zext, 0) = &all.reg;
247
248 PUT_CODE (&all.wide_mult, MULT);
249 XEXP (&all.wide_mult, 0) = &all.zext;
250 XEXP (&all.wide_mult, 1) = &all.zext;
251
252 PUT_CODE (&all.wide_lshr, LSHIFTRT);
253 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
254
255 PUT_CODE (&all.wide_trunc, TRUNCATE);
256 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
257
258 PUT_CODE (&all.shift, ASHIFT);
259 XEXP (&all.shift, 0) = &all.reg;
260
261 PUT_CODE (&all.shift_mult, MULT);
262 XEXP (&all.shift_mult, 0) = &all.reg;
263
264 PUT_CODE (&all.shift_add, PLUS);
265 XEXP (&all.shift_add, 0) = &all.shift_mult;
266 XEXP (&all.shift_add, 1) = &all.reg;
267
268 PUT_CODE (&all.shift_sub0, MINUS);
269 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
270 XEXP (&all.shift_sub0, 1) = &all.reg;
271
272 PUT_CODE (&all.shift_sub1, MINUS);
273 XEXP (&all.shift_sub1, 0) = &all.reg;
274 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
275
276 PUT_CODE (&all.trunc, TRUNCATE);
277 XEXP (&all.trunc, 0) = &all.reg;
278
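/* At this point ALL holds mode-less skeleton rtxes; for example the addition
   skeleton is effectively (plus (reg N) (reg N)) with N = LAST_VIRTUAL_REGISTER
   + 1.  init_expmed_one_mode below only has to stamp a mode on them with
   PUT_MODE (giving, say, (plus:SI (reg:SI N) (reg:SI N))) before asking for
   their rtx cost via set_src_cost.  */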
279 for (speed = 0; speed < 2; speed++)
280 {
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
283
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (enum machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
287
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
292
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
297 }
298
299 if (alg_hash_used_p ())
300 {
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
303 }
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
307 }
308
309 /* Return an rtx representing minus the value of X.
310 MODE is the intended mode of the result,
311 useful if X is a CONST_INT. */
312
313 rtx
314 negate_rtx (enum machine_mode mode, rtx x)
315 {
316 rtx result = simplify_unary_operation (NEG, mode, x, mode);
317
318 if (result == 0)
319 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
320
321 return result;
322 }
323
324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
325 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
326 If MODE is BLKmode, return a reference to every byte in the bitfield.
327 Set *NEW_BITNUM to the bit position of the field within the new memory. */
328
329 static rtx
330 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
331 unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum,
333 unsigned HOST_WIDE_INT *new_bitnum)
334 {
335 if (mode == BLKmode)
336 {
337 *new_bitnum = bitnum % BITS_PER_UNIT;
338 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
339 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
340 / BITS_PER_UNIT);
341 return adjust_bitfield_address_size (mem, mode, offset, size);
342 }
343 else
344 {
345 unsigned int unit = GET_MODE_BITSIZE (mode);
346 *new_bitnum = bitnum % unit;
347 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
348 return adjust_bitfield_address (mem, mode, offset);
349 }
350 }
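/* For example, narrowing to SImode (32 bits) with BITNUM == 70 yields
   *NEW_BITNUM == 70 % 32 == 6 and a byte offset of (70 - 6) / 8 == 8,
   i.e. the MEM is advanced to the 32-bit unit containing the field.  */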
351
352 /* The caller wants to perform insertion or extraction PATTERN on a
353 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
354 BITREGION_START and BITREGION_END are as for store_bit_field
355 and FIELDMODE is the natural mode of the field.
356
357 Search for a mode that is compatible with the memory access
358 restrictions and (where applicable) with a register insertion or
359 extraction. Return the new memory on success, storing the adjusted
360 bit position in *NEW_BITNUM. Return null otherwise. */
361
362 static rtx
363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
364 rtx op0, HOST_WIDE_INT bitsize,
365 HOST_WIDE_INT bitnum,
366 unsigned HOST_WIDE_INT bitregion_start,
367 unsigned HOST_WIDE_INT bitregion_end,
368 enum machine_mode fieldmode,
369 unsigned HOST_WIDE_INT *new_bitnum)
370 {
371 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
372 bitregion_end, MEM_ALIGN (op0),
373 MEM_VOLATILE_P (op0));
374 enum machine_mode best_mode;
375 if (iter.next_mode (&best_mode))
376 {
377 /* We can use a memory in BEST_MODE. See whether this is true for
378 any wider modes. All other things being equal, we prefer to
379 use the widest mode possible because it tends to expose more
380 CSE opportunities. */
381 if (!iter.prefer_smaller_modes ())
382 {
383 /* Limit the search to the mode required by the corresponding
384 register insertion or extraction instruction, if any. */
385 enum machine_mode limit_mode = word_mode;
386 extraction_insn insn;
387 if (get_best_reg_extraction_insn (&insn, pattern,
388 GET_MODE_BITSIZE (best_mode),
389 fieldmode))
390 limit_mode = insn.field_mode;
391
392 enum machine_mode wider_mode;
393 while (iter.next_mode (&wider_mode)
394 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
395 best_mode = wider_mode;
396 }
397 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
398 new_bitnum);
399 }
400 return NULL_RTX;
401 }
402
403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
404 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
405 offset is then BITNUM / BITS_PER_UNIT. */
406
407 static bool
408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
409 unsigned HOST_WIDE_INT bitsize,
410 enum machine_mode struct_mode)
411 {
412 if (BYTES_BIG_ENDIAN)
413 return (bitnum % BITS_PER_UNIT == 0
414 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
415 || (bitnum + bitsize) % BITS_PER_WORD == 0));
416 else
417 return bitnum % BITS_PER_WORD == 0;
418 }
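/* As an illustration, on a little-endian target with 32-bit words a 32-bit
   field at BITNUM 32 of a 64-bit structure satisfies
   bitnum % BITS_PER_WORD == 0, so it is the lowpart of the second word and
   can be reached at subreg byte offset 32 / BITS_PER_UNIT == 4.  */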
419 \f
420 /* Try to use instruction INSV to store VALUE into a field of OP0.
421 BITSIZE and BITNUM are as for store_bit_field. */
422
423 static bool
424 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
425 unsigned HOST_WIDE_INT bitsize,
426 unsigned HOST_WIDE_INT bitnum, rtx value)
427 {
428 struct expand_operand ops[4];
429 rtx value1;
430 rtx xop0 = op0;
431 rtx last = get_last_insn ();
432 bool copy_back = false;
433
434 enum machine_mode op_mode = insv->field_mode;
435 unsigned int unit = GET_MODE_BITSIZE (op_mode);
436 if (bitsize == 0 || bitsize > unit)
437 return false;
438
439 if (MEM_P (xop0))
440 /* Get a reference to the first byte of the field. */
441 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
442 &bitnum);
443 else
444 {
445 /* Convert from counting within OP0 to counting in OP_MODE. */
446 if (BYTES_BIG_ENDIAN)
447 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
448
449 /* If xop0 is a register, we need it in OP_MODE
450 to make it acceptable to the format of insv. */
451 if (GET_CODE (xop0) == SUBREG)
452 /* We can't just change the mode, because this might clobber op0,
453 and we will need the original value of op0 if insv fails. */
454 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
455 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
456 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
457 }
458
459 /* If the destination is a paradoxical subreg such that we need a
460 truncate to the inner mode, perform the insertion on a temporary and
461 truncate the result to the original destination. Note that we can't
462 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
463 X) 0)) is (reg:N X). */
464 if (GET_CODE (xop0) == SUBREG
465 && REG_P (SUBREG_REG (xop0))
466 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
467 op_mode))
468 {
469 rtx tem = gen_reg_rtx (op_mode);
470 emit_move_insn (tem, xop0);
471 xop0 = tem;
472 copy_back = true;
473 }
474
475 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
476 "backwards" from the size of the unit we are inserting into.
477 Otherwise, we count bits from the most significant on a
478 BYTES/BITS_BIG_ENDIAN machine. */
479
480 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
481 bitnum = unit - bitsize - bitnum;
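 /* For instance, with a 32-bit unit an 8-bit field at bit 4 becomes
    bitnum = 32 - 8 - 4 = 20, i.e. the same bits counted from the
    opposite end of the unit.  */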
482
483 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
484 value1 = value;
485 if (GET_MODE (value) != op_mode)
486 {
487 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
488 {
489 /* Optimization: Don't bother really extending VALUE
490 if it has all the bits we will actually use. However,
491 if we must narrow it, be sure we do it correctly. */
492
493 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
494 {
495 rtx tmp;
496
497 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
498 if (! tmp)
499 tmp = simplify_gen_subreg (op_mode,
500 force_reg (GET_MODE (value),
501 value1),
502 GET_MODE (value), 0);
503 value1 = tmp;
504 }
505 else
506 value1 = gen_lowpart (op_mode, value1);
507 }
508 else if (CONST_INT_P (value))
509 value1 = gen_int_mode (INTVAL (value), op_mode);
510 else
511 /* Parse phase is supposed to make VALUE's data type
512 match that of the component reference, which is a type
513 at least as wide as the field; so VALUE should have
514 a mode that corresponds to that type. */
515 gcc_assert (CONSTANT_P (value));
516 }
517
518 create_fixed_operand (&ops[0], xop0);
519 create_integer_operand (&ops[1], bitsize);
520 create_integer_operand (&ops[2], bitnum);
521 create_input_operand (&ops[3], value1, op_mode);
522 if (maybe_expand_insn (insv->icode, 4, ops))
523 {
524 if (copy_back)
525 convert_move (op0, xop0, true);
526 return true;
527 }
528 delete_insns_since (last);
529 return false;
530 }
531
532 /* A subroutine of store_bit_field, with the same arguments. Return true
533 if the operation could be implemented.
534
535 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
536 no other way of implementing the operation. If FALLBACK_P is false,
537 return false instead. */
538
539 static bool
540 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 unsigned HOST_WIDE_INT bitregion_start,
543 unsigned HOST_WIDE_INT bitregion_end,
544 enum machine_mode fieldmode,
545 rtx value, bool fallback_p)
546 {
547 rtx op0 = str_rtx;
548 rtx orig_value;
549
550 while (GET_CODE (op0) == SUBREG)
551 {
552 /* The following line once was done only if WORDS_BIG_ENDIAN,
553 but I think that is a mistake. WORDS_BIG_ENDIAN is
554 meaningful at a much higher level; when structures are copied
555 between memory and regs, the higher-numbered regs
556 always get higher addresses. */
557 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
558 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
559 int byte_offset = 0;
560
561 /* Paradoxical subregs need special handling on big endian machines. */
562 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
563 {
564 int difference = inner_mode_size - outer_mode_size;
565
566 if (WORDS_BIG_ENDIAN)
567 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
568 if (BYTES_BIG_ENDIAN)
569 byte_offset += difference % UNITS_PER_WORD;
570 }
571 else
572 byte_offset = SUBREG_BYTE (op0);
573
574 bitnum += byte_offset * BITS_PER_UNIT;
575 op0 = SUBREG_REG (op0);
576 }
577
578 /* No action is needed if the target is a register and if the field
579 lies completely outside that register. This can occur if the source
580 code contains an out-of-bounds access to a small array. */
581 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
582 return true;
583
584 /* Use vec_set patterns for inserting parts of vectors whenever
585 available. */
586 if (VECTOR_MODE_P (GET_MODE (op0))
587 && !MEM_P (op0)
588 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
589 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
590 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
591 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
592 {
593 struct expand_operand ops[3];
594 enum machine_mode outermode = GET_MODE (op0);
595 enum machine_mode innermode = GET_MODE_INNER (outermode);
596 enum insn_code icode = optab_handler (vec_set_optab, outermode);
597 int pos = bitnum / GET_MODE_BITSIZE (innermode);
598
599 create_fixed_operand (&ops[0], op0);
600 create_input_operand (&ops[1], value, innermode);
601 create_integer_operand (&ops[2], pos);
602 if (maybe_expand_insn (icode, 3, ops))
603 return true;
604 }
605
 606 /* If the target is a register, overwriting the entire object or storing
 607 a full-word or multi-word field can be done with just a SUBREG. */
608 if (!MEM_P (op0)
609 && bitsize == GET_MODE_BITSIZE (fieldmode)
610 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
611 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
612 {
613 /* Use the subreg machinery either to narrow OP0 to the required
614 words or to cope with mode punning between equal-sized modes. */
615 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
616 bitnum / BITS_PER_UNIT);
617 if (sub)
618 {
619 emit_move_insn (sub, value);
620 return true;
621 }
622 }
623
624 /* If the target is memory, storing any naturally aligned field can be
625 done with a simple store. For targets that support fast unaligned
626 memory, any naturally sized, unit aligned field can be done directly. */
627 if (MEM_P (op0)
628 && bitnum % BITS_PER_UNIT == 0
629 && bitsize == GET_MODE_BITSIZE (fieldmode)
630 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
631 || (bitnum % bitsize == 0
632 && MEM_ALIGN (op0) % bitsize == 0)))
633 {
634 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
635 emit_move_insn (op0, value);
636 return true;
637 }
638
639 /* Make sure we are playing with integral modes. Pun with subregs
640 if we aren't. This must come after the entire register case above,
641 since that case is valid for any mode. The following cases are only
642 valid for integral modes. */
643 {
644 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
645 if (imode != GET_MODE (op0))
646 {
647 if (MEM_P (op0))
648 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
649 else
650 {
651 gcc_assert (imode != BLKmode);
652 op0 = gen_lowpart (imode, op0);
653 }
654 }
655 }
656
657 /* Storing an lsb-aligned field in a register
658 can be done with a movstrict instruction. */
659
660 if (!MEM_P (op0)
661 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
662 && bitsize == GET_MODE_BITSIZE (fieldmode)
663 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
664 {
665 struct expand_operand ops[2];
666 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
667 rtx arg0 = op0;
668 unsigned HOST_WIDE_INT subreg_off;
669
670 if (GET_CODE (arg0) == SUBREG)
671 {
672 /* Else we've got some float mode source being extracted into
673 a different float mode destination -- this combination of
674 subregs results in Severe Tire Damage. */
675 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
676 || GET_MODE_CLASS (fieldmode) == MODE_INT
677 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
678 arg0 = SUBREG_REG (arg0);
679 }
680
681 subreg_off = bitnum / BITS_PER_UNIT;
682 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
683 {
684 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
685
686 create_fixed_operand (&ops[0], arg0);
687 /* Shrink the source operand to FIELDMODE. */
688 create_convert_operand_to (&ops[1], value, fieldmode, false);
689 if (maybe_expand_insn (icode, 2, ops))
690 return true;
691 }
692 }
693
694 /* Handle fields bigger than a word. */
695
696 if (bitsize > BITS_PER_WORD)
697 {
698 /* Here we transfer the words of the field
699 in the order least significant first.
700 This is because the most significant word is the one which may
701 be less than full.
702 However, only do that if the value is not BLKmode. */
703
704 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
705 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
706 unsigned int i;
707 rtx last;
708
709 /* This is the mode we must force value to, so that there will be enough
710 subwords to extract. Note that fieldmode will often (always?) be
711 VOIDmode, because that is what store_field uses to indicate that this
712 is a bit field, but passing VOIDmode to operand_subword_force
713 is not allowed. */
714 fieldmode = GET_MODE (value);
715 if (fieldmode == VOIDmode)
716 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
717
718 last = get_last_insn ();
719 for (i = 0; i < nwords; i++)
720 {
721 /* If I is 0, use the low-order word in both field and target;
722 if I is 1, use the next to lowest word; and so on. */
723 unsigned int wordnum = (backwards
724 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
725 - i - 1
726 : i);
727 unsigned int bit_offset = (backwards
728 ? MAX ((int) bitsize - ((int) i + 1)
729 * BITS_PER_WORD,
730 0)
731 : (int) i * BITS_PER_WORD);
732 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
733 unsigned HOST_WIDE_INT new_bitsize =
734 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
735
 736 /* If the remaining chunk is narrower than a full word, we have
 737 to make sure that on big-endian machines the higher-order
 738 bits are used. */
739 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
740 value_word = simplify_expand_binop (word_mode, lshr_optab,
741 value_word,
742 GEN_INT (BITS_PER_WORD
743 - new_bitsize),
744 NULL_RTX, true,
745 OPTAB_LIB_WIDEN);
746
747 if (!store_bit_field_1 (op0, new_bitsize,
748 bitnum + bit_offset,
749 bitregion_start, bitregion_end,
750 word_mode,
751 value_word, fallback_p))
752 {
753 delete_insns_since (last);
754 return false;
755 }
756 }
757 return true;
758 }
759
760 /* If VALUE has a floating-point or complex mode, access it as an
761 integer of the corresponding size. This can occur on a machine
762 with 64 bit registers that uses SFmode for float. It can also
763 occur for unaligned float or complex fields. */
764 orig_value = value;
765 if (GET_MODE (value) != VOIDmode
766 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
767 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
768 {
769 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
770 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
771 }
772
773 /* If OP0 is a multi-word register, narrow it to the affected word.
774 If the region spans two words, defer to store_split_bit_field. */
775 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
776 {
777 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
778 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
779 gcc_assert (op0);
780 bitnum %= BITS_PER_WORD;
781 if (bitnum + bitsize > BITS_PER_WORD)
782 {
783 if (!fallback_p)
784 return false;
785
786 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
787 bitregion_end, value);
788 return true;
789 }
790 }
791
792 /* From here on we can assume that the field to be stored in fits
793 within a word. If the destination is a register, it too fits
794 in a word. */
795
796 extraction_insn insv;
797 if (!MEM_P (op0)
798 && get_best_reg_extraction_insn (&insv, EP_insv,
799 GET_MODE_BITSIZE (GET_MODE (op0)),
800 fieldmode)
801 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
802 return true;
803
804 /* If OP0 is a memory, try copying it to a register and seeing if a
805 cheap register alternative is available. */
806 if (MEM_P (op0))
807 {
808 /* Do not use unaligned memory insvs for volatile bitfields when
809 -fstrict-volatile-bitfields is in effect. */
810 if (!(MEM_VOLATILE_P (op0)
811 && flag_strict_volatile_bitfields > 0)
812 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
813 fieldmode)
814 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
815 return true;
816
817 rtx last = get_last_insn ();
818
819 /* Try loading part of OP0 into a register, inserting the bitfield
820 into that, and then copying the result back to OP0. */
821 unsigned HOST_WIDE_INT bitpos;
822 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
823 bitregion_start, bitregion_end,
824 fieldmode, &bitpos);
825 if (xop0)
826 {
827 rtx tempreg = copy_to_reg (xop0);
828 if (store_bit_field_1 (tempreg, bitsize, bitpos,
829 bitregion_start, bitregion_end,
830 fieldmode, orig_value, false))
831 {
832 emit_move_insn (xop0, tempreg);
833 return true;
834 }
835 delete_insns_since (last);
836 }
837 }
838
839 if (!fallback_p)
840 return false;
841
842 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
843 bitregion_end, value);
844 return true;
845 }
846
847 /* Generate code to store value from rtx VALUE
848 into a bit-field within structure STR_RTX
849 containing BITSIZE bits starting at bit BITNUM.
850
851 BITREGION_START is bitpos of the first bitfield in this region.
852 BITREGION_END is the bitpos of the ending bitfield in this region.
853 These two fields are 0, if the C++ memory model does not apply,
854 or we are not interested in keeping track of bitfield regions.
855
856 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
857
858 void
859 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
860 unsigned HOST_WIDE_INT bitnum,
861 unsigned HOST_WIDE_INT bitregion_start,
862 unsigned HOST_WIDE_INT bitregion_end,
863 enum machine_mode fieldmode,
864 rtx value)
865 {
866 /* Under the C++0x memory model, we must not touch bits outside the
867 bit region. Adjust the address to start at the beginning of the
868 bit region. */
869 if (MEM_P (str_rtx) && bitregion_start > 0)
870 {
871 enum machine_mode bestmode;
872 HOST_WIDE_INT offset, size;
873
874 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
875
876 offset = bitregion_start / BITS_PER_UNIT;
877 bitnum -= bitregion_start;
878 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
879 bitregion_end -= bitregion_start;
880 bitregion_start = 0;
881 bestmode = get_best_mode (bitsize, bitnum,
882 bitregion_start, bitregion_end,
883 MEM_ALIGN (str_rtx), VOIDmode,
884 MEM_VOLATILE_P (str_rtx));
885 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
886 }
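/* For example, with BITREGION_START == 32 the address is advanced by 4 bytes
   and BITNUM is reduced by 32; a 3-bit field that was at bit 37 is then at
   bit 5 of the new reference, and the adjusted MEM is given
   size == (5 + 3 + 7) / 8 == 1 byte.  */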
887
888 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
889 bitregion_start, bitregion_end,
890 fieldmode, value, true))
891 gcc_unreachable ();
892 }
893 \f
894 /* Use shifts and boolean operations to store VALUE into a bit field of
895 width BITSIZE in OP0, starting at bit BITNUM. */
896
897 static void
898 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
899 unsigned HOST_WIDE_INT bitnum,
900 unsigned HOST_WIDE_INT bitregion_start,
901 unsigned HOST_WIDE_INT bitregion_end,
902 rtx value)
903 {
904 enum machine_mode mode;
905 rtx temp;
906 int all_zero = 0;
907 int all_one = 0;
908
909 /* There is a case not handled here:
910 a structure with a known alignment of just a halfword
911 and a field split across two aligned halfwords within the structure.
912 Or likewise a structure with a known alignment of just a byte
913 and a field split across two bytes.
914 Such cases are not supposed to be able to occur. */
915
916 if (MEM_P (op0))
917 {
918 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
919
920 if (bitregion_end)
921 maxbits = bitregion_end - bitregion_start + 1;
922
923 /* Get the proper mode to use for this field. We want a mode that
924 includes the entire field. If such a mode would be larger than
925 a word, we won't be doing the extraction the normal way.
926 We don't want a mode bigger than the destination. */
927
928 mode = GET_MODE (op0);
929 if (GET_MODE_BITSIZE (mode) == 0
930 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
931 mode = word_mode;
932
933 if (MEM_VOLATILE_P (op0)
934 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
935 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
936 && flag_strict_volatile_bitfields > 0)
937 mode = GET_MODE (op0);
938 else
939 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
940 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
941
942 if (mode == VOIDmode)
943 {
944 /* The only way this should occur is if the field spans word
945 boundaries. */
946 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
947 bitregion_end, value);
948 return;
949 }
950
951 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
952 }
953
954 mode = GET_MODE (op0);
955 gcc_assert (SCALAR_INT_MODE_P (mode));
956
957 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
958 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
959
960 if (BYTES_BIG_ENDIAN)
961 /* BITNUM is the distance between our msb
962 and that of the containing datum.
963 Convert it to the distance from the lsb. */
964 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
965
966 /* Now BITNUM is always the distance between our lsb
967 and that of OP0. */
968
969 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
970 we must first convert its mode to MODE. */
971
972 if (CONST_INT_P (value))
973 {
974 HOST_WIDE_INT v = INTVAL (value);
975
976 if (bitsize < HOST_BITS_PER_WIDE_INT)
977 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
978
979 if (v == 0)
980 all_zero = 1;
981 else if ((bitsize < HOST_BITS_PER_WIDE_INT
982 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
983 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
984 all_one = 1;
985
986 value = lshift_value (mode, value, bitnum, bitsize);
987 }
988 else
989 {
990 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
991 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
992
993 if (GET_MODE (value) != mode)
994 value = convert_to_mode (mode, value, 1);
995
996 if (must_and)
997 value = expand_binop (mode, and_optab, value,
998 mask_rtx (mode, 0, bitsize, 0),
999 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1000 if (bitnum > 0)
1001 value = expand_shift (LSHIFT_EXPR, mode, value,
1002 bitnum, NULL_RTX, 1);
1003 }
1004
1005 /* Now clear the chosen bits in OP0,
1006 except that if VALUE is -1 we need not bother. */
1007 /* We keep the intermediates in registers to allow CSE to combine
1008 consecutive bitfield assignments. */
1009
1010 temp = force_reg (mode, op0);
1011
1012 if (! all_one)
1013 {
1014 temp = expand_binop (mode, and_optab, temp,
1015 mask_rtx (mode, bitnum, bitsize, 1),
1016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1017 temp = force_reg (mode, temp);
1018 }
1019
1020 /* Now logical-or VALUE into OP0, unless it is zero. */
1021
1022 if (! all_zero)
1023 {
1024 temp = expand_binop (mode, ior_optab, temp, value,
1025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1026 temp = force_reg (mode, temp);
1027 }
1028
1029 if (op0 != temp)
1030 {
1031 op0 = copy_rtx (op0);
1032 emit_move_insn (op0, temp);
1033 }
1034 }
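/* As an illustration of the sequence built above: storing a 3-bit value V
   at bit 4 of a byte-wide unit amounts to
     op0 = (op0 & ~(7 << 4)) | ((V & 7) << 4);
   the AND that clears the field is skipped when V is known to be all ones,
   and the final IOR is skipped when V is known to be zero.  */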
1035 \f
1036 /* Store a bit field that is split across multiple accessible memory objects.
1037
1038 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1039 BITSIZE is the field width; BITPOS the position of its first bit
1040 (within the word).
1041 VALUE is the value to store.
1042
1043 This does not yet handle fields wider than BITS_PER_WORD. */
1044
1045 static void
1046 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1047 unsigned HOST_WIDE_INT bitpos,
1048 unsigned HOST_WIDE_INT bitregion_start,
1049 unsigned HOST_WIDE_INT bitregion_end,
1050 rtx value)
1051 {
1052 unsigned int unit;
1053 unsigned int bitsdone = 0;
1054
 1055 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
 1056 much at a time. */
1057 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1058 unit = BITS_PER_WORD;
1059 else
1060 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1061
1062 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1063 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1064 that VALUE might be a floating-point constant. */
1065 if (CONSTANT_P (value) && !CONST_INT_P (value))
1066 {
1067 rtx word = gen_lowpart_common (word_mode, value);
1068
1069 if (word && (value != word))
1070 value = word;
1071 else
1072 value = gen_lowpart_common (word_mode,
1073 force_reg (GET_MODE (value) != VOIDmode
1074 ? GET_MODE (value)
1075 : word_mode, value));
1076 }
1077
1078 while (bitsdone < bitsize)
1079 {
1080 unsigned HOST_WIDE_INT thissize;
1081 rtx part, word;
1082 unsigned HOST_WIDE_INT thispos;
1083 unsigned HOST_WIDE_INT offset;
1084
1085 offset = (bitpos + bitsdone) / unit;
1086 thispos = (bitpos + bitsdone) % unit;
1087
 1088 /* When the region of bytes we can touch is restricted, decrease
 1089 UNIT as needed near the end of the region. */
1090 if (bitregion_end
1091 && unit > BITS_PER_UNIT
1092 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1093 {
1094 unit = unit / 2;
1095 continue;
1096 }
1097
1098 /* THISSIZE must not overrun a word boundary. Otherwise,
1099 store_fixed_bit_field will call us again, and we will mutually
1100 recurse forever. */
1101 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1102 thissize = MIN (thissize, unit - thispos);
1103
1104 if (BYTES_BIG_ENDIAN)
1105 {
1106 /* Fetch successively less significant portions. */
1107 if (CONST_INT_P (value))
1108 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109 >> (bitsize - bitsdone - thissize))
1110 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111 else
1112 {
1113 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1114 /* The args are chosen so that the last part includes the
1115 lsb. Give extract_bit_field the value it needs (with
1116 endianness compensation) to fetch the piece we want. */
1117 part = extract_fixed_bit_field (word_mode, value, thissize,
1118 total_bits - bitsize + bitsdone,
1119 NULL_RTX, 1, false);
1120 }
1121 }
1122 else
1123 {
1124 /* Fetch successively more significant portions. */
1125 if (CONST_INT_P (value))
1126 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1127 >> bitsdone)
1128 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1129 else
1130 part = extract_fixed_bit_field (word_mode, value, thissize,
1131 bitsdone, NULL_RTX, 1, false);
1132 }
1133
1134 /* If OP0 is a register, then handle OFFSET here.
1135
1136 When handling multiword bitfields, extract_bit_field may pass
1137 down a word_mode SUBREG of a larger REG for a bitfield that actually
1138 crosses a word boundary. Thus, for a SUBREG, we must find
1139 the current word starting from the base register. */
1140 if (GET_CODE (op0) == SUBREG)
1141 {
1142 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1143 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1144 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1145 word = word_offset ? const0_rtx : op0;
1146 else
1147 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1148 GET_MODE (SUBREG_REG (op0)));
1149 offset = 0;
1150 }
1151 else if (REG_P (op0))
1152 {
1153 enum machine_mode op0_mode = GET_MODE (op0);
1154 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1155 word = offset ? const0_rtx : op0;
1156 else
1157 word = operand_subword_force (op0, offset, GET_MODE (op0));
1158 offset = 0;
1159 }
1160 else
1161 word = op0;
1162
1163 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1164 it is just an out-of-bounds access. Ignore it. */
1165 if (word != const0_rtx)
1166 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1167 bitregion_start, bitregion_end, part);
1168 bitsdone += thissize;
1169 }
1170 }
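/* For example, with a register target (UNIT == BITS_PER_WORD == 32) a 10-bit
   field starting at bit 28 is stored in two pieces: the first iteration
   writes thissize = 32 - 28 = 4 bits at bit 28 of word 0, and the second
   writes the remaining 6 bits at bit 0 of word 1.  */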
1171 \f
1172 /* A subroutine of extract_bit_field_1 that converts return value X
1173 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1174 to extract_bit_field. */
1175
1176 static rtx
1177 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1178 enum machine_mode tmode, bool unsignedp)
1179 {
1180 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1181 return x;
1182
 1183 /* If TMODE is not a scalar integer mode, first convert X to the
 1184 scalar integer mode of the same size and then access the result
 1185 in TMODE via a SUBREG. */
1186 if (!SCALAR_INT_MODE_P (tmode))
1187 {
1188 enum machine_mode smode;
1189
1190 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1191 x = convert_to_mode (smode, x, unsignedp);
1192 x = force_reg (smode, x);
1193 return gen_lowpart (tmode, x);
1194 }
1195
1196 return convert_to_mode (tmode, x, unsignedp);
1197 }
1198
1199 /* Try to use an ext(z)v pattern to extract a field from OP0.
1200 Return the extracted value on success, otherwise return null.
1201 EXT_MODE is the mode of the extraction and the other arguments
1202 are as for extract_bit_field. */
1203
1204 static rtx
1205 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1206 unsigned HOST_WIDE_INT bitsize,
1207 unsigned HOST_WIDE_INT bitnum,
1208 int unsignedp, rtx target,
1209 enum machine_mode mode, enum machine_mode tmode)
1210 {
1211 struct expand_operand ops[4];
1212 rtx spec_target = target;
1213 rtx spec_target_subreg = 0;
1214 enum machine_mode ext_mode = extv->field_mode;
1215 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1216
1217 if (bitsize == 0 || unit < bitsize)
1218 return NULL_RTX;
1219
1220 if (MEM_P (op0))
1221 /* Get a reference to the first byte of the field. */
1222 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1223 &bitnum);
1224 else
1225 {
1226 /* Convert from counting within OP0 to counting in EXT_MODE. */
1227 if (BYTES_BIG_ENDIAN)
1228 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1229
1230 /* If op0 is a register, we need it in EXT_MODE to make it
1231 acceptable to the format of ext(z)v. */
1232 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1233 return NULL_RTX;
1234 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1235 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1236 }
1237
1238 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1239 "backwards" from the size of the unit we are extracting from.
1240 Otherwise, we count bits from the most significant on a
1241 BYTES/BITS_BIG_ENDIAN machine. */
1242
1243 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1244 bitnum = unit - bitsize - bitnum;
1245
1246 if (target == 0)
1247 target = spec_target = gen_reg_rtx (tmode);
1248
1249 if (GET_MODE (target) != ext_mode)
1250 {
 1251 /* Don't use LHS paradoxical subreg if explicit truncation is needed
 1252 between the mode of the extraction (EXT_MODE) and the target
 1253 mode. Instead, create a temporary and use convert_move to set
 1254 the target. */
1255 if (REG_P (target)
1256 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1257 {
1258 target = gen_lowpart (ext_mode, target);
1259 if (GET_MODE_PRECISION (ext_mode)
1260 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1261 spec_target_subreg = target;
1262 }
1263 else
1264 target = gen_reg_rtx (ext_mode);
1265 }
1266
1267 create_output_operand (&ops[0], target, ext_mode);
1268 create_fixed_operand (&ops[1], op0);
1269 create_integer_operand (&ops[2], bitsize);
1270 create_integer_operand (&ops[3], bitnum);
1271 if (maybe_expand_insn (extv->icode, 4, ops))
1272 {
1273 target = ops[0].value;
1274 if (target == spec_target)
1275 return target;
1276 if (target == spec_target_subreg)
1277 return spec_target;
1278 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1279 }
1280 return NULL_RTX;
1281 }
1282
1283 /* A subroutine of extract_bit_field, with the same arguments.
1284 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1285 if we can find no other means of implementing the operation.
 1286 If FALLBACK_P is false, return NULL instead. */
1287
1288 static rtx
1289 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1290 unsigned HOST_WIDE_INT bitnum,
1291 int unsignedp, bool packedp, rtx target,
1292 enum machine_mode mode, enum machine_mode tmode,
1293 bool fallback_p)
1294 {
1295 rtx op0 = str_rtx;
1296 enum machine_mode int_mode;
1297 enum machine_mode mode1;
1298
1299 if (tmode == VOIDmode)
1300 tmode = mode;
1301
1302 while (GET_CODE (op0) == SUBREG)
1303 {
1304 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1305 op0 = SUBREG_REG (op0);
1306 }
1307
1308 /* If we have an out-of-bounds access to a register, just return an
1309 uninitialized register of the required mode. This can occur if the
1310 source code contains an out-of-bounds access to a small array. */
1311 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1312 return gen_reg_rtx (tmode);
1313
1314 if (REG_P (op0)
1315 && mode == GET_MODE (op0)
1316 && bitnum == 0
1317 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1318 {
1319 /* We're trying to extract a full register from itself. */
1320 return op0;
1321 }
1322
1323 /* See if we can get a better vector mode before extracting. */
1324 if (VECTOR_MODE_P (GET_MODE (op0))
1325 && !MEM_P (op0)
1326 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1327 {
1328 enum machine_mode new_mode;
1329
1330 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1331 new_mode = MIN_MODE_VECTOR_FLOAT;
1332 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1333 new_mode = MIN_MODE_VECTOR_FRACT;
1334 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1335 new_mode = MIN_MODE_VECTOR_UFRACT;
1336 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1337 new_mode = MIN_MODE_VECTOR_ACCUM;
1338 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1339 new_mode = MIN_MODE_VECTOR_UACCUM;
1340 else
1341 new_mode = MIN_MODE_VECTOR_INT;
1342
1343 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1344 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1345 && targetm.vector_mode_supported_p (new_mode))
1346 break;
1347 if (new_mode != VOIDmode)
1348 op0 = gen_lowpart (new_mode, op0);
1349 }
1350
1351 /* Use vec_extract patterns for extracting parts of vectors whenever
1352 available. */
1353 if (VECTOR_MODE_P (GET_MODE (op0))
1354 && !MEM_P (op0)
1355 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1356 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1357 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1358 {
1359 struct expand_operand ops[3];
1360 enum machine_mode outermode = GET_MODE (op0);
1361 enum machine_mode innermode = GET_MODE_INNER (outermode);
1362 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1363 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1364
1365 create_output_operand (&ops[0], target, innermode);
1366 create_input_operand (&ops[1], op0, outermode);
1367 create_integer_operand (&ops[2], pos);
1368 if (maybe_expand_insn (icode, 3, ops))
1369 {
1370 target = ops[0].value;
1371 if (GET_MODE (target) != mode)
1372 return gen_lowpart (tmode, target);
1373 return target;
1374 }
1375 }
1376
1377 /* Make sure we are playing with integral modes. Pun with subregs
1378 if we aren't. */
1379 {
1380 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1381 if (imode != GET_MODE (op0))
1382 {
1383 if (MEM_P (op0))
1384 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1385 else if (imode != BLKmode)
1386 {
1387 op0 = gen_lowpart (imode, op0);
1388
1389 /* If we got a SUBREG, force it into a register since we
1390 aren't going to be able to do another SUBREG on it. */
1391 if (GET_CODE (op0) == SUBREG)
1392 op0 = force_reg (imode, op0);
1393 }
1394 else if (REG_P (op0))
1395 {
1396 rtx reg, subreg;
1397 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1398 MODE_INT);
1399 reg = gen_reg_rtx (imode);
1400 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1401 emit_move_insn (subreg, op0);
1402 op0 = reg;
1403 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1404 }
1405 else
1406 {
1407 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1408 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1409 emit_move_insn (mem, op0);
1410 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1411 }
1412 }
1413 }
1414
1415 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1416 If that's wrong, the solution is to test for it and set TARGET to 0
1417 if needed. */
1418
1419 /* If the bitfield is volatile, we need to make sure the access
1420 remains on a type-aligned boundary. */
1421 if (GET_CODE (op0) == MEM
1422 && MEM_VOLATILE_P (op0)
1423 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1424 && flag_strict_volatile_bitfields > 0)
1425 goto no_subreg_mode_swap;
1426
1427 /* Only scalar integer modes can be converted via subregs. There is an
1428 additional problem for FP modes here in that they can have a precision
1429 which is different from the size. mode_for_size uses precision, but
1430 we want a mode based on the size, so we must avoid calling it for FP
1431 modes. */
1432 mode1 = mode;
1433 if (SCALAR_INT_MODE_P (tmode))
1434 {
1435 enum machine_mode try_mode = mode_for_size (bitsize,
1436 GET_MODE_CLASS (tmode), 0);
1437 if (try_mode != BLKmode)
1438 mode1 = try_mode;
1439 }
1440 gcc_assert (mode1 != BLKmode);
1441
1442 /* Extraction of a full MODE1 value can be done with a subreg as long
1443 as the least significant bit of the value is the least significant
1444 bit of either OP0 or a word of OP0. */
1445 if (!MEM_P (op0)
1446 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1447 && bitsize == GET_MODE_BITSIZE (mode1)
1448 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1449 {
1450 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1451 bitnum / BITS_PER_UNIT);
1452 if (sub)
1453 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1454 }
1455
1456 /* Extraction of a full MODE1 value can be done with a load as long as
1457 the field is on a byte boundary and is sufficiently aligned. */
1458 if (MEM_P (op0)
1459 && bitnum % BITS_PER_UNIT == 0
1460 && bitsize == GET_MODE_BITSIZE (mode1)
1461 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1462 || (bitnum % bitsize == 0
1463 && MEM_ALIGN (op0) % bitsize == 0)))
1464 {
1465 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1466 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1467 }
1468
1469 no_subreg_mode_swap:
1470
1471 /* Handle fields bigger than a word. */
1472
1473 if (bitsize > BITS_PER_WORD)
1474 {
1475 /* Here we transfer the words of the field
1476 in the order least significant first.
1477 This is because the most significant word is the one which may
1478 be less than full. */
1479
1480 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1481 unsigned int i;
1482 rtx last;
1483
1484 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1485 target = gen_reg_rtx (mode);
1486
1487 /* Indicate for flow that the entire target reg is being set. */
1488 emit_clobber (target);
1489
1490 last = get_last_insn ();
1491 for (i = 0; i < nwords; i++)
1492 {
1493 /* If I is 0, use the low-order word in both field and target;
1494 if I is 1, use the next to lowest word; and so on. */
1495 /* Word number in TARGET to use. */
1496 unsigned int wordnum
1497 = (WORDS_BIG_ENDIAN
1498 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1499 : i);
1500 /* Offset from start of field in OP0. */
1501 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1502 ? MAX (0, ((int) bitsize - ((int) i + 1)
1503 * (int) BITS_PER_WORD))
1504 : (int) i * BITS_PER_WORD);
1505 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1506 rtx result_part
1507 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1508 bitsize - i * BITS_PER_WORD),
1509 bitnum + bit_offset, 1, false, target_part,
1510 mode, word_mode, fallback_p);
1511
1512 gcc_assert (target_part);
1513 if (!result_part)
1514 {
1515 delete_insns_since (last);
1516 return NULL;
1517 }
1518
1519 if (result_part != target_part)
1520 emit_move_insn (target_part, result_part);
1521 }
1522
1523 if (unsignedp)
1524 {
1525 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1526 need to be zero'd out. */
1527 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1528 {
1529 unsigned int i, total_words;
1530
1531 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1532 for (i = nwords; i < total_words; i++)
1533 emit_move_insn
1534 (operand_subword (target,
1535 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1536 1, VOIDmode),
1537 const0_rtx);
1538 }
1539 return target;
1540 }
1541
1542 /* Signed bit field: sign-extend with two arithmetic shifts. */
1543 target = expand_shift (LSHIFT_EXPR, mode, target,
1544 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1545 return expand_shift (RSHIFT_EXPR, mode, target,
1546 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1547 }
1548
1549 /* If OP0 is a multi-word register, narrow it to the affected word.
1550 If the region spans two words, defer to extract_split_bit_field. */
1551 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1552 {
1553 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1554 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1555 bitnum %= BITS_PER_WORD;
1556 if (bitnum + bitsize > BITS_PER_WORD)
1557 {
1558 if (!fallback_p)
1559 return NULL_RTX;
1560 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1561 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1562 }
1563 }
1564
1565 /* From here on we know the desired field is smaller than a word.
1566 If OP0 is a register, it too fits within a word. */
1567 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1568 extraction_insn extv;
1569 if (!MEM_P (op0)
1570 && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1571 tmode))
1572 {
1573 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1574 unsignedp, target, mode,
1575 tmode);
1576 if (result)
1577 return result;
1578 }
1579
1580 /* If OP0 is a memory, try copying it to a register and seeing if a
1581 cheap register alternative is available. */
1582 if (MEM_P (op0))
1583 {
1584 /* Do not use extv/extzv for volatile bitfields when
1585 -fstrict-volatile-bitfields is in effect. */
1586 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1587 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1588 tmode))
1589 {
1590 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1591 bitnum, unsignedp,
1592 target, mode,
1593 tmode);
1594 if (result)
1595 return result;
1596 }
1597
1598 rtx last = get_last_insn ();
1599
1600 /* Try loading part of OP0 into a register and extracting the
1601 bitfield from that. */
1602 unsigned HOST_WIDE_INT bitpos;
1603 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1604 0, 0, tmode, &bitpos);
1605 if (xop0)
1606 {
1607 xop0 = copy_to_reg (xop0);
1608 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1609 unsignedp, packedp, target,
1610 mode, tmode, false);
1611 if (result)
1612 return result;
1613 delete_insns_since (last);
1614 }
1615 }
1616
1617 if (!fallback_p)
1618 return NULL;
1619
1620 /* Find a correspondingly-sized integer field, so we can apply
1621 shifts and masks to it. */
1622 int_mode = int_mode_for_mode (tmode);
1623 if (int_mode == BLKmode)
1624 int_mode = int_mode_for_mode (mode);
1625 /* Should probably push op0 out to memory and then do a load. */
1626 gcc_assert (int_mode != BLKmode);
1627
1628 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1629 target, unsignedp, packedp);
1630 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1631 }
1632
 1633 /* Generate code to extract a bit-field from STR_RTX
1634 containing BITSIZE bits, starting at BITNUM,
1635 and put it in TARGET if possible (if TARGET is nonzero).
1636 Regardless of TARGET, we return the rtx for where the value is placed.
1637
1638 STR_RTX is the structure containing the byte (a REG or MEM).
1639 UNSIGNEDP is nonzero if this is an unsigned bit field.
1640 PACKEDP is nonzero if the field has the packed attribute.
1641 MODE is the natural mode of the field value once extracted.
1642 TMODE is the mode the caller would like the value to have;
1643 but the value may be returned with type MODE instead.
1644
1645 If a TARGET is specified and we can store in it at no extra cost,
1646 we do so, and return TARGET.
1647 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1648 if they are equally easy. */
1649
1650 rtx
1651 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1652 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1653 rtx target, enum machine_mode mode, enum machine_mode tmode)
1654 {
1655 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1656 target, mode, tmode, true);
1657 }
1658 \f
1659 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1660 from bit BITNUM of OP0.
1661
1662 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1663 PACKEDP is true if the field has the packed attribute.
1664
1665 If TARGET is nonzero, attempts to store the value there
1666 and return TARGET, but this is not guaranteed.
1667 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1668
1669 static rtx
1670 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1671 unsigned HOST_WIDE_INT bitsize,
1672 unsigned HOST_WIDE_INT bitnum, rtx target,
1673 int unsignedp, bool packedp)
1674 {
1675 enum machine_mode mode;
1676
1677 if (MEM_P (op0))
1678 {
1679 /* Get the proper mode to use for this field. We want a mode that
1680 includes the entire field. If such a mode would be larger than
1681 a word, we won't be doing the extraction the normal way. */
1682
1683 if (MEM_VOLATILE_P (op0)
1684 && flag_strict_volatile_bitfields > 0)
1685 {
1686 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1687 mode = GET_MODE (op0);
1688 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1689 mode = GET_MODE (target);
1690 else
1691 mode = tmode;
1692 }
1693 else
1694 mode = get_best_mode (bitsize, bitnum, 0, 0,
1695 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1696
1697 if (mode == VOIDmode)
1698 /* The only way this should occur is if the field spans word
1699 boundaries. */
1700 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1701
1702 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1703 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1704
1705 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1706 if it results in a multi-word access where we otherwise wouldn't
1707 have one. So, check for that case here. */
1708 if (MEM_P (op0)
1709 && MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0
1711 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1712 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1713 {
1714 if (STRICT_ALIGNMENT)
1715 {
1716 static bool informed_about_misalignment = false;
1717
1718 if (packedp)
1719 {
1720 if (bitsize == total_bits)
1721 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1722 "multiple accesses to volatile structure"
1723 " member because of packed attribute");
1724 else
1725 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1726 "multiple accesses to volatile structure"
1727 " bitfield because of packed attribute");
1728
1729 return extract_split_bit_field (op0, bitsize, bitnum,
1730 unsignedp);
1731 }
1732
1733 if (bitsize == total_bits)
1734 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1735 "mis-aligned access used for structure member");
1736 else
1737 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1738 "mis-aligned access used for structure bitfield");
1739
1740 if (! informed_about_misalignment)
1741 {
1742 informed_about_misalignment = true;
1743 inform (input_location,
1744 "when a volatile object spans multiple type-sized"
1745 " locations, the compiler must choose between using"
1746 " a single mis-aligned access to preserve the"
1747 " volatility, or using multiple aligned accesses"
1748 " to avoid runtime faults; this code may fail at"
1749 " runtime if the hardware does not allow this"
1750 " access");
1751 }
1752 }
1753 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1754 }
1755 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1756 bitnum -= bit_offset;
1757 }
1758
1759 mode = GET_MODE (op0);
1760 gcc_assert (SCALAR_INT_MODE_P (mode));
1761
1762 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1763 for invalid input, such as extract equivalent of f5 from
1764 gcc.dg/pr48335-2.c. */
1765
1766 if (BYTES_BIG_ENDIAN)
1767 /* BITNUM is the distance between our msb and that of OP0.
1768 Convert it to the distance from the lsb. */
1769 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1770
1771 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1772 We have reduced the big-endian case to the little-endian case. */
1773
1774 if (unsignedp)
1775 {
1776 if (bitnum)
1777 {
1778 /* If the field does not already start at the lsb,
1779 shift it so it does. */
1780 /* Maybe propagate the target for the shift. */
1781 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1782 if (tmode != mode)
1783 subtarget = 0;
1784 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1785 }
1786 /* Convert the value to the desired mode. */
1787 if (mode != tmode)
1788 op0 = convert_to_mode (tmode, op0, 1);
1789
1790 /* Unless the msb of the field used to be the msb when we shifted,
1791 mask out the upper bits. */
1792
1793 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1794 return expand_binop (GET_MODE (op0), and_optab, op0,
1795 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1796 target, 1, OPTAB_LIB_WIDEN);
1797 return op0;
1798 }
1799
1800 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1801 then arithmetic-shift its lsb to the lsb of the word. */
1802 op0 = force_reg (mode, op0);
1803
1804 /* Find the narrowest integer mode that contains the field. */
1805
1806 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1807 mode = GET_MODE_WIDER_MODE (mode))
1808 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1809 {
1810 op0 = convert_to_mode (mode, op0, 0);
1811 break;
1812 }
1813
1814 if (mode != tmode)
1815 target = 0;
1816
1817 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1818 {
1819 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1820 /* Maybe propagate the target for the shift. */
1821 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1822 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1823 }
1824
1825 return expand_shift (RSHIFT_EXPR, mode, op0,
1826 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1827 }
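/* For illustration only (not part of this file): a minimal C sketch of the
   shift-and-mask extraction performed by extract_fixed_bit_field above,
   assuming a 32-bit word, little-endian bit numbering and
   0 < BITSIZE, BITNUM + BITSIZE <= 32.  The names are hypothetical.

     unsigned int
     extract_u (unsigned int word, int bitnum, int bitsize)
     {
       word >>= bitnum;                  // bring the field down to the lsb
       if (bitsize < 32)                 // mask out the bits above the field
         word &= ((unsigned int) 1 << bitsize) - 1;
       return word;
     }

     int
     extract_s (unsigned int word, int bitnum, int bitsize)
     {
       // Shift the field's msb up to the word's msb, then arithmetic-shift
       // its lsb back down to bit 0, as the signed path above does.  This
       // assumes the usual two's-complement, arithmetic-right-shift behaviour.
       int hi = (int) (word << (32 - bitsize - bitnum));
       return hi >> (32 - bitsize);
     }
*/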
1828 \f
1829 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1830 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1831 complement of that if COMPLEMENT. The mask is truncated if
1832 necessary to the width of mode MODE. The mask is zero-extended if
1833 BITSIZE+BITPOS is too small for MODE. */
1834
1835 static rtx
1836 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1837 {
1838 double_int mask;
1839
1840 mask = double_int::mask (bitsize);
1841 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1842
1843 if (complement)
1844 mask = ~mask;
1845
1846 return immed_double_int_const (mask, mode);
1847 }
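/* Worked example (illustrative only): for SImode with BITPOS == 4,
   BITSIZE == 8 and COMPLEMENT == 0, mask_rtx builds eight ones shifted
   left by four, i.e. 0x00000ff0; with COMPLEMENT != 0 it builds the
   inverse, 0xfffff00f, truncated to the 32 bits of SImode.  */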
1848
1849 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1850 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1851
1852 static rtx
1853 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1854 {
1855 double_int val;
1856
1857 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1858 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1859
1860 return immed_double_int_const (val, mode);
1861 }
1862 \f
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1865
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1869
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1873 {
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1878
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1885
1886 while (bitsdone < bitsize)
1887 {
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1892
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1895
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1901
1902 /* If OP0 is a register, then handle OFFSET here.
1903
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1909 {
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1914 }
1915 else if (REG_P (op0))
1916 {
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1919 }
1920 else
1921 word = op0;
1922
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits. */
1926 part = extract_fixed_bit_field (word_mode, word, thissize,
1927 offset * unit + thispos, 0, 1, false);
1928 bitsdone += thissize;
1929
1930 /* Shift this part into place for the result. */
1931 if (BYTES_BIG_ENDIAN)
1932 {
1933 if (bitsize != bitsdone)
1934 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1935 bitsize - bitsdone, 0, 1);
1936 }
1937 else
1938 {
1939 if (bitsdone != thissize)
1940 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1941 bitsdone - thissize, 0, 1);
1942 }
1943
1944 if (first)
1945 result = part;
1946 else
1947 /* Combine the parts with bitwise or. This works
1948 because we extracted each part as an unsigned bit field. */
1949 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1950 OPTAB_LIB_WIDEN);
1951
1952 first = 0;
1953 }
1954
1955 /* Unsigned bit field: we are done. */
1956 if (unsignedp)
1957 return result;
1958 /* Signed bit field: sign-extend with two arithmetic shifts. */
1959 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1960 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1961 return expand_shift (RSHIFT_EXPR, word_mode, result,
1962 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1963 }
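/* Worked example (illustrative only): extracting a 10-bit field that starts
   at bit 28 of a register on a 32-bit-word, !BYTES_BIG_ENDIAN target.  The
   loop above first pulls out the 4 bits that live in word 0 (bits 28..31),
   then the 6 bits that live in word 1 (bits 0..5), shifts that second part
   left by 4 and IORs the two parts together; for a signed field the final
   pair of shifts then sign-extends the 10-bit result.  */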
1964 \f
1965 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1966 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1967 MODE, fill the upper bits with zeros. Fail if the layout of either
1968 mode is unknown (as for CC modes) or if the extraction would involve
1969 unprofitable mode punning. Return the value on success, otherwise
1970 return null.
1971
1972 This is different from gen_lowpart* in these respects:
1973
1974 - the returned value must always be considered an rvalue
1975
1976 - when MODE is wider than SRC_MODE, the extraction involves
1977 a zero extension
1978
1979 - when MODE is smaller than SRC_MODE, the extraction involves
1980 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1981
1982 In other words, this routine performs a computation, whereas the
1983 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1984 operations. */
1985
1986 rtx
1987 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1988 {
1989 enum machine_mode int_mode, src_int_mode;
1990
1991 if (mode == src_mode)
1992 return src;
1993
1994 if (CONSTANT_P (src))
1995 {
1996 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1997 fails, it will happily create (subreg (symbol_ref)) or similar
1998 invalid SUBREGs. */
1999 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2000 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2001 if (ret)
2002 return ret;
2003
2004 if (GET_MODE (src) == VOIDmode
2005 || !validate_subreg (mode, src_mode, src, byte))
2006 return NULL_RTX;
2007
2008 src = force_reg (GET_MODE (src), src);
2009 return gen_rtx_SUBREG (mode, src, byte);
2010 }
2011
2012 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2013 return NULL_RTX;
2014
2015 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2016 && MODES_TIEABLE_P (mode, src_mode))
2017 {
2018 rtx x = gen_lowpart_common (mode, src);
2019 if (x)
2020 return x;
2021 }
2022
2023 src_int_mode = int_mode_for_mode (src_mode);
2024 int_mode = int_mode_for_mode (mode);
2025 if (src_int_mode == BLKmode || int_mode == BLKmode)
2026 return NULL_RTX;
2027
2028 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2029 return NULL_RTX;
2030 if (!MODES_TIEABLE_P (int_mode, mode))
2031 return NULL_RTX;
2032
2033 src = gen_lowpart (src_int_mode, src);
2034 src = convert_modes (int_mode, src_int_mode, src, true);
2035 src = gen_lowpart (mode, src);
2036 return src;
2037 }
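/* For illustration only, assuming a target where SFmode and SImode are both
   32 bits wide and tieable: extract_low_bits (SImode, SFmode, x) returns the
   raw bit pattern of the float X as an SImode rvalue, while widening a
   16-bit HImode value to SImode through this routine zero-fills the upper
   16 bits.  Whether the punning is actually performed depends on the
   MODES_TIEABLE_P and int_mode_for_mode checks above.  */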
2038 \f
2039 /* Add INC into TARGET. */
2040
2041 void
2042 expand_inc (rtx target, rtx inc)
2043 {
2044 rtx value = expand_binop (GET_MODE (target), add_optab,
2045 target, inc,
2046 target, 0, OPTAB_LIB_WIDEN);
2047 if (value != target)
2048 emit_move_insn (target, value);
2049 }
2050
2051 /* Subtract DEC from TARGET. */
2052
2053 void
2054 expand_dec (rtx target, rtx dec)
2055 {
2056 rtx value = expand_binop (GET_MODE (target), sub_optab,
2057 target, dec,
2058 target, 0, OPTAB_LIB_WIDEN);
2059 if (value != target)
2060 emit_move_insn (target, value);
2061 }
2062 \f
2063 /* Output a shift instruction for expression code CODE,
2064 with SHIFTED being the rtx for the value to shift,
2065 and AMOUNT the rtx for the amount to shift by.
2066 Store the result in the rtx TARGET, if that is convenient.
2067 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2068 Return the rtx for where the value is. */
2069
2070 static rtx
2071 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2072 rtx amount, rtx target, int unsignedp)
2073 {
2074 rtx op1, temp = 0;
2075 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2076 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2077 optab lshift_optab = ashl_optab;
2078 optab rshift_arith_optab = ashr_optab;
2079 optab rshift_uns_optab = lshr_optab;
2080 optab lrotate_optab = rotl_optab;
2081 optab rrotate_optab = rotr_optab;
2082 enum machine_mode op1_mode;
2083 int attempt;
2084 bool speed = optimize_insn_for_speed_p ();
2085
2086 op1 = amount;
2087 op1_mode = GET_MODE (op1);
2088
2089 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2090 shift amount is a vector, use the vector/vector shift patterns. */
2091 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2092 {
2093 lshift_optab = vashl_optab;
2094 rshift_arith_optab = vashr_optab;
2095 rshift_uns_optab = vlshr_optab;
2096 lrotate_optab = vrotl_optab;
2097 rrotate_optab = vrotr_optab;
2098 }
2099
2100 /* We used to detect shift counts computed by NEGATE_EXPR
2101 and shift in the other direction, but that does not work
2102 on all machines. */
2103
2104 if (SHIFT_COUNT_TRUNCATED)
2105 {
2106 if (CONST_INT_P (op1)
2107 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2108 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2109 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2110 % GET_MODE_BITSIZE (mode));
2111 else if (GET_CODE (op1) == SUBREG
2112 && subreg_lowpart_p (op1)
2113 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2114 && INTEGRAL_MODE_P (GET_MODE (op1)))
2115 op1 = SUBREG_REG (op1);
2116 }
2117
2118 if (op1 == const0_rtx)
2119 return shifted;
2120
2121 /* Check whether it's cheaper to implement a left shift by a constant
2122 bit count by a sequence of additions. */
2123 if (code == LSHIFT_EXPR
2124 && CONST_INT_P (op1)
2125 && INTVAL (op1) > 0
2126 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2127 && INTVAL (op1) < MAX_BITS_PER_WORD
2128 && (shift_cost (speed, mode, INTVAL (op1))
2129 > INTVAL (op1) * add_cost (speed, mode))
2130 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2131 {
2132 int i;
2133 for (i = 0; i < INTVAL (op1); i++)
2134 {
2135 temp = force_reg (mode, shifted);
2136 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2137 unsignedp, OPTAB_LIB_WIDEN);
2138 }
2139 return shifted;
2140 }
2141
2142 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2143 {
2144 enum optab_methods methods;
2145
2146 if (attempt == 0)
2147 methods = OPTAB_DIRECT;
2148 else if (attempt == 1)
2149 methods = OPTAB_WIDEN;
2150 else
2151 methods = OPTAB_LIB_WIDEN;
2152
2153 if (rotate)
2154 {
2155 /* Widening does not work for rotation. */
2156 if (methods == OPTAB_WIDEN)
2157 continue;
2158 else if (methods == OPTAB_LIB_WIDEN)
2159 {
2160 /* If we have been unable to open-code this by a rotation,
2161 do it as the IOR of two shifts. I.e., to rotate A
2162 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2163 where C is the bitsize of A.
2164
2165 It is theoretically possible that the target machine might
2166 not be able to perform either shift and hence we would
2167 be making two libcalls rather than just the one for the
2168 shift (similarly if IOR could not be done). We will allow
2169 this extremely unlikely lossage to avoid complicating the
2170 code below. */
2171
2172 rtx subtarget = target == shifted ? 0 : target;
2173 rtx new_amount, other_amount;
2174 rtx temp1;
2175
2176 new_amount = op1;
2177 if (CONST_INT_P (op1))
2178 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2179 - INTVAL (op1));
2180 else
2181 other_amount
2182 = simplify_gen_binary (MINUS, GET_MODE (op1),
2183 GEN_INT (GET_MODE_PRECISION (mode)),
2184 op1);
2185
2186 shifted = force_reg (mode, shifted);
2187
2188 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2189 mode, shifted, new_amount, 0, 1);
2190 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2191 mode, shifted, other_amount,
2192 subtarget, 1);
2193 return expand_binop (mode, ior_optab, temp, temp1, target,
2194 unsignedp, methods);
2195 }
2196
2197 temp = expand_binop (mode,
2198 left ? lrotate_optab : rrotate_optab,
2199 shifted, op1, target, unsignedp, methods);
2200 }
2201 else if (unsignedp)
2202 temp = expand_binop (mode,
2203 left ? lshift_optab : rshift_uns_optab,
2204 shifted, op1, target, unsignedp, methods);
2205
2206 /* Do arithmetic shifts.
2207 Also, if we are going to widen the operand, we can just as well
2208 use an arithmetic right-shift instead of a logical one. */
2209 if (temp == 0 && ! rotate
2210 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2211 {
2212 enum optab_methods methods1 = methods;
2213
2214 /* If trying to widen a log shift to an arithmetic shift,
2215 don't accept an arithmetic shift of the same size. */
2216 if (unsignedp)
2217 methods1 = OPTAB_MUST_WIDEN;
2218
2219 /* Arithmetic shift */
2220
2221 temp = expand_binop (mode,
2222 left ? lshift_optab : rshift_arith_optab,
2223 shifted, op1, target, unsignedp, methods1);
2224 }
2225
2226 /* We used to try extzv here for logical right shifts, but that was
2227 only useful for one machine, the VAX, and caused poor code
2228 generation there for lshrdi3, so the code was deleted and a
2229 define_expand for lshrsi3 was added to vax.md. */
2230 }
2231
2232 gcc_assert (temp);
2233 return temp;
2234 }
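/* For illustration only (not part of this file): the IOR-of-two-shifts
   fallback used above for rotates corresponds to the familiar C idiom
   below, shown for a 32-bit value and a rotate count 0 < N < 32; the
   name is hypothetical.

     unsigned int
     rotl32 (unsigned int a, unsigned int n)
     {
       return (a << n) | (a >> (32 - n));
     }

   Likewise, the shift-by-additions path above turns a cheap-enough left
   shift by a constant into repeated doubling, e.g. x << 3 is emitted as
   three successive additions: t = x + x; t = t + t; t = t + t.  */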
2235
2236 /* Output a shift instruction for expression code CODE,
2237 with SHIFTED being the rtx for the value to shift,
2238 and AMOUNT the amount to shift by.
2239 Store the result in the rtx TARGET, if that is convenient.
2240 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2241 Return the rtx for where the value is. */
2242
2243 rtx
2244 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2245 int amount, rtx target, int unsignedp)
2246 {
2247 return expand_shift_1 (code, mode,
2248 shifted, GEN_INT (amount), target, unsignedp);
2249 }
2250
2251 /* Output a shift instruction for expression code CODE,
2252 with SHIFTED being the rtx for the value to shift,
2253 and AMOUNT the tree for the amount to shift by.
2254 Store the result in the rtx TARGET, if that is convenient.
2255 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2256 Return the rtx for where the value is. */
2257
2258 rtx
2259 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2260 tree amount, rtx target, int unsignedp)
2261 {
2262 return expand_shift_1 (code, mode,
2263 shifted, expand_normal (amount), target, unsignedp);
2264 }
2265
2266 \f
2267 /* Indicates the type of fixup needed after a constant multiplication.
2268 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2269 the result should be negated, and ADD_VARIANT means that the
2270 multiplicand should be added to the result. */
2271 enum mult_variant {basic_variant, negate_variant, add_variant};
2272
2273 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2274 const struct mult_cost *, enum machine_mode mode);
2275 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2276 struct algorithm *, enum mult_variant *, int);
2277 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2278 const struct algorithm *, enum mult_variant);
2279 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2280 static rtx extract_high_half (enum machine_mode, rtx);
2281 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2282 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2283 int, int);
2284 /* Compute and return the best algorithm for multiplying by T.
2285 The algorithm must cost less than COST_LIMIT.
2286 If retval.cost >= COST_LIMIT, no algorithm was found and all
2287 other fields of the returned struct are undefined.
2288 MODE is the machine mode of the multiplication. */
2289
2290 static void
2291 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2292 const struct mult_cost *cost_limit, enum machine_mode mode)
2293 {
2294 int m;
2295 struct algorithm *alg_in, *best_alg;
2296 struct mult_cost best_cost;
2297 struct mult_cost new_limit;
2298 int op_cost, op_latency;
2299 unsigned HOST_WIDE_INT orig_t = t;
2300 unsigned HOST_WIDE_INT q;
2301 int maxm, hash_index;
2302 bool cache_hit = false;
2303 enum alg_code cache_alg = alg_zero;
2304 bool speed = optimize_insn_for_speed_p ();
2305 enum machine_mode imode;
2306 struct alg_hash_entry *entry_ptr;
2307
2308 /* Indicate that no algorithm is yet found. If no algorithm
2309 is found, this value will be returned and indicate failure. */
2310 alg_out->cost.cost = cost_limit->cost + 1;
2311 alg_out->cost.latency = cost_limit->latency + 1;
2312
2313 if (cost_limit->cost < 0
2314 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2315 return;
2316
2317 /* Be prepared for vector modes. */
2318 imode = GET_MODE_INNER (mode);
2319 if (imode == VOIDmode)
2320 imode = mode;
2321
2322 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2323
2324 /* Restrict the bits of "t" to the multiplication's mode. */
2325 t &= GET_MODE_MASK (imode);
2326
2327 /* t == 1 can be done in zero cost. */
2328 if (t == 1)
2329 {
2330 alg_out->ops = 1;
2331 alg_out->cost.cost = 0;
2332 alg_out->cost.latency = 0;
2333 alg_out->op[0] = alg_m;
2334 return;
2335 }
2336
2337 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2338 fail now. */
2339 if (t == 0)
2340 {
2341 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2342 return;
2343 else
2344 {
2345 alg_out->ops = 1;
2346 alg_out->cost.cost = zero_cost (speed);
2347 alg_out->cost.latency = zero_cost (speed);
2348 alg_out->op[0] = alg_zero;
2349 return;
2350 }
2351 }
2352
2353 /* We'll be needing a couple extra algorithm structures now. */
2354
2355 alg_in = XALLOCA (struct algorithm);
2356 best_alg = XALLOCA (struct algorithm);
2357 best_cost = *cost_limit;
2358
2359 /* Compute the hash index. */
2360 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2361
2362 /* See if we already know what to do for T. */
2363 entry_ptr = alg_hash_entry_ptr (hash_index);
2364 if (entry_ptr->t == t
2365 && entry_ptr->mode == mode
2367 && entry_ptr->speed == speed
2368 && entry_ptr->alg != alg_unknown)
2369 {
2370 cache_alg = entry_ptr->alg;
2371
2372 if (cache_alg == alg_impossible)
2373 {
2374 /* The cache tells us that it's impossible to synthesize
2375 multiplication by T within entry_ptr->cost. */
2376 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2377 /* COST_LIMIT is at least as restrictive as the one
2378 recorded in the hash table, in which case we have no
2379 hope of synthesizing a multiplication. Just
2380 return. */
2381 return;
2382
2383 /* If we get here, COST_LIMIT is less restrictive than the
2384 one recorded in the hash table, so we may be able to
2385 synthesize a multiplication. Proceed as if we didn't
2386 have the cache entry. */
2387 }
2388 else
2389 {
2390 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2391 /* The cached algorithm shows that this multiplication
2392 requires more cost than COST_LIMIT. Just return. This
2393 way, we don't clobber this cache entry with
2394 alg_impossible but retain useful information. */
2395 return;
2396
2397 cache_hit = true;
2398
2399 switch (cache_alg)
2400 {
2401 case alg_shift:
2402 goto do_alg_shift;
2403
2404 case alg_add_t_m2:
2405 case alg_sub_t_m2:
2406 goto do_alg_addsub_t_m2;
2407
2408 case alg_add_factor:
2409 case alg_sub_factor:
2410 goto do_alg_addsub_factor;
2411
2412 case alg_add_t2_m:
2413 goto do_alg_add_t2_m;
2414
2415 case alg_sub_t2_m:
2416 goto do_alg_sub_t2_m;
2417
2418 default:
2419 gcc_unreachable ();
2420 }
2421 }
2422 }
2423
2424 /* If we have a group of zero bits at the low-order part of T, try
2425 multiplying by the remaining bits and then doing a shift. */
2426
2427 if ((t & 1) == 0)
2428 {
2429 do_alg_shift:
2430 m = floor_log2 (t & -t); /* m = number of low zero bits */
2431 if (m < maxm)
2432 {
2433 q = t >> m;
2434 /* The function expand_shift will choose between a shift and
2435 a sequence of additions, so the observed cost is given as
2436 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2437 op_cost = m * add_cost (speed, mode);
2438 if (shift_cost (speed, mode, m) < op_cost)
2439 op_cost = shift_cost (speed, mode, m);
2440 new_limit.cost = best_cost.cost - op_cost;
2441 new_limit.latency = best_cost.latency - op_cost;
2442 synth_mult (alg_in, q, &new_limit, mode);
2443
2444 alg_in->cost.cost += op_cost;
2445 alg_in->cost.latency += op_cost;
2446 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2447 {
2448 struct algorithm *x;
2449 best_cost = alg_in->cost;
2450 x = alg_in, alg_in = best_alg, best_alg = x;
2451 best_alg->log[best_alg->ops] = m;
2452 best_alg->op[best_alg->ops] = alg_shift;
2453 }
2454
2455 /* See if treating ORIG_T as a signed number yields a better
2456 sequence. Try this sequence only for a negative ORIG_T
2457 as it would be useless for a non-negative ORIG_T. */
2458 if ((HOST_WIDE_INT) orig_t < 0)
2459 {
2460 /* Shift ORIG_T as follows because a right shift of a
2461 negative-valued signed type is implementation
2462 defined. */
2463 q = ~(~orig_t >> m);
2464 /* The function expand_shift will choose between a shift
2465 and a sequence of additions, so the observed cost is
2466 given as MIN (m * add_cost(speed, mode),
2467 shift_cost(speed, mode, m)). */
2468 op_cost = m * add_cost (speed, mode);
2469 if (shift_cost (speed, mode, m) < op_cost)
2470 op_cost = shift_cost (speed, mode, m);
2471 new_limit.cost = best_cost.cost - op_cost;
2472 new_limit.latency = best_cost.latency - op_cost;
2473 synth_mult (alg_in, q, &new_limit, mode);
2474
2475 alg_in->cost.cost += op_cost;
2476 alg_in->cost.latency += op_cost;
2477 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2478 {
2479 struct algorithm *x;
2480 best_cost = alg_in->cost;
2481 x = alg_in, alg_in = best_alg, best_alg = x;
2482 best_alg->log[best_alg->ops] = m;
2483 best_alg->op[best_alg->ops] = alg_shift;
2484 }
2485 }
2486 }
2487 if (cache_hit)
2488 goto done;
2489 }
2490
2491 /* If we have an odd number, add or subtract one. */
2492 if ((t & 1) != 0)
2493 {
2494 unsigned HOST_WIDE_INT w;
2495
2496 do_alg_addsub_t_m2:
2497 for (w = 1; (w & t) != 0; w <<= 1)
2498 ;
2499 /* If T was -1, then W will be zero after the loop. This is another
2500 case where T ends with ...111. Handling this by multiplying by (T + 1)
2501 and subtracting 1 produces slightly better code and selects an
2502 algorithm much faster than treating it like the ...0111 case
2503 below. */
2504 if (w == 0
2505 || (w > 2
2506 /* Reject the case where t is 3.
2507 Thus we prefer addition in that case. */
2508 && t != 3))
2509 {
2510 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2511
2512 op_cost = add_cost (speed, mode);
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, t + 1, &new_limit, mode);
2516
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 {
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = 0;
2525 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2526 }
2527 }
2528 else
2529 {
2530 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2531
2532 op_cost = add_cost (speed, mode);
2533 new_limit.cost = best_cost.cost - op_cost;
2534 new_limit.latency = best_cost.latency - op_cost;
2535 synth_mult (alg_in, t - 1, &new_limit, mode);
2536
2537 alg_in->cost.cost += op_cost;
2538 alg_in->cost.latency += op_cost;
2539 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2540 {
2541 struct algorithm *x;
2542 best_cost = alg_in->cost;
2543 x = alg_in, alg_in = best_alg, best_alg = x;
2544 best_alg->log[best_alg->ops] = 0;
2545 best_alg->op[best_alg->ops] = alg_add_t_m2;
2546 }
2547 }
2548
2549 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2550 quickly with a - a * n for some appropriate constant n. */
2551 m = exact_log2 (-orig_t + 1);
2552 if (m >= 0 && m < maxm)
2553 {
2554 op_cost = shiftsub1_cost (speed, mode, m);
2555 new_limit.cost = best_cost.cost - op_cost;
2556 new_limit.latency = best_cost.latency - op_cost;
2557 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2558 &new_limit, mode);
2559
2560 alg_in->cost.cost += op_cost;
2561 alg_in->cost.latency += op_cost;
2562 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2563 {
2564 struct algorithm *x;
2565 best_cost = alg_in->cost;
2566 x = alg_in, alg_in = best_alg, best_alg = x;
2567 best_alg->log[best_alg->ops] = m;
2568 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2569 }
2570 }
2571
2572 if (cache_hit)
2573 goto done;
2574 }
2575
2576 /* Look for factors of t of the form
2577 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2578 If we find such a factor, we can multiply by t using an algorithm that
2579 multiplies by q, shift the result by m and add/subtract it to itself.
2580
2581 We search for large factors first and loop down, even if large factors
2582 are less probable than small; if we find a large factor we will find a
2583 good sequence quickly, and therefore be able to prune (by decreasing
2584 COST_LIMIT) the search. */
2585
2586 do_alg_addsub_factor:
2587 for (m = floor_log2 (t - 1); m >= 2; m--)
2588 {
2589 unsigned HOST_WIDE_INT d;
2590
2591 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2592 if (t % d == 0 && t > d && m < maxm
2593 && (!cache_hit || cache_alg == alg_add_factor))
2594 {
2595 /* If the target has a cheap shift-and-add instruction use
2596 that in preference to a shift insn followed by an add insn.
2597 Assume that the shift-and-add is "atomic" with a latency
2598 equal to its cost, otherwise assume that on superscalar
2599 hardware the shift may be executed concurrently with the
2600 earlier steps in the algorithm. */
2601 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2602 if (shiftadd_cost (speed, mode, m) < op_cost)
2603 {
2604 op_cost = shiftadd_cost (speed, mode, m);
2605 op_latency = op_cost;
2606 }
2607 else
2608 op_latency = add_cost (speed, mode);
2609
2610 new_limit.cost = best_cost.cost - op_cost;
2611 new_limit.latency = best_cost.latency - op_latency;
2612 synth_mult (alg_in, t / d, &new_limit, mode);
2613
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_latency;
2616 if (alg_in->cost.latency < op_cost)
2617 alg_in->cost.latency = op_cost;
2618 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619 {
2620 struct algorithm *x;
2621 best_cost = alg_in->cost;
2622 x = alg_in, alg_in = best_alg, best_alg = x;
2623 best_alg->log[best_alg->ops] = m;
2624 best_alg->op[best_alg->ops] = alg_add_factor;
2625 }
2626 /* Other factors will have been taken care of in the recursion. */
2627 break;
2628 }
2629
2630 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2631 if (t % d == 0 && t > d && m < maxm
2632 && (!cache_hit || cache_alg == alg_sub_factor))
2633 {
2634 /* If the target has a cheap shift-and-subtract insn use
2635 that in preference to a shift insn followed by a sub insn.
2636 Assume that the shift-and-sub is "atomic" with a latency
2637 equal to its cost, otherwise assume that on superscalar
2638 hardware the shift may be executed concurrently with the
2639 earlier steps in the algorithm. */
2640 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2641 if (shiftsub0_cost (speed, mode, m) < op_cost)
2642 {
2643 op_cost = shiftsub0_cost (speed, mode, m);
2644 op_latency = op_cost;
2645 }
2646 else
2647 op_latency = add_cost (speed, mode);
2648
2649 new_limit.cost = best_cost.cost - op_cost;
2650 new_limit.latency = best_cost.latency - op_latency;
2651 synth_mult (alg_in, t / d, &new_limit, mode);
2652
2653 alg_in->cost.cost += op_cost;
2654 alg_in->cost.latency += op_latency;
2655 if (alg_in->cost.latency < op_cost)
2656 alg_in->cost.latency = op_cost;
2657 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658 {
2659 struct algorithm *x;
2660 best_cost = alg_in->cost;
2661 x = alg_in, alg_in = best_alg, best_alg = x;
2662 best_alg->log[best_alg->ops] = m;
2663 best_alg->op[best_alg->ops] = alg_sub_factor;
2664 }
2665 break;
2666 }
2667 }
2668 if (cache_hit)
2669 goto done;
2670
2671 /* Try shift-and-add (load effective address) instructions,
2672 i.e. do a*3, a*5, a*9. */
2673 if ((t & 1) != 0)
2674 {
2675 do_alg_add_t2_m:
2676 q = t - 1;
2677 q = q & -q;
2678 m = exact_log2 (q);
2679 if (m >= 0 && m < maxm)
2680 {
2681 op_cost = shiftadd_cost (speed, mode, m);
2682 new_limit.cost = best_cost.cost - op_cost;
2683 new_limit.latency = best_cost.latency - op_cost;
2684 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2685
2686 alg_in->cost.cost += op_cost;
2687 alg_in->cost.latency += op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2689 {
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = m;
2694 best_alg->op[best_alg->ops] = alg_add_t2_m;
2695 }
2696 }
2697 if (cache_hit)
2698 goto done;
2699
2700 do_alg_sub_t2_m:
2701 q = t + 1;
2702 q = q & -q;
2703 m = exact_log2 (q);
2704 if (m >= 0 && m < maxm)
2705 {
2706 op_cost = shiftsub0_cost (speed, mode, m);
2707 new_limit.cost = best_cost.cost - op_cost;
2708 new_limit.latency = best_cost.latency - op_cost;
2709 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2710
2711 alg_in->cost.cost += op_cost;
2712 alg_in->cost.latency += op_cost;
2713 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714 {
2715 struct algorithm *x;
2716 best_cost = alg_in->cost;
2717 x = alg_in, alg_in = best_alg, best_alg = x;
2718 best_alg->log[best_alg->ops] = m;
2719 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2720 }
2721 }
2722 if (cache_hit)
2723 goto done;
2724 }
2725
2726 done:
2727 /* If best_cost has not decreased, we have not found any algorithm. */
2728 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2729 {
2730 /* We failed to find an algorithm. Record alg_impossible for
2731 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2732 we are asked to find an algorithm for T within the same or
2733 lower COST_LIMIT, we can immediately return to the
2734 caller. */
2735 entry_ptr->t = t;
2736 entry_ptr->mode = mode;
2737 entry_ptr->speed = speed;
2738 entry_ptr->alg = alg_impossible;
2739 entry_ptr->cost = *cost_limit;
2740 return;
2741 }
2742
2743 /* Cache the result. */
2744 if (!cache_hit)
2745 {
2746 entry_ptr->t = t;
2747 entry_ptr->mode = mode;
2748 entry_ptr->speed = speed;
2749 entry_ptr->alg = best_alg->op[best_alg->ops];
2750 entry_ptr->cost.cost = best_cost.cost;
2751 entry_ptr->cost.latency = best_cost.latency;
2752 }
2753
2754 /* If the sequence is getting too long for `struct algorithm'
2755 to record, make this search fail. */
2756 if (best_alg->ops == MAX_BITS_PER_WORD)
2757 return;
2758
2759 /* Copy the algorithm from temporary space to the space at alg_out.
2760 We avoid using structure assignment because the majority of
2761 best_alg is normally undefined, and this is a critical function. */
2762 alg_out->ops = best_alg->ops + 1;
2763 alg_out->cost = best_cost;
2764 memcpy (alg_out->op, best_alg->op,
2765 alg_out->ops * sizeof *alg_out->op);
2766 memcpy (alg_out->log, best_alg->log,
2767 alg_out->ops * sizeof *alg_out->log);
2768 }
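/* Worked example (illustrative only): for T == 10 (binary 1010) one
   decomposition the search above considers, and typically selects when
   shift-and-add is cheap, is

     accum = x                          (alg_m)
     accum = (accum << 2) + x  = 5*x    (alg_add_t2_m, log = 2)
     accum = accum << 1        = 10*x   (alg_shift,    log = 1)

   and for T == 7 the "...111" rule multiplies by 8 and subtracts once,
   giving (x << 3) - x.  Which candidate actually wins depends on the
   add_cost/shift_cost/shiftadd_cost entries for the mode.  */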
2769 \f
2770 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2771 Try three variations:
2772
2773 - a shift/add sequence based on VAL itself
2774 - a shift/add sequence based on -VAL, followed by a negation
2775 - a shift/add sequence based on VAL - 1, followed by an addition.
2776
2777 Return true if the cheapest of these cost less than MULT_COST,
2778 describing the algorithm in *ALG and final fixup in *VARIANT. */
2779
2780 static bool
2781 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2782 struct algorithm *alg, enum mult_variant *variant,
2783 int mult_cost)
2784 {
2785 struct algorithm alg2;
2786 struct mult_cost limit;
2787 int op_cost;
2788 bool speed = optimize_insn_for_speed_p ();
2789
2790 /* Fail quickly for impossible bounds. */
2791 if (mult_cost < 0)
2792 return false;
2793
2794 /* Ensure that mult_cost provides a reasonable upper bound.
2795 Any constant multiplication can be performed with less
2796 than 2 * bits additions. */
2797 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2798 if (mult_cost > op_cost)
2799 mult_cost = op_cost;
2800
2801 *variant = basic_variant;
2802 limit.cost = mult_cost;
2803 limit.latency = mult_cost;
2804 synth_mult (alg, val, &limit, mode);
2805
2806 /* This works only if the inverted value actually fits in an
2807 `unsigned int'. */
2808 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2809 {
2810 op_cost = neg_cost(speed, mode);
2811 if (MULT_COST_LESS (&alg->cost, mult_cost))
2812 {
2813 limit.cost = alg->cost.cost - op_cost;
2814 limit.latency = alg->cost.latency - op_cost;
2815 }
2816 else
2817 {
2818 limit.cost = mult_cost - op_cost;
2819 limit.latency = mult_cost - op_cost;
2820 }
2821
2822 synth_mult (&alg2, -val, &limit, mode);
2823 alg2.cost.cost += op_cost;
2824 alg2.cost.latency += op_cost;
2825 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2826 *alg = alg2, *variant = negate_variant;
2827 }
2828
2829 /* This proves very useful for division-by-constant. */
2830 op_cost = add_cost (speed, mode);
2831 if (MULT_COST_LESS (&alg->cost, mult_cost))
2832 {
2833 limit.cost = alg->cost.cost - op_cost;
2834 limit.latency = alg->cost.latency - op_cost;
2835 }
2836 else
2837 {
2838 limit.cost = mult_cost - op_cost;
2839 limit.latency = mult_cost - op_cost;
2840 }
2841
2842 synth_mult (&alg2, val - 1, &limit, mode);
2843 alg2.cost.cost += op_cost;
2844 alg2.cost.latency += op_cost;
2845 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2846 *alg = alg2, *variant = add_variant;
2847
2848 return MULT_COST_LESS (&alg->cost, mult_cost);
2849 }
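/* Worked example (illustrative only): for VAL == -7 the plain shift/add
   search rarely succeeds, but the negate_variant tried above can
   synthesize 7*x as (x << 3) - x and then negate the result; for VAL == 9
   the add_variant considers synthesizing 8*x (a single shift) and adding
   X back in.  The cheapest of the three candidates, if any beats
   MULT_COST, is what ends up in *ALG and *VARIANT.  */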
2850
2851 /* A subroutine of expand_mult, used for constant multiplications.
2852 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2853 convenient. Use the shift/add sequence described by ALG and apply
2854 the final fixup specified by VARIANT. */
2855
2856 static rtx
2857 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2858 rtx target, const struct algorithm *alg,
2859 enum mult_variant variant)
2860 {
2861 HOST_WIDE_INT val_so_far;
2862 rtx insn, accum, tem;
2863 int opno;
2864 enum machine_mode nmode;
2865
2866 /* Avoid referencing memory over and over, and avoid invalid
2867 sharing on SUBREGs. */
2868 op0 = force_reg (mode, op0);
2869
2870 /* ACCUM starts out either as OP0 or as a zero, depending on
2871 the first operation. */
2872
2873 if (alg->op[0] == alg_zero)
2874 {
2875 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2876 val_so_far = 0;
2877 }
2878 else if (alg->op[0] == alg_m)
2879 {
2880 accum = copy_to_mode_reg (mode, op0);
2881 val_so_far = 1;
2882 }
2883 else
2884 gcc_unreachable ();
2885
2886 for (opno = 1; opno < alg->ops; opno++)
2887 {
2888 int log = alg->log[opno];
2889 rtx shift_subtarget = optimize ? 0 : accum;
2890 rtx add_target
2891 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892 && !optimize)
2893 ? target : 0;
2894 rtx accum_target = optimize ? 0 : accum;
2895 rtx accum_inner;
2896
2897 switch (alg->op[opno])
2898 {
2899 case alg_shift:
2900 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2901 /* REG_EQUAL note will be attached to the following insn. */
2902 emit_move_insn (accum, tem);
2903 val_so_far <<= log;
2904 break;
2905
2906 case alg_add_t_m2:
2907 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2908 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2909 add_target ? add_target : accum_target);
2910 val_so_far += (HOST_WIDE_INT) 1 << log;
2911 break;
2912
2913 case alg_sub_t_m2:
2914 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2915 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2916 add_target ? add_target : accum_target);
2917 val_so_far -= (HOST_WIDE_INT) 1 << log;
2918 break;
2919
2920 case alg_add_t2_m:
2921 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2922 log, shift_subtarget, 0);
2923 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2924 add_target ? add_target : accum_target);
2925 val_so_far = (val_so_far << log) + 1;
2926 break;
2927
2928 case alg_sub_t2_m:
2929 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2930 log, shift_subtarget, 0);
2931 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2932 add_target ? add_target : accum_target);
2933 val_so_far = (val_so_far << log) - 1;
2934 break;
2935
2936 case alg_add_factor:
2937 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2938 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939 add_target ? add_target : accum_target);
2940 val_so_far += val_so_far << log;
2941 break;
2942
2943 case alg_sub_factor:
2944 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2945 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2946 (add_target
2947 ? add_target : (optimize ? 0 : tem)));
2948 val_so_far = (val_so_far << log) - val_so_far;
2949 break;
2950
2951 default:
2952 gcc_unreachable ();
2953 }
2954
2955 if (SCALAR_INT_MODE_P (mode))
2956 {
2957 /* Write a REG_EQUAL note on the last insn so that we can cse
2958 multiplication sequences. Note that if ACCUM is a SUBREG,
2959 we've set the inner register and must properly indicate that. */
2960 tem = op0, nmode = mode;
2961 accum_inner = accum;
2962 if (GET_CODE (accum) == SUBREG)
2963 {
2964 accum_inner = SUBREG_REG (accum);
2965 nmode = GET_MODE (accum_inner);
2966 tem = gen_lowpart (nmode, op0);
2967 }
2968
2969 insn = get_last_insn ();
2970 set_dst_reg_note (insn, REG_EQUAL,
2971 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2972 accum_inner);
2973 }
2974 }
2975
2976 if (variant == negate_variant)
2977 {
2978 val_so_far = -val_so_far;
2979 accum = expand_unop (mode, neg_optab, accum, target, 0);
2980 }
2981 else if (variant == add_variant)
2982 {
2983 val_so_far = val_so_far + 1;
2984 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2985 }
2986
2987 /* Compare only the bits of val and val_so_far that are significant
2988 in the result mode, to avoid sign-/zero-extension confusion. */
2989 nmode = GET_MODE_INNER (mode);
2990 if (nmode == VOIDmode)
2991 nmode = mode;
2992 val &= GET_MODE_MASK (nmode);
2993 val_so_far &= GET_MODE_MASK (nmode);
2994 gcc_assert (val == val_so_far);
2995
2996 return accum;
2997 }
2998
2999 /* Perform a multiplication and return an rtx for the result.
3000 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3001 TARGET is a suggestion for where to store the result (an rtx).
3002
3003 We check specially for a constant integer as OP1.
3004 If you want this check for OP0 as well, then before calling
3005 you should swap the two operands if OP0 would be constant. */
3006
3007 rtx
3008 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3009 int unsignedp)
3010 {
3011 enum mult_variant variant;
3012 struct algorithm algorithm;
3013 rtx scalar_op1;
3014 int max_cost;
3015 bool speed = optimize_insn_for_speed_p ();
3016 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3017
3018 if (CONSTANT_P (op0))
3019 {
3020 rtx temp = op0;
3021 op0 = op1;
3022 op1 = temp;
3023 }
3024
3025 /* For vectors, there are several simplifications that can be made if
3026 all elements of the vector constant are identical. */
3027 scalar_op1 = op1;
3028 if (GET_CODE (op1) == CONST_VECTOR)
3029 {
3030 int i, n = CONST_VECTOR_NUNITS (op1);
3031 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3032 for (i = 1; i < n; ++i)
3033 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3034 goto skip_scalar;
3035 }
3036
3037 if (INTEGRAL_MODE_P (mode))
3038 {
3039 rtx fake_reg;
3040 HOST_WIDE_INT coeff;
3041 bool is_neg;
3042 int mode_bitsize;
3043
3044 if (op1 == CONST0_RTX (mode))
3045 return op1;
3046 if (op1 == CONST1_RTX (mode))
3047 return op0;
3048 if (op1 == CONSTM1_RTX (mode))
3049 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3050 op0, target, 0);
3051
3052 if (do_trapv)
3053 goto skip_synth;
3054
3055 /* These are the operations that are potentially turned into
3056 a sequence of shifts and additions. */
3057 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3058
3059 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3060 less than or equal in size to `unsigned int' this doesn't matter.
3061 If the mode is larger than `unsigned int', then synth_mult works
3062 only if the constant value exactly fits in an `unsigned int' without
3063 any truncation. This means that multiplying by negative values does
3064 not work; results are off by 2^32 on a 32-bit machine. */
3065
3066 if (CONST_INT_P (scalar_op1))
3067 {
3068 coeff = INTVAL (scalar_op1);
3069 is_neg = coeff < 0;
3070 }
3071 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3072 {
3073 /* If we are multiplying in DImode, it may still be a win
3074 to try to work with shifts and adds. */
3075 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3076 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3077 {
3078 coeff = CONST_DOUBLE_LOW (scalar_op1);
3079 is_neg = false;
3080 }
3081 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3082 {
3083 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3084 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3085 {
3086 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3087 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3088 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3089 return expand_shift (LSHIFT_EXPR, mode, op0,
3090 shift, target, unsignedp);
3091 }
3092 goto skip_synth;
3093 }
3094 else
3095 goto skip_synth;
3096 }
3097 else
3098 goto skip_synth;
3099
3100 /* We used to test optimize here, on the grounds that it's better to
3101 produce a smaller program when -O is not used. But this causes
3102 such a terrible slowdown sometimes that it seems better to always
3103 use synth_mult. */
3104
3105 /* Special case powers of two. */
3106 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3107 return expand_shift (LSHIFT_EXPR, mode, op0,
3108 floor_log2 (coeff), target, unsignedp);
3109
3110 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3111
3112 /* Attempt to handle multiplication of DImode values by negative
3113 coefficients, by performing the multiplication by a positive
3114 multiplier and then inverting the result. */
3115 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3116 {
3117 /* It's safe to use -coeff even for INT_MIN, as the
3118 result is interpreted as an unsigned coefficient.
3119 Exclude cost of op0 from max_cost to match the cost
3120 calculation of the synth_mult. */
3121 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3122 - neg_cost(speed, mode));
3123 if (max_cost > 0
3124 && choose_mult_variant (mode, -coeff, &algorithm,
3125 &variant, max_cost))
3126 {
3127 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3128 &algorithm, variant);
3129 return expand_unop (mode, neg_optab, temp, target, 0);
3130 }
3131 goto skip_synth;
3132 }
3133
3134 /* Exclude cost of op0 from max_cost to match the cost
3135 calculation of the synth_mult. */
3136 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3137 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3138 return expand_mult_const (mode, op0, coeff, target,
3139 &algorithm, variant);
3140 }
3141 skip_synth:
3142
3143 /* Expand x*2.0 as x+x. */
3144 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3145 {
3146 REAL_VALUE_TYPE d;
3147 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3148
3149 if (REAL_VALUES_EQUAL (d, dconst2))
3150 {
3151 op0 = force_reg (GET_MODE (op0), op0);
3152 return expand_binop (mode, add_optab, op0, op0,
3153 target, unsignedp, OPTAB_LIB_WIDEN);
3154 }
3155 }
3156 skip_scalar:
3157
3158 /* This used to use umul_optab if unsigned, but for non-widening multiply
3159 there is no difference between signed and unsigned. */
3160 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3161 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3162 gcc_assert (op0);
3163 return op0;
3164 }
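/* For illustration only: the special cases above mean that, in an integral
   mode, x*0 folds to 0, x*1 to x, x*-1 to a negation, and a power-of-two
   coefficient such as 8 becomes a single left shift by 3; only the
   remaining constants go through choose_mult_variant/expand_mult_const or
   fall back to a real multiply insn or libcall.  */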
3165
3166 /* Return a cost estimate for multiplying a register by the given
3167 COEFFicient in the given MODE and SPEED. */
3168
3169 int
3170 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3171 {
3172 int max_cost;
3173 struct algorithm algorithm;
3174 enum mult_variant variant;
3175
3176 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3177 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3178 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3179 return algorithm.cost.cost;
3180 else
3181 return max_cost;
3182 }
3183
3184 /* Perform a widening multiplication and return an rtx for the result.
3185 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3186 TARGET is a suggestion for where to store the result (an rtx).
3187 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3188 or smul_widen_optab.
3189
3190 We check specially for a constant integer as OP1, comparing the
3191 cost of a widening multiply against the cost of a sequence of shifts
3192 and adds. */
3193
3194 rtx
3195 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3196 int unsignedp, optab this_optab)
3197 {
3198 bool speed = optimize_insn_for_speed_p ();
3199 rtx cop1;
3200
3201 if (CONST_INT_P (op1)
3202 && GET_MODE (op0) != VOIDmode
3203 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3204 this_optab == umul_widen_optab))
3205 && CONST_INT_P (cop1)
3206 && (INTVAL (cop1) >= 0
3207 || HWI_COMPUTABLE_MODE_P (mode)))
3208 {
3209 HOST_WIDE_INT coeff = INTVAL (cop1);
3210 int max_cost;
3211 enum mult_variant variant;
3212 struct algorithm algorithm;
3213
3214 /* Special case powers of two. */
3215 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3216 {
3217 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3218 return expand_shift (LSHIFT_EXPR, mode, op0,
3219 floor_log2 (coeff), target, unsignedp);
3220 }
3221
3222 /* Exclude cost of op0 from max_cost to match the cost
3223 calculation of the synth_mult. */
3224 max_cost = mul_widen_cost (speed, mode);
3225 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3226 max_cost))
3227 {
3228 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3229 return expand_mult_const (mode, op0, coeff, target,
3230 &algorithm, variant);
3231 }
3232 }
3233 return expand_binop (mode, this_optab, op0, op1, target,
3234 unsignedp, OPTAB_LIB_WIDEN);
3235 }
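/* For illustration only: with a power-of-two coefficient the widening
   multiply above degenerates into an extension followed by a shift, e.g.
   a 32x32->64 bit multiply by 8 is emitted as the extended operand shifted
   left by 3; for other constants the shift/add synthesis is used only when
   it beats mul_widen_cost.  */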
3236 \f
3237 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3238 replace division by D, and put the least significant N bits of the result
3239 in *MULTIPLIER_PTR and return the most significant bit.
3240
3241 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3242 needed precision is in PRECISION (should be <= N).
3243
3244 PRECISION should be as small as possible so this function can choose
3245 multiplier more freely.
3246
3247 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3248 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3249
3250 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3251 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3252
3253 unsigned HOST_WIDE_INT
3254 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3255 unsigned HOST_WIDE_INT *multiplier_ptr,
3256 int *post_shift_ptr, int *lgup_ptr)
3257 {
3258 double_int mhigh, mlow;
3259 int lgup, post_shift;
3260 int pow, pow2;
3261
3262 /* lgup = ceil(log2(divisor)); */
3263 lgup = ceil_log2 (d);
3264
3265 gcc_assert (lgup <= n);
3266
3267 pow = n + lgup;
3268 pow2 = n + lgup - precision;
3269
3270 /* We could handle this with some effort, but this case is much
3271 better handled directly with a scc insn, so rely on caller using
3272 that. */
3273 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3274
3275 /* mlow = 2^(N + lgup)/d */
3276 double_int val = double_int_zero.set_bit (pow);
3277 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3278
3279 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3280 val |= double_int_zero.set_bit (pow2);
3281 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3282
3283 gcc_assert (!mhigh.high || val.high - d < d);
3284 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3285 /* Assert that mlow < mhigh. */
3286 gcc_assert (mlow.ult (mhigh));
3287
3288 /* If precision == N, then mlow, mhigh exceed 2^N
3289 (but they do not exceed 2^(N+1)). */
3290
3291 /* Reduce to lowest terms. */
3292 for (post_shift = lgup; post_shift > 0; post_shift--)
3293 {
3294 int shft = HOST_BITS_PER_WIDE_INT - 1;
3295 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3296 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3297 if (ml_lo >= mh_lo)
3298 break;
3299
3300 mlow = double_int::from_uhwi (ml_lo);
3301 mhigh = double_int::from_uhwi (mh_lo);
3302 }
3303
3304 *post_shift_ptr = post_shift;
3305 *lgup_ptr = lgup;
3306 if (n < HOST_BITS_PER_WIDE_INT)
3307 {
3308 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3309 *multiplier_ptr = mhigh.low & mask;
3310 return mhigh.low >= mask;
3311 }
3312 else
3313 {
3314 *multiplier_ptr = mhigh.low;
3315 return mhigh.high;
3316 }
3317 }
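/* Worked example (illustrative only): for N == 32, D == 3 and full
   PRECISION, the computation above yields the well-known magic number

     m = 0xAAAAAAAB  (= (2^33 + 1) / 3),  post_shift = 1,  lgup = 2,

   so that for every 32-bit unsigned x

     x / 3 == (unsigned int) (((unsigned long long) x * 0xAAAAAAABu) >> 33),

   i.e. take the high half of the widening multiply and shift it right by
   post_shift.  The returned most-significant bit is 0 here, so no
   add/subtract fixup is needed.  */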
3318
3319 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3320 congruent to 1 (mod 2**N). */
3321
3322 static unsigned HOST_WIDE_INT
3323 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3324 {
3325 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3326
3327 /* The algorithm notes that the choice y = x satisfies
3328 x*y == 1 mod 2^3, since x is assumed odd.
3329 Each iteration doubles the number of bits of significance in y. */
3330
3331 unsigned HOST_WIDE_INT mask;
3332 unsigned HOST_WIDE_INT y = x;
3333 int nbit = 3;
3334
3335 mask = (n == HOST_BITS_PER_WIDE_INT
3336 ? ~(unsigned HOST_WIDE_INT) 0
3337 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3338
3339 while (nbit < n)
3340 {
3341 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3342 nbit *= 2;
3343 }
3344 return y;
3345 }
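/* Worked example (illustrative only): X == 7, N == 8.  Starting from
   y = x = 7, which is already an inverse mod 2^3 (7*7 == 49 == 1 mod 8),
   one iteration gives y = 7 * (2 - 7*7) mod 256 = 183, and a second
   leaves it unchanged, so the routine returns 183; indeed
   7 * 183 == 1281 == 5*256 + 1.  */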
3346
3347 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
3348 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3349 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3350 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3351 become signed.
3352
3353 The result is put in TARGET if that is convenient.
3354
3355 MODE is the mode of operation. */
3356
3357 rtx
3358 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3359 rtx op1, rtx target, int unsignedp)
3360 {
3361 rtx tem;
3362 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3363
3364 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3365 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3366 tem = expand_and (mode, tem, op1, NULL_RTX);
3367 adj_operand
3368 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3369 adj_operand);
3370
3371 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3372 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3373 tem = expand_and (mode, tem, op0, NULL_RTX);
3374 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3375 target);
3376
3377 return target;
3378 }
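/* The identity used above: modulo 2^N (N = GET_MODE_BITSIZE (MODE)),
   high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0),
   where high_u/high_s denote the upper N bits of the unsigned/signed
   2N-bit product.  The arithmetic right shifts by N - 1 produce the two
   (x < 0 ? y : 0) terms as masks.  */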
3379
3380 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3381
3382 static rtx
3383 extract_high_half (enum machine_mode mode, rtx op)
3384 {
3385 enum machine_mode wider_mode;
3386
3387 if (mode == word_mode)
3388 return gen_highpart (mode, op);
3389
3390 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3391
3392 wider_mode = GET_MODE_WIDER_MODE (mode);
3393 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3394 GET_MODE_BITSIZE (mode), 0, 1);
3395 return convert_modes (mode, wider_mode, op, 0);
3396 }
3397
3398 /* Like expmed_mult_highpart, but only consider using a multiplication
3399 optab. OP1 is an rtx for the constant operand. */
3400
3401 static rtx
3402 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3403 rtx target, int unsignedp, int max_cost)
3404 {
3405 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3406 enum machine_mode wider_mode;
3407 optab moptab;
3408 rtx tem;
3409 int size;
3410 bool speed = optimize_insn_for_speed_p ();
3411
3412 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3413
3414 wider_mode = GET_MODE_WIDER_MODE (mode);
3415 size = GET_MODE_BITSIZE (mode);
3416
3417 /* Firstly, try using a multiplication insn that only generates the needed
3418 high part of the product, and in the sign flavor of unsignedp. */
3419 if (mul_highpart_cost (speed, mode) < max_cost)
3420 {
3421 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3422 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3423 unsignedp, OPTAB_DIRECT);
3424 if (tem)
3425 return tem;
3426 }
3427
3428 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3429 Need to adjust the result after the multiplication. */
3430 if (size - 1 < BITS_PER_WORD
3431 && (mul_highpart_cost (speed, mode)
3432 + 2 * shift_cost (speed, mode, size-1)
3433 + 4 * add_cost (speed, mode) < max_cost))
3434 {
3435 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3436 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3437 unsignedp, OPTAB_DIRECT);
3438 if (tem)
3439 /* We used the wrong signedness. Adjust the result. */
3440 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3441 tem, unsignedp);
3442 }
3443
3444 /* Try widening multiplication. */
3445 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3446 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3447 && mul_widen_cost (speed, wider_mode) < max_cost)
3448 {
3449 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3450 unsignedp, OPTAB_WIDEN);
3451 if (tem)
3452 return extract_high_half (mode, tem);
3453 }
3454
3455 /* Try widening the mode and performing a non-widening multiplication. */
3456 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3457 && size - 1 < BITS_PER_WORD
3458 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3459 < max_cost))
3460 {
3461 rtx insns, wop0, wop1;
3462
3463 /* We need to widen the operands, for example to ensure the
3464 constant multiplier is correctly sign or zero extended.
3465 Use a sequence to clean up any instructions emitted by
3466 the conversions if things don't work out. */
3467 start_sequence ();
3468 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3469 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3470 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3471 unsignedp, OPTAB_WIDEN);
3472 insns = get_insns ();
3473 end_sequence ();
3474
3475 if (tem)
3476 {
3477 emit_insn (insns);
3478 return extract_high_half (mode, tem);
3479 }
3480 }
3481
3482 /* Try widening multiplication of opposite signedness, and adjust. */
3483 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3484 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3485 && size - 1 < BITS_PER_WORD
3486 && (mul_widen_cost (speed, wider_mode)
3487 + 2 * shift_cost (speed, mode, size-1)
3488 + 4 * add_cost (speed, mode) < max_cost))
3489 {
3490 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3491 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3492 if (tem != 0)
3493 {
3494 tem = extract_high_half (mode, tem);
3495 /* We used the wrong signedness. Adjust the result. */
3496 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3497 target, unsignedp);
3498 }
3499 }
3500
3501 return 0;
3502 }
3503
3504 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3505 putting the high half of the result in TARGET if that is convenient,
3506 and return where the result is. If the operation cannot be performed,
3507 0 is returned.
3508
3509 MODE is the mode of operation and result.
3510
3511 UNSIGNEDP nonzero means unsigned multiply.
3512
3513 MAX_COST is the total allowed cost for the expanded RTL. */
3514
3515 static rtx
3516 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3517 rtx target, int unsignedp, int max_cost)
3518 {
3519 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3520 unsigned HOST_WIDE_INT cnst1;
3521 int extra_cost;
3522 bool sign_adjust = false;
3523 enum mult_variant variant;
3524 struct algorithm alg;
3525 rtx tem;
3526 bool speed = optimize_insn_for_speed_p ();
3527
3528 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3529 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3530 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3531
3532 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3533
3534 /* We can't optimize modes wider than BITS_PER_WORD.
3535 ??? We might be able to perform double-word arithmetic if
3536 mode == word_mode; however, all the cost calculations in
3537 synth_mult etc. assume single-word operations. */
3538 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3539 return expmed_mult_highpart_optab (mode, op0, op1, target,
3540 unsignedp, max_cost);
3541
3542 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3543
3544 /* Check whether we try to multiply by a negative constant. */
3545 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3546 {
3547 sign_adjust = true;
3548 extra_cost += add_cost (speed, mode);
3549 }
3550
3551 /* See whether shift/add multiplication is cheap enough. */
3552 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3553 max_cost - extra_cost))
3554 {
3555 /* See whether the specialized multiplication optabs are
3556 cheaper than the shift/add version. */
3557 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3558 alg.cost.cost + extra_cost);
3559 if (tem)
3560 return tem;
3561
3562 tem = convert_to_mode (wider_mode, op0, unsignedp);
3563 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3564 tem = extract_high_half (mode, tem);
3565
3566 /* Adjust result for signedness. */
3567 if (sign_adjust)
3568 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3569
3570 return tem;
3571 }
3572 return expmed_mult_highpart_optab (mode, op0, op1, target,
3573 unsignedp, max_cost);
3574 }
3575
3576
3577 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3578
3579 static rtx
3580 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3581 {
3582 unsigned HOST_WIDE_INT masklow, maskhigh;
3583 rtx result, temp, shift, label;
3584 int logd;
3585
3586 logd = floor_log2 (d);
3587 result = gen_reg_rtx (mode);
3588
3589 /* Avoid conditional branches when they're expensive. */
3590 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3591 && optimize_insn_for_speed_p ())
3592 {
3593 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3594 mode, 0, -1);
3595 if (signmask)
3596 {
3597 signmask = force_reg (mode, signmask);
3598 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3599 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3600
3601 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3602 which instruction sequence to use. If logical right shifts
3603 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3604 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3605
3606 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3607 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3608 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3609 > COSTS_N_INSNS (2)))
3610 {
3611 temp = expand_binop (mode, xor_optab, op0, signmask,
3612 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3613 temp = expand_binop (mode, sub_optab, temp, signmask,
3614 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3615 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3616 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3617 temp = expand_binop (mode, xor_optab, temp, signmask,
3618 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3619 temp = expand_binop (mode, sub_optab, temp, signmask,
3620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621 }
3622 else
3623 {
3624 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3626 signmask = force_reg (mode, signmask);
3627
3628 temp = expand_binop (mode, add_optab, op0, signmask,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3630 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 temp = expand_binop (mode, sub_optab, temp, signmask,
3633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634 }
3635 return temp;
3636 }
3637 }
3638
3639 /* Mask contains the mode's signbit and the significant bits of the
3640 modulus. By including the signbit in the operation, many targets
3641 can avoid an explicit compare operation in the following comparison
3642 against zero. */
3643
3644 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3645 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3646 {
3647 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3648 maskhigh = -1;
3649 }
3650 else
3651 maskhigh = (HOST_WIDE_INT) -1
3652 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3653
3654 temp = expand_binop (mode, and_optab, op0,
3655 immed_double_const (masklow, maskhigh, mode),
3656 result, 1, OPTAB_LIB_WIDEN);
3657 if (temp != result)
3658 emit_move_insn (result, temp);
3659
3660 label = gen_label_rtx ();
3661 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3662
3663 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3664 0, OPTAB_LIB_WIDEN);
3665 masklow = (HOST_WIDE_INT) -1 << logd;
3666 maskhigh = -1;
3667 temp = expand_binop (mode, ior_optab, temp,
3668 immed_double_const (masklow, maskhigh, mode),
3669 result, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3671 0, OPTAB_LIB_WIDEN);
3672 if (temp != result)
3673 emit_move_insn (result, temp);
3674 emit_label (label);
3675 return result;
3676 }
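/* In C terms the two branch-free sequences above compute the truncating
   remainder OP0 % 2^K (K = logd), with S = (OP0 < 0 ? -1 : 0), as either
     ((((OP0 ^ S) - S) & (2^K - 1)) ^ S) - S
   or, using the bias B = (OP0 < 0 ? 2^K - 1 : 0) obtained by the logical
   right shift of S,
     ((OP0 + B) & (2^K - 1)) - B.  */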
3677
3678 /* Expand signed division of OP0 by a power of two D in mode MODE.
3679 This routine is only called for positive values of D. */
3680
3681 static rtx
3682 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3683 {
3684 rtx temp, label;
3685 int logd;
3686
3687 logd = floor_log2 (d);
3688
3689 if (d == 2
3690 && BRANCH_COST (optimize_insn_for_speed_p (),
3691 false) >= 1)
3692 {
3693 temp = gen_reg_rtx (mode);
3694 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3695 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3696 0, OPTAB_LIB_WIDEN);
3697 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3698 }
3699
3700 #ifdef HAVE_conditional_move
3701 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3702 >= 2)
3703 {
3704 rtx temp2;
3705
3706 /* ??? emit_conditional_move forces a stack adjustment via
3707 compare_from_rtx, so if the sequence is discarded, it will
3708 be lost. Do it now instead. */
3709 do_pending_stack_adjust ();
3710
3711 start_sequence ();
3712 temp2 = copy_to_mode_reg (mode, op0);
3713 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3714 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3715 temp = force_reg (mode, temp);
3716
3717 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3718 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3719 mode, temp, temp2, mode, 0);
3720 if (temp2)
3721 {
3722 rtx seq = get_insns ();
3723 end_sequence ();
3724 emit_insn (seq);
3725 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3726 }
3727 end_sequence ();
3728 }
3729 #endif
3730
3731 if (BRANCH_COST (optimize_insn_for_speed_p (),
3732 false) >= 2)
3733 {
3734 int ushift = GET_MODE_BITSIZE (mode) - logd;
3735
3736 temp = gen_reg_rtx (mode);
3737 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3738 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3739 > COSTS_N_INSNS (1))
3740 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3741 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3742 else
3743 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3744 ushift, NULL_RTX, 1);
3745 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3746 0, OPTAB_LIB_WIDEN);
3747 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3748 }
3749
3750 label = gen_label_rtx ();
3751 temp = copy_to_mode_reg (mode, op0);
3752 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3753 expand_inc (temp, GEN_INT (d - 1));
3754 emit_label (label);
3755 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3756 }
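/* All of the variants above compute the truncating quotient OP0 / 2^K
   (K = logd) as the arithmetic shift (OP0 + (OP0 < 0 ? 2^K - 1 : 0)) >> K;
   they differ only in how the conditional bias is materialized (store-flag
   plus AND or shift, a conditional move, or an explicit branch).  */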
3757 \f
3758 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3759 if that is convenient, and returning where the result is.
3760 You may request either the quotient or the remainder as the result;
3761 specify REM_FLAG nonzero to get the remainder.
3762
3763 CODE is the expression code for which kind of division this is;
3764 it controls how rounding is done. MODE is the machine mode to use.
3765 UNSIGNEDP nonzero means do unsigned division. */
3766
3767 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3768 and then correct it by or'ing in missing high bits
3769 if result of ANDI is nonzero.
3770 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3771 This could optimize to a bfexts instruction.
3772 But C doesn't use these operations, so their optimizations are
3773 left for later. */
3774 /* ??? For modulo, we don't actually need the highpart of the first product,
3775 the low part will do nicely. And for small divisors, the second multiply
3776 can also be a low-part only multiply or even be completely left out.
3777 E.g. to calculate the remainder of a division by 3 with a 32 bit
3778 multiply, multiply with 0x55555556 and extract the upper two bits;
3779 the result is exact for inputs up to 0x1fffffff.
3780 The input range can be reduced by using cross-sum rules.
3781 For odd divisors >= 3, the following table gives right shift counts
3782 so that if a number is shifted by an integer multiple of the given
3783 amount, the remainder stays the same:
3784 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3785 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3786 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3787 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3788 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3789
3790 Cross-sum rules for even numbers can be derived by leaving as many bits
3791 to the right alone as the divisor has zeros to the right.
3792 E.g. if x is an unsigned 32 bit number:
3793 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3794 */
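/* As an illustration of the low-part trick above, assuming 32-bit unsigned
   arithmetic: x % 3 == ((uint32_t) (x * 0x55555556)) >> 30 for all
   x <= 0x1fffffff, i.e. the top two bits of the low 32-bit product carry
   the remainder.  */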
3795
3796 rtx
3797 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3798 rtx op0, rtx op1, rtx target, int unsignedp)
3799 {
3800 enum machine_mode compute_mode;
3801 rtx tquotient;
3802 rtx quotient = 0, remainder = 0;
3803 rtx last;
3804 int size;
3805 rtx insn;
3806 optab optab1, optab2;
3807 int op1_is_constant, op1_is_pow2 = 0;
3808 int max_cost, extra_cost;
3809 static HOST_WIDE_INT last_div_const = 0;
3810 static HOST_WIDE_INT ext_op1;
3811 bool speed = optimize_insn_for_speed_p ();
3812
3813 op1_is_constant = CONST_INT_P (op1);
3814 if (op1_is_constant)
3815 {
3816 ext_op1 = INTVAL (op1);
3817 if (unsignedp)
3818 ext_op1 &= GET_MODE_MASK (mode);
3819 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3820 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3821 }
3822
3823 /*
3824 This is the structure of expand_divmod:
3825
3826 First comes code to fix up the operands so we can perform the operations
3827 correctly and efficiently.
3828
3829 Second comes a switch statement with code specific for each rounding mode.
3830 For some special operands this code emits all RTL for the desired
3831 operation; for other cases, it generates only a quotient and stores it in
3832 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3833 to indicate that it has not done anything.
3834
3835 Last comes code that finishes the operation. If QUOTIENT is set and
3836 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3837 QUOTIENT is not set, it is computed using trunc rounding.
3838
3839 We try to generate special code for division and remainder when OP1 is a
3840 constant. If |OP1| = 2**n we can use shifts and some other fast
3841 operations. For other values of OP1, we compute a carefully selected
3842 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3843 by m.
3844
3845 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3846 half of the product. Different strategies for generating the product are
3847 implemented in expmed_mult_highpart.
3848
3849 If what we actually want is the remainder, we generate that by another
3850 by-constant multiplication and a subtraction. */
3851
3852 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3853 code below will malfunction if we are, so check here and handle
3854 the special case if so. */
3855 if (op1 == const1_rtx)
3856 return rem_flag ? const0_rtx : op0;
3857
3858 /* When dividing by -1, we could get an overflow.
3859 negv_optab can handle overflows. */
3860 if (! unsignedp && op1 == constm1_rtx)
3861 {
3862 if (rem_flag)
3863 return const0_rtx;
3864 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3865 ? negv_optab : neg_optab, op0, target, 0);
3866 }
3867
3868 if (target
3869 /* Don't use the function value register as a target
3870 since we have to read it as well as write it,
3871 and function-inlining gets confused by this. */
3872 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3873 /* Don't clobber an operand while doing a multi-step calculation. */
3874 || ((rem_flag || op1_is_constant)
3875 && (reg_mentioned_p (target, op0)
3876 || (MEM_P (op0) && MEM_P (target))))
3877 || reg_mentioned_p (target, op1)
3878 || (MEM_P (op1) && MEM_P (target))))
3879 target = 0;
3880
3881 /* Get the mode in which to perform this computation. Normally it will
3882 be MODE, but sometimes we can't do the desired operation in MODE.
3883 If so, pick a wider mode in which we can do the operation. Convert
3884 to that mode at the start to avoid repeated conversions.
3885
3886 First see what operations we need. These depend on the expression
3887 we are evaluating. (We assume that divxx3 insns exist under the
3888 same conditions as modxx3 insns, and that these insns don't normally
3889 fail. If these assumptions are not correct, we may generate less
3890 efficient code in some cases.)
3891
3892 Then see if we find a mode in which we can open-code that operation
3893 (either a division, modulus, or shift). Finally, check for the smallest
3894 mode for which we can do the operation with a library call. */
3895
3896 /* We might want to refine this now that we have division-by-constant
3897 optimization. Since expmed_mult_highpart tries so many variants, it is
3898 not straightforward to generalize this. Maybe we should make an array
3899 of possible modes in init_expmed? Save this for GCC 2.7. */
3900
3901 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3902 ? (unsignedp ? lshr_optab : ashr_optab)
3903 : (unsignedp ? udiv_optab : sdiv_optab));
3904 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3905 ? optab1
3906 : (unsignedp ? udivmod_optab : sdivmod_optab));
3907
3908 for (compute_mode = mode; compute_mode != VOIDmode;
3909 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3910 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3911 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3912 break;
3913
3914 if (compute_mode == VOIDmode)
3915 for (compute_mode = mode; compute_mode != VOIDmode;
3916 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3917 if (optab_libfunc (optab1, compute_mode)
3918 || optab_libfunc (optab2, compute_mode))
3919 break;
3920
3921 /* If we still couldn't find a mode, use MODE, but expand_binop will
3922 probably die. */
3923 if (compute_mode == VOIDmode)
3924 compute_mode = mode;
3925
3926 if (target && GET_MODE (target) == compute_mode)
3927 tquotient = target;
3928 else
3929 tquotient = gen_reg_rtx (compute_mode);
3930
3931 size = GET_MODE_BITSIZE (compute_mode);
3932 #if 0
3933 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3934 (mode), and thereby get better code when OP1 is a constant. Do that
3935 later. It will require going over all usages of SIZE below. */
3936 size = GET_MODE_BITSIZE (mode);
3937 #endif
3938
3939 /* Only deduct something for a REM if the last divide done was
3940 for a different constant. Then set the constant of the last
3941 divide. */
3942 max_cost = (unsignedp
3943 ? udiv_cost (speed, compute_mode)
3944 : sdiv_cost (speed, compute_mode));
3945 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3946 && INTVAL (op1) == last_div_const))
3947 max_cost -= (mul_cost (speed, compute_mode)
3948 + add_cost (speed, compute_mode));
3949
3950 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3951
3952 /* Now convert to the best mode to use. */
3953 if (compute_mode != mode)
3954 {
3955 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3956 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3957
3958 /* convert_modes may have placed op1 into a register, so we
3959 must recompute the following. */
3960 op1_is_constant = CONST_INT_P (op1);
3961 op1_is_pow2 = (op1_is_constant
3962 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3963 || (! unsignedp
3964 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3965 }
3966
3967 /* If one of the operands is a volatile MEM, copy it into a register. */
3968
3969 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3970 op0 = force_reg (compute_mode, op0);
3971 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3972 op1 = force_reg (compute_mode, op1);
3973
3974 /* If we need the remainder or if OP1 is constant, we need to
3975 put OP0 in a register in case it has any queued subexpressions. */
3976 if (rem_flag || op1_is_constant)
3977 op0 = force_reg (compute_mode, op0);
3978
3979 last = get_last_insn ();
3980
3981 /* Promote floor rounding to trunc rounding for unsigned operations. */
3982 if (unsignedp)
3983 {
3984 if (code == FLOOR_DIV_EXPR)
3985 code = TRUNC_DIV_EXPR;
3986 if (code == FLOOR_MOD_EXPR)
3987 code = TRUNC_MOD_EXPR;
3988 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3989 code = TRUNC_DIV_EXPR;
3990 }
3991
3992 if (op1 != const0_rtx)
3993 switch (code)
3994 {
3995 case TRUNC_MOD_EXPR:
3996 case TRUNC_DIV_EXPR:
3997 if (op1_is_constant)
3998 {
3999 if (unsignedp)
4000 {
4001 unsigned HOST_WIDE_INT mh, ml;
4002 int pre_shift, post_shift;
4003 int dummy;
4004 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4005 & GET_MODE_MASK (compute_mode));
4006
4007 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4008 {
4009 pre_shift = floor_log2 (d);
4010 if (rem_flag)
4011 {
4012 remainder
4013 = expand_binop (compute_mode, and_optab, op0,
4014 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4015 remainder, 1,
4016 OPTAB_LIB_WIDEN);
4017 if (remainder)
4018 return gen_lowpart (mode, remainder);
4019 }
4020 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4021 pre_shift, tquotient, 1);
4022 }
4023 else if (size <= HOST_BITS_PER_WIDE_INT)
4024 {
4025 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4026 {
4027 /* Most significant bit of divisor is set; emit an scc
4028 insn. */
4029 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4030 compute_mode, 1, 1);
4031 }
4032 else
4033 {
4034 /* Find a suitable multiplier and right shift count
4035 to use instead of dividing by D. */
4036
4037 mh = choose_multiplier (d, size, size,
4038 &ml, &post_shift, &dummy);
4039
4040 /* If the suggested multiplier is more than SIZE bits,
4041 we can do better for even divisors, using an
4042 initial right shift. */
4043 if (mh != 0 && (d & 1) == 0)
4044 {
4045 pre_shift = floor_log2 (d & -d);
4046 mh = choose_multiplier (d >> pre_shift, size,
4047 size - pre_shift,
4048 &ml, &post_shift, &dummy);
4049 gcc_assert (!mh);
4050 }
4051 else
4052 pre_shift = 0;
4053
4054 if (mh != 0)
4055 {
4056 rtx t1, t2, t3, t4;
4057
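/* The full multiplier here is ML + 2^SIZE.  The sequence below forms
   t1 = the SIZE-bit high part of op0 * ml and then
   q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1),
   folding the implicit 2^SIZE term into the add/shift fixup instead of
   forming the SIZE + 1 bit constant explicitly.  */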
4058 if (post_shift - 1 >= BITS_PER_WORD)
4059 goto fail1;
4060
4061 extra_cost
4062 = (shift_cost (speed, compute_mode, post_shift - 1)
4063 + shift_cost (speed, compute_mode, 1)
4064 + 2 * add_cost (speed, compute_mode));
4065 t1 = expmed_mult_highpart (compute_mode, op0,
4066 GEN_INT (ml),
4067 NULL_RTX, 1,
4068 max_cost - extra_cost);
4069 if (t1 == 0)
4070 goto fail1;
4071 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4072 op0, t1),
4073 NULL_RTX);
4074 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4075 t2, 1, NULL_RTX, 1);
4076 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4077 t1, t3),
4078 NULL_RTX);
4079 quotient = expand_shift
4080 (RSHIFT_EXPR, compute_mode, t4,
4081 post_shift - 1, tquotient, 1);
4082 }
4083 else
4084 {
4085 rtx t1, t2;
4086
4087 if (pre_shift >= BITS_PER_WORD
4088 || post_shift >= BITS_PER_WORD)
4089 goto fail1;
4090
4091 t1 = expand_shift
4092 (RSHIFT_EXPR, compute_mode, op0,
4093 pre_shift, NULL_RTX, 1);
4094 extra_cost
4095 = (shift_cost (speed, compute_mode, pre_shift)
4096 + shift_cost (speed, compute_mode, post_shift));
4097 t2 = expmed_mult_highpart (compute_mode, t1,
4098 GEN_INT (ml),
4099 NULL_RTX, 1,
4100 max_cost - extra_cost);
4101 if (t2 == 0)
4102 goto fail1;
4103 quotient = expand_shift
4104 (RSHIFT_EXPR, compute_mode, t2,
4105 post_shift, tquotient, 1);
4106 }
4107 }
4108 }
4109 else /* Too wide a mode to use tricky code */
4110 break;
4111
4112 insn = get_last_insn ();
4113 if (insn != last)
4114 set_dst_reg_note (insn, REG_EQUAL,
4115 gen_rtx_UDIV (compute_mode, op0, op1),
4116 quotient);
4117 }
4118 else /* TRUNC_DIV, signed */
4119 {
4120 unsigned HOST_WIDE_INT ml;
4121 int lgup, post_shift;
4122 rtx mlr;
4123 HOST_WIDE_INT d = INTVAL (op1);
4124 unsigned HOST_WIDE_INT abs_d;
4125
4126 /* Since d might be INT_MIN, we have to cast to
4127 unsigned HOST_WIDE_INT before negating to avoid
4128 undefined signed overflow. */
4129 abs_d = (d >= 0
4130 ? (unsigned HOST_WIDE_INT) d
4131 : - (unsigned HOST_WIDE_INT) d);
4132
4133 /* n rem d = n rem -d */
4134 if (rem_flag && d < 0)
4135 {
4136 d = abs_d;
4137 op1 = gen_int_mode (abs_d, compute_mode);
4138 }
4139
4140 if (d == 1)
4141 quotient = op0;
4142 else if (d == -1)
4143 quotient = expand_unop (compute_mode, neg_optab, op0,
4144 tquotient, 0);
4145 else if (HOST_BITS_PER_WIDE_INT >= size
4146 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4147 {
4148 /* This case is not handled correctly below. */
4149 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4150 compute_mode, 1, 1);
4151 if (quotient == 0)
4152 goto fail1;
4153 }
4154 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4155 && (rem_flag
4156 ? smod_pow2_cheap (speed, compute_mode)
4157 : sdiv_pow2_cheap (speed, compute_mode))
4158 /* We assume that the cheap metric is true if the
4159 optab has an expander for this mode. */
4160 && ((optab_handler ((rem_flag ? smod_optab
4161 : sdiv_optab),
4162 compute_mode)
4163 != CODE_FOR_nothing)
4164 || (optab_handler (sdivmod_optab,
4165 compute_mode)
4166 != CODE_FOR_nothing)))
4167 ;
4168 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4169 {
4170 if (rem_flag)
4171 {
4172 remainder = expand_smod_pow2 (compute_mode, op0, d);
4173 if (remainder)
4174 return gen_lowpart (mode, remainder);
4175 }
4176
4177 if (sdiv_pow2_cheap (speed, compute_mode)
4178 && ((optab_handler (sdiv_optab, compute_mode)
4179 != CODE_FOR_nothing)
4180 || (optab_handler (sdivmod_optab, compute_mode)
4181 != CODE_FOR_nothing)))
4182 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4183 compute_mode, op0,
4184 gen_int_mode (abs_d,
4185 compute_mode),
4186 NULL_RTX, 0);
4187 else
4188 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4189
4190 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4191 negate the quotient. */
4192 if (d < 0)
4193 {
4194 insn = get_last_insn ();
4195 if (insn != last
4196 && abs_d < ((unsigned HOST_WIDE_INT) 1
4197 << (HOST_BITS_PER_WIDE_INT - 1)))
4198 set_dst_reg_note (insn, REG_EQUAL,
4199 gen_rtx_DIV (compute_mode, op0,
4200 gen_int_mode
4201 (abs_d,
4202 compute_mode)),
4203 quotient);
4204
4205 quotient = expand_unop (compute_mode, neg_optab,
4206 quotient, quotient, 0);
4207 }
4208 }
4209 else if (size <= HOST_BITS_PER_WIDE_INT)
4210 {
4211 choose_multiplier (abs_d, size, size - 1,
4212 &ml, &post_shift, &lgup);
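/* If the multiplier fits in SIZE - 1 bits, the quotient is
   (MULSH (op0, ml) >> post_shift) - (op0 >> (SIZE - 1)), where MULSH is
   the signed highpart multiply.  Otherwise the else branch below uses
   ml - 2^SIZE as the multiplier and adds OP0 back in before shifting.
   In both cases the subtraction is reversed when d < 0, negating the
   quotient.  */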
4213 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4214 {
4215 rtx t1, t2, t3;
4216
4217 if (post_shift >= BITS_PER_WORD
4218 || size - 1 >= BITS_PER_WORD)
4219 goto fail1;
4220
4221 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4222 + shift_cost (speed, compute_mode, size - 1)
4223 + add_cost (speed, compute_mode));
4224 t1 = expmed_mult_highpart (compute_mode, op0,
4225 GEN_INT (ml), NULL_RTX, 0,
4226 max_cost - extra_cost);
4227 if (t1 == 0)
4228 goto fail1;
4229 t2 = expand_shift
4230 (RSHIFT_EXPR, compute_mode, t1,
4231 post_shift, NULL_RTX, 0);
4232 t3 = expand_shift
4233 (RSHIFT_EXPR, compute_mode, op0,
4234 size - 1, NULL_RTX, 0);
4235 if (d < 0)
4236 quotient
4237 = force_operand (gen_rtx_MINUS (compute_mode,
4238 t3, t2),
4239 tquotient);
4240 else
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t2, t3),
4244 tquotient);
4245 }
4246 else
4247 {
4248 rtx t1, t2, t3, t4;
4249
4250 if (post_shift >= BITS_PER_WORD
4251 || size - 1 >= BITS_PER_WORD)
4252 goto fail1;
4253
4254 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4255 mlr = gen_int_mode (ml, compute_mode);
4256 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4257 + shift_cost (speed, compute_mode, size - 1)
4258 + 2 * add_cost (speed, compute_mode));
4259 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4260 NULL_RTX, 0,
4261 max_cost - extra_cost);
4262 if (t1 == 0)
4263 goto fail1;
4264 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4265 t1, op0),
4266 NULL_RTX);
4267 t3 = expand_shift
4268 (RSHIFT_EXPR, compute_mode, t2,
4269 post_shift, NULL_RTX, 0);
4270 t4 = expand_shift
4271 (RSHIFT_EXPR, compute_mode, op0,
4272 size - 1, NULL_RTX, 0);
4273 if (d < 0)
4274 quotient
4275 = force_operand (gen_rtx_MINUS (compute_mode,
4276 t4, t3),
4277 tquotient);
4278 else
4279 quotient
4280 = force_operand (gen_rtx_MINUS (compute_mode,
4281 t3, t4),
4282 tquotient);
4283 }
4284 }
4285 else /* Too wide a mode to use tricky code */
4286 break;
4287
4288 insn = get_last_insn ();
4289 if (insn != last)
4290 set_dst_reg_note (insn, REG_EQUAL,
4291 gen_rtx_DIV (compute_mode, op0, op1),
4292 quotient);
4293 }
4294 break;
4295 }
4296 fail1:
4297 delete_insns_since (last);
4298 break;
4299
4300 case FLOOR_DIV_EXPR:
4301 case FLOOR_MOD_EXPR:
4302 /* We will come here only for signed operations. */
4303 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4304 {
4305 unsigned HOST_WIDE_INT mh, ml;
4306 int pre_shift, lgup, post_shift;
4307 HOST_WIDE_INT d = INTVAL (op1);
4308
4309 if (d > 0)
4310 {
4311 /* We could just as easily deal with negative constants here,
4312 but it does not seem worth the trouble for GCC 2.6. */
4313 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4314 {
4315 pre_shift = floor_log2 (d);
4316 if (rem_flag)
4317 {
4318 remainder = expand_binop (compute_mode, and_optab, op0,
4319 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4320 remainder, 0, OPTAB_LIB_WIDEN);
4321 if (remainder)
4322 return gen_lowpart (mode, remainder);
4323 }
4324 quotient = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 pre_shift, tquotient, 0);
4327 }
4328 else
4329 {
4330 rtx t1, t2, t3, t4;
4331
4332 mh = choose_multiplier (d, size, size - 1,
4333 &ml, &post_shift, &lgup);
4334 gcc_assert (!mh);
4335
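/* Floor division for D > 0: with S = OP0 >> (SIZE - 1) (0 or -1),
   OP0 ^ S is OP0 for non-negative OP0 and -OP0 - 1 otherwise, both
   non-negative, so the unsigned highpart multiply plus shift gives its
   truncating quotient, and XOR-ing with S again turns that into
   floor (OP0 / D).  */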
4336 if (post_shift < BITS_PER_WORD
4337 && size - 1 < BITS_PER_WORD)
4338 {
4339 t1 = expand_shift
4340 (RSHIFT_EXPR, compute_mode, op0,
4341 size - 1, NULL_RTX, 0);
4342 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4343 NULL_RTX, 0, OPTAB_WIDEN);
4344 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4345 + shift_cost (speed, compute_mode, size - 1)
4346 + 2 * add_cost (speed, compute_mode));
4347 t3 = expmed_mult_highpart (compute_mode, t2,
4348 GEN_INT (ml), NULL_RTX, 1,
4349 max_cost - extra_cost);
4350 if (t3 != 0)
4351 {
4352 t4 = expand_shift
4353 (RSHIFT_EXPR, compute_mode, t3,
4354 post_shift, NULL_RTX, 1);
4355 quotient = expand_binop (compute_mode, xor_optab,
4356 t4, t1, tquotient, 0,
4357 OPTAB_WIDEN);
4358 }
4359 }
4360 }
4361 }
4362 else
4363 {
4364 rtx nsign, t1, t2, t3, t4;
4365 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4366 op0, constm1_rtx), NULL_RTX);
4367 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4368 0, OPTAB_WIDEN);
4369 nsign = expand_shift
4370 (RSHIFT_EXPR, compute_mode, t2,
4371 size - 1, NULL_RTX, 0);
4372 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4373 NULL_RTX);
4374 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4375 NULL_RTX, 0);
4376 if (t4)
4377 {
4378 rtx t5;
4379 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4380 NULL_RTX, 0);
4381 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4382 t4, t5),
4383 tquotient);
4384 }
4385 }
4386 }
4387
4388 if (quotient != 0)
4389 break;
4390 delete_insns_since (last);
4391
4392 /* Try using an instruction that produces both the quotient and
4393 remainder, using truncation. We can easily compensate the quotient
4394 or remainder to get floor rounding, once we have the remainder.
4395 Notice that we also compute the final remainder value here,
4396 and return the result right away. */
4397 if (target == 0 || GET_MODE (target) != compute_mode)
4398 target = gen_reg_rtx (compute_mode);
4399
4400 if (rem_flag)
4401 {
4402 remainder
4403 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4404 quotient = gen_reg_rtx (compute_mode);
4405 }
4406 else
4407 {
4408 quotient
4409 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4410 remainder = gen_reg_rtx (compute_mode);
4411 }
4412
4413 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4414 quotient, remainder, 0))
4415 {
4416 /* This could be computed with a branch-less sequence.
4417 Save that for later. */
4418 rtx tem;
4419 rtx label = gen_label_rtx ();
4420 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4421 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4422 NULL_RTX, 0, OPTAB_WIDEN);
4423 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4424 expand_dec (quotient, const1_rtx);
4425 expand_inc (remainder, op1);
4426 emit_label (label);
4427 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4428 }
4429
4430 /* No luck with division elimination or divmod. Have to do it
4431 by conditionally adjusting op0 *and* the result. */
4432 {
4433 rtx label1, label2, label3, label4, label5;
4434 rtx adjusted_op0;
4435 rtx tem;
4436
4437 quotient = gen_reg_rtx (compute_mode);
4438 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4439 label1 = gen_label_rtx ();
4440 label2 = gen_label_rtx ();
4441 label3 = gen_label_rtx ();
4442 label4 = gen_label_rtx ();
4443 label5 = gen_label_rtx ();
4444 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4445 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4446 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4447 quotient, 0, OPTAB_LIB_WIDEN);
4448 if (tem != quotient)
4449 emit_move_insn (quotient, tem);
4450 emit_jump_insn (gen_jump (label5));
4451 emit_barrier ();
4452 emit_label (label1);
4453 expand_inc (adjusted_op0, const1_rtx);
4454 emit_jump_insn (gen_jump (label4));
4455 emit_barrier ();
4456 emit_label (label2);
4457 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4458 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4459 quotient, 0, OPTAB_LIB_WIDEN);
4460 if (tem != quotient)
4461 emit_move_insn (quotient, tem);
4462 emit_jump_insn (gen_jump (label5));
4463 emit_barrier ();
4464 emit_label (label3);
4465 expand_dec (adjusted_op0, const1_rtx);
4466 emit_label (label4);
4467 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4468 quotient, 0, OPTAB_LIB_WIDEN);
4469 if (tem != quotient)
4470 emit_move_insn (quotient, tem);
4471 expand_dec (quotient, const1_rtx);
4472 emit_label (label5);
4473 }
4474 break;
4475
4476 case CEIL_DIV_EXPR:
4477 case CEIL_MOD_EXPR:
4478 if (unsignedp)
4479 {
4480 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4481 {
4482 rtx t1, t2, t3;
4483 unsigned HOST_WIDE_INT d = INTVAL (op1);
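/* Ceiling division by 2^K: Q = (OP0 >> K) + ((OP0 & (2^K - 1)) != 0),
   preferably with the != 0 test done by a store-flag insn.  */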
4484 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4485 floor_log2 (d), tquotient, 1);
4486 t2 = expand_binop (compute_mode, and_optab, op0,
4487 GEN_INT (d - 1),
4488 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4489 t3 = gen_reg_rtx (compute_mode);
4490 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4491 compute_mode, 1, 1);
4492 if (t3 == 0)
4493 {
4494 rtx lab;
4495 lab = gen_label_rtx ();
4496 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4497 expand_inc (t1, const1_rtx);
4498 emit_label (lab);
4499 quotient = t1;
4500 }
4501 else
4502 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4503 t1, t3),
4504 tquotient);
4505 break;
4506 }
4507
4508 /* Try using an instruction that produces both the quotient and
4509 remainder, using truncation. We can easily compensate the
4510 quotient or remainder to get ceiling rounding, once we have the
4511 remainder. Notice that we also compute the final remainder
4512 value here, and return the result right away. */
4513 if (target == 0 || GET_MODE (target) != compute_mode)
4514 target = gen_reg_rtx (compute_mode);
4515
4516 if (rem_flag)
4517 {
4518 remainder = (REG_P (target)
4519 ? target : gen_reg_rtx (compute_mode));
4520 quotient = gen_reg_rtx (compute_mode);
4521 }
4522 else
4523 {
4524 quotient = (REG_P (target)
4525 ? target : gen_reg_rtx (compute_mode));
4526 remainder = gen_reg_rtx (compute_mode);
4527 }
4528
4529 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4530 remainder, 1))
4531 {
4532 /* This could be computed with a branch-less sequence.
4533 Save that for later. */
4534 rtx label = gen_label_rtx ();
4535 do_cmp_and_jump (remainder, const0_rtx, EQ,
4536 compute_mode, label);
4537 expand_inc (quotient, const1_rtx);
4538 expand_dec (remainder, op1);
4539 emit_label (label);
4540 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4541 }
4542
4543 /* No luck with division elimination or divmod. Have to do it
4544 by conditionally adjusting op0 *and* the result. */
4545 {
4546 rtx label1, label2;
4547 rtx adjusted_op0, tem;
4548
4549 quotient = gen_reg_rtx (compute_mode);
4550 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4551 label1 = gen_label_rtx ();
4552 label2 = gen_label_rtx ();
4553 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4554 compute_mode, label1);
4555 emit_move_insn (quotient, const0_rtx);
4556 emit_jump_insn (gen_jump (label2));
4557 emit_barrier ();
4558 emit_label (label1);
4559 expand_dec (adjusted_op0, const1_rtx);
4560 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4561 quotient, 1, OPTAB_LIB_WIDEN);
4562 if (tem != quotient)
4563 emit_move_insn (quotient, tem);
4564 expand_inc (quotient, const1_rtx);
4565 emit_label (label2);
4566 }
4567 }
4568 else /* signed */
4569 {
4570 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4571 && INTVAL (op1) >= 0)
4572 {
4573 /* This is extremely similar to the code for the unsigned case
4574 above. For 2.7 we should merge these variants, but for
4575 2.6.1 I don't want to touch the code for unsigned since that
4576 gets used in C. The signed case will only be used by other
4577 languages (Ada). */
4578
4579 rtx t1, t2, t3;
4580 unsigned HOST_WIDE_INT d = INTVAL (op1);
4581 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4582 floor_log2 (d), tquotient, 0);
4583 t2 = expand_binop (compute_mode, and_optab, op0,
4584 GEN_INT (d - 1),
4585 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4586 t3 = gen_reg_rtx (compute_mode);
4587 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4588 compute_mode, 1, 1);
4589 if (t3 == 0)
4590 {
4591 rtx lab;
4592 lab = gen_label_rtx ();
4593 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4594 expand_inc (t1, const1_rtx);
4595 emit_label (lab);
4596 quotient = t1;
4597 }
4598 else
4599 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4600 t1, t3),
4601 tquotient);
4602 break;
4603 }
4604
4605 /* Try using an instruction that produces both the quotient and
4606 remainder, using truncation. We can easily compensate the
4607 quotient or remainder to get ceiling rounding, once we have the
4608 remainder. Notice that we also compute the final remainder
4609 value here, and return the result right away. */
4610 if (target == 0 || GET_MODE (target) != compute_mode)
4611 target = gen_reg_rtx (compute_mode);
4612 if (rem_flag)
4613 {
4614 remainder = (REG_P (target)
4615 ? target : gen_reg_rtx (compute_mode));
4616 quotient = gen_reg_rtx (compute_mode);
4617 }
4618 else
4619 {
4620 quotient = (REG_P (target)
4621 ? target : gen_reg_rtx (compute_mode));
4622 remainder = gen_reg_rtx (compute_mode);
4623 }
4624
4625 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4626 remainder, 0))
4627 {
4628 /* This could be computed with a branch-less sequence.
4629 Save that for later. */
4630 rtx tem;
4631 rtx label = gen_label_rtx ();
4632 do_cmp_and_jump (remainder, const0_rtx, EQ,
4633 compute_mode, label);
4634 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4635 NULL_RTX, 0, OPTAB_WIDEN);
4636 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4637 expand_inc (quotient, const1_rtx);
4638 expand_dec (remainder, op1);
4639 emit_label (label);
4640 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4641 }
4642
4643 /* No luck with division elimination or divmod. Have to do it
4644 by conditionally adjusting op0 *and* the result. */
4645 {
4646 rtx label1, label2, label3, label4, label5;
4647 rtx adjusted_op0;
4648 rtx tem;
4649
4650 quotient = gen_reg_rtx (compute_mode);
4651 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4652 label1 = gen_label_rtx ();
4653 label2 = gen_label_rtx ();
4654 label3 = gen_label_rtx ();
4655 label4 = gen_label_rtx ();
4656 label5 = gen_label_rtx ();
4657 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4658 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4659 compute_mode, label1);
4660 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4661 quotient, 0, OPTAB_LIB_WIDEN);
4662 if (tem != quotient)
4663 emit_move_insn (quotient, tem);
4664 emit_jump_insn (gen_jump (label5));
4665 emit_barrier ();
4666 emit_label (label1);
4667 expand_dec (adjusted_op0, const1_rtx);
4668 emit_jump_insn (gen_jump (label4));
4669 emit_barrier ();
4670 emit_label (label2);
4671 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4672 compute_mode, label3);
4673 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4674 quotient, 0, OPTAB_LIB_WIDEN);
4675 if (tem != quotient)
4676 emit_move_insn (quotient, tem);
4677 emit_jump_insn (gen_jump (label5));
4678 emit_barrier ();
4679 emit_label (label3);
4680 expand_inc (adjusted_op0, const1_rtx);
4681 emit_label (label4);
4682 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4683 quotient, 0, OPTAB_LIB_WIDEN);
4684 if (tem != quotient)
4685 emit_move_insn (quotient, tem);
4686 expand_inc (quotient, const1_rtx);
4687 emit_label (label5);
4688 }
4689 }
4690 break;
4691
4692 case EXACT_DIV_EXPR:
4693 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4694 {
4695 HOST_WIDE_INT d = INTVAL (op1);
4696 unsigned HOST_WIDE_INT ml;
4697 int pre_shift;
4698 rtx t1;
4699
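/* Since the division is known to be exact, OP0 / D can be computed as
   (OP0 >> PRE_SHIFT) * INVERSE, where PRE_SHIFT strips the factors of 2
   from D and INVERSE is the multiplicative inverse of the odd part of D
   modulo 2^SIZE (see invert_mod2n above); only the low part of the
   multiplication is needed.  */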
4700 pre_shift = floor_log2 (d & -d);
4701 ml = invert_mod2n (d >> pre_shift, size);
4702 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4703 pre_shift, NULL_RTX, unsignedp);
4704 quotient = expand_mult (compute_mode, t1,
4705 gen_int_mode (ml, compute_mode),
4706 NULL_RTX, 1);
4707
4708 insn = get_last_insn ();
4709 set_dst_reg_note (insn, REG_EQUAL,
4710 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4711 compute_mode, op0, op1),
4712 quotient);
4713 }
4714 break;
4715
4716 case ROUND_DIV_EXPR:
4717 case ROUND_MOD_EXPR:
4718 if (unsignedp)
4719 {
4720 rtx tem;
4721 rtx label;
4722 label = gen_label_rtx ();
4723 quotient = gen_reg_rtx (compute_mode);
4724 remainder = gen_reg_rtx (compute_mode);
4725 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4726 {
4727 rtx tem;
4728 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4729 quotient, 1, OPTAB_LIB_WIDEN);
4730 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4731 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4732 remainder, 1, OPTAB_LIB_WIDEN);
4733 }
4734 tem = plus_constant (compute_mode, op1, -1);
4735 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4736 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4737 expand_inc (quotient, const1_rtx);
4738 expand_dec (remainder, op1);
4739 emit_label (label);
4740 }
4741 else
4742 {
4743 rtx abs_rem, abs_op1, tem, mask;
4744 rtx label;
4745 label = gen_label_rtx ();
4746 quotient = gen_reg_rtx (compute_mode);
4747 remainder = gen_reg_rtx (compute_mode);
4748 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4749 {
4750 rtx tem;
4751 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4752 quotient, 0, OPTAB_LIB_WIDEN);
4753 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4754 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4755 remainder, 0, OPTAB_LIB_WIDEN);
4756 }
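/* Round to nearest: when 2 * |remainder| >= |OP1|, bump the quotient by
   one away from zero and subtract OP1 (taken with the quotient's sign)
   from the remainder.  MASK below is the sign of OP0 ^ OP1 spread over
   the word, so (1 ^ MASK) - MASK is +1 or -1 and (OP1 ^ MASK) - MASK is
   +OP1 or -OP1, which makes the adjustment branch-free.  */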
4757 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4758 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4759 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4760 1, NULL_RTX, 1);
4761 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4762 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4763 NULL_RTX, 0, OPTAB_WIDEN);
4764 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4765 size - 1, NULL_RTX, 0);
4766 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4767 NULL_RTX, 0, OPTAB_WIDEN);
4768 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4769 NULL_RTX, 0, OPTAB_WIDEN);
4770 expand_inc (quotient, tem);
4771 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4774 NULL_RTX, 0, OPTAB_WIDEN);
4775 expand_dec (remainder, tem);
4776 emit_label (label);
4777 }
4778 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4779
4780 default:
4781 gcc_unreachable ();
4782 }
4783
4784 if (quotient == 0)
4785 {
4786 if (target && GET_MODE (target) != compute_mode)
4787 target = 0;
4788
4789 if (rem_flag)
4790 {
4791 /* Try to produce the remainder without producing the quotient.
4792 If we seem to have a divmod pattern that does not require widening,
4793 don't try widening here. We should really have a WIDEN argument
4794 to expand_twoval_binop, since what we'd really like to do here is
4795 1) try a mod insn in compute_mode
4796 2) try a divmod insn in compute_mode
4797 3) try a div insn in compute_mode and multiply-subtract to get
4798 remainder
4799 4) try the same things with widening allowed. */
4800 remainder
4801 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4802 op0, op1, target,
4803 unsignedp,
4804 ((optab_handler (optab2, compute_mode)
4805 != CODE_FOR_nothing)
4806 ? OPTAB_DIRECT : OPTAB_WIDEN));
4807 if (remainder == 0)
4808 {
4809 /* No luck there. Can we do remainder and divide at once
4810 without a library call? */
4811 remainder = gen_reg_rtx (compute_mode);
4812 if (! expand_twoval_binop ((unsignedp
4813 ? udivmod_optab
4814 : sdivmod_optab),
4815 op0, op1,
4816 NULL_RTX, remainder, unsignedp))
4817 remainder = 0;
4818 }
4819
4820 if (remainder)
4821 return gen_lowpart (mode, remainder);
4822 }
4823
4824 /* Produce the quotient. Try a quotient insn, but not a library call.
4825 If we have a divmod in this mode, use it in preference to widening
4826 the div (for this test we assume it will not fail). Note that optab2
4827 is set to the one of the two optabs that the call below will use. */
4828 quotient
4829 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4830 op0, op1, rem_flag ? NULL_RTX : target,
4831 unsignedp,
4832 ((optab_handler (optab2, compute_mode)
4833 != CODE_FOR_nothing)
4834 ? OPTAB_DIRECT : OPTAB_WIDEN));
4835
4836 if (quotient == 0)
4837 {
4838 /* No luck there. Try a quotient-and-remainder insn,
4839 keeping the quotient alone. */
4840 quotient = gen_reg_rtx (compute_mode);
4841 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4842 op0, op1,
4843 quotient, NULL_RTX, unsignedp))
4844 {
4845 quotient = 0;
4846 if (! rem_flag)
4847 /* Still no luck. If we are not computing the remainder,
4848 use a library call for the quotient. */
4849 quotient = sign_expand_binop (compute_mode,
4850 udiv_optab, sdiv_optab,
4851 op0, op1, target,
4852 unsignedp, OPTAB_LIB_WIDEN);
4853 }
4854 }
4855 }
4856
4857 if (rem_flag)
4858 {
4859 if (target && GET_MODE (target) != compute_mode)
4860 target = 0;
4861
4862 if (quotient == 0)
4863 {
4864 /* No divide instruction either. Use library for remainder. */
4865 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4866 op0, op1, target,
4867 unsignedp, OPTAB_LIB_WIDEN);
4868 /* No remainder function. Try a quotient-and-remainder
4869 function, keeping the remainder. */
4870 if (!remainder)
4871 {
4872 remainder = gen_reg_rtx (compute_mode);
4873 if (!expand_twoval_binop_libfunc
4874 (unsignedp ? udivmod_optab : sdivmod_optab,
4875 op0, op1,
4876 NULL_RTX, remainder,
4877 unsignedp ? UMOD : MOD))
4878 remainder = NULL_RTX;
4879 }
4880 }
4881 else
4882 {
4883 /* We divided. Now finish doing X - Y * (X / Y). */
4884 remainder = expand_mult (compute_mode, quotient, op1,
4885 NULL_RTX, unsignedp);
4886 remainder = expand_binop (compute_mode, sub_optab, op0,
4887 remainder, target, unsignedp,
4888 OPTAB_LIB_WIDEN);
4889 }
4890 }
4891
4892 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4893 }
4894 \f
4895 /* Return a tree node with data type TYPE, describing the value of X.
4896 Usually this is a VAR_DECL, if there is no obvious better choice.
4897 X may be an expression; however, we only support those expressions
4898 generated by loop.c. */
4899
4900 tree
4901 make_tree (tree type, rtx x)
4902 {
4903 tree t;
4904
4905 switch (GET_CODE (x))
4906 {
4907 case CONST_INT:
4908 {
4909 HOST_WIDE_INT hi = 0;
4910
4911 if (INTVAL (x) < 0
4912 && !(TYPE_UNSIGNED (type)
4913 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4914 < HOST_BITS_PER_WIDE_INT)))
4915 hi = -1;
4916
4917 t = build_int_cst_wide (type, INTVAL (x), hi);
4918
4919 return t;
4920 }
4921
4922 case CONST_DOUBLE:
4923 if (GET_MODE (x) == VOIDmode)
4924 t = build_int_cst_wide (type,
4925 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4926 else
4927 {
4928 REAL_VALUE_TYPE d;
4929
4930 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4931 t = build_real (type, d);
4932 }
4933
4934 return t;
4935
4936 case CONST_VECTOR:
4937 {
4938 int units = CONST_VECTOR_NUNITS (x);
4939 tree itype = TREE_TYPE (type);
4940 tree *elts;
4941 int i;
4942
4943 /* Build a tree with vector elements. */
4944 elts = XALLOCAVEC (tree, units);
4945 for (i = units - 1; i >= 0; --i)
4946 {
4947 rtx elt = CONST_VECTOR_ELT (x, i);
4948 elts[i] = make_tree (itype, elt);
4949 }
4950
4951 return build_vector (type, elts);
4952 }
4953
4954 case PLUS:
4955 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4956 make_tree (type, XEXP (x, 1)));
4957
4958 case MINUS:
4959 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4960 make_tree (type, XEXP (x, 1)));
4961
4962 case NEG:
4963 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4964
4965 case MULT:
4966 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4967 make_tree (type, XEXP (x, 1)));
4968
4969 case ASHIFT:
4970 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4971 make_tree (type, XEXP (x, 1)));
4972
4973 case LSHIFTRT:
4974 t = unsigned_type_for (type);
4975 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4976 make_tree (t, XEXP (x, 0)),
4977 make_tree (type, XEXP (x, 1))));
4978
4979 case ASHIFTRT:
4980 t = signed_type_for (type);
4981 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4982 make_tree (t, XEXP (x, 0)),
4983 make_tree (type, XEXP (x, 1))));
4984
4985 case DIV:
4986 if (TREE_CODE (type) != REAL_TYPE)
4987 t = signed_type_for (type);
4988 else
4989 t = type;
4990
4991 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4992 make_tree (t, XEXP (x, 0)),
4993 make_tree (t, XEXP (x, 1))));
4994 case UDIV:
4995 t = unsigned_type_for (type);
4996 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4997 make_tree (t, XEXP (x, 0)),
4998 make_tree (t, XEXP (x, 1))));
4999
5000 case SIGN_EXTEND:
5001 case ZERO_EXTEND:
5002 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5003 GET_CODE (x) == ZERO_EXTEND);
5004 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5005
5006 case CONST:
5007 return make_tree (type, XEXP (x, 0));
5008
5009 case SYMBOL_REF:
5010 t = SYMBOL_REF_DECL (x);
5011 if (t)
5012 return fold_convert (type, build_fold_addr_expr (t));
5013 /* else fall through. */
5014
5015 default:
5016 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5017
5018 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5019 address mode to pointer mode. */
5020 if (POINTER_TYPE_P (type))
5021 x = convert_memory_address_addr_space
5022 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5023
5024 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5025 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5026 t->decl_with_rtl.rtl = x;
5027
5028 return t;
5029 }
5030 }
5031 \f
5032 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5033 and returning TARGET.
5034
5035 If TARGET is 0, a pseudo-register or constant is returned. */
5036
5037 rtx
5038 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5039 {
5040 rtx tem = 0;
5041
5042 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5043 tem = simplify_binary_operation (AND, mode, op0, op1);
5044 if (tem == 0)
5045 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5046
5047 if (target == 0)
5048 target = tem;
5049 else if (tem != target)
5050 emit_move_insn (target, tem);
5051 return target;
5052 }
5053
5054 /* Helper function for emit_store_flag. */
5055 static rtx
5056 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5057 enum machine_mode mode, enum machine_mode compare_mode,
5058 int unsignedp, rtx x, rtx y, int normalizep,
5059 enum machine_mode target_mode)
5060 {
5061 struct expand_operand ops[4];
5062 rtx op0, last, comparison, subtarget;
5063 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5064
5065 last = get_last_insn ();
5066 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5067 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5068 if (!x || !y)
5069 {
5070 delete_insns_since (last);
5071 return NULL_RTX;
5072 }
5073
5074 if (target_mode == VOIDmode)
5075 target_mode = result_mode;
5076 if (!target)
5077 target = gen_reg_rtx (target_mode);
5078
5079 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5080
5081 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5082 create_fixed_operand (&ops[1], comparison);
5083 create_fixed_operand (&ops[2], x);
5084 create_fixed_operand (&ops[3], y);
5085 if (!maybe_expand_insn (icode, 4, ops))
5086 {
5087 delete_insns_since (last);
5088 return NULL_RTX;
5089 }
5090 subtarget = ops[0].value;
5091
5092 /* If we are converting to a wider mode, first convert to
5093 TARGET_MODE, then normalize. This produces better combining
5094 opportunities on machines that have a SIGN_EXTRACT when we are
5095 testing a single bit. This mostly benefits the 68k.
5096
5097 If STORE_FLAG_VALUE does not have the sign bit set when
5098 interpreted in MODE, we can do this conversion as unsigned, which
5099 is usually more efficient. */
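/* Illustration only: with STORE_FLAG_VALUE == 1 and a QImode scc result
   widened to SImode, the conversion can be the unsigned

       (zero_extend:SI (reg:QI ...))

   because 1 never has its sign bit set; a target whose scc insns leave
   -1 needs a sign extension instead.  */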
5100 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5101 {
5102 convert_move (target, subtarget,
5103 val_signbit_known_clear_p (result_mode,
5104 STORE_FLAG_VALUE));
5105 op0 = target;
5106 result_mode = target_mode;
5107 }
5108 else
5109 op0 = subtarget;
5110
5111 /* If we want to keep subexpressions around, don't reuse our last
5112 target. */
5113 if (optimize)
5114 subtarget = 0;
5115
5116 /* Now normalize to the proper value in MODE. Sometimes we don't
5117 have to do anything. */
5118 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5119 ;
5120 /* STORE_FLAG_VALUE might be the most negative number, so write
5121 the comparison this way to avoid a compile-time warning. */
5122 else if (- normalizep == STORE_FLAG_VALUE)
5123 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5124
5125 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5126 it hard to use a value of just the sign bit due to ANSI integer
5127 constant typing rules. */
5128 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5129 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5130 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5131 normalizep == 1);
5132 else
5133 {
5134 gcc_assert (STORE_FLAG_VALUE & 1);
5135
5136 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5137 if (normalizep == -1)
5138 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5139 }
5140
5141 /* If we were converting to a smaller mode, do the conversion now. */
5142 if (target_mode != result_mode)
5143 {
5144 convert_move (target, op0, 0);
5145 return target;
5146 }
5147 else
5148 return op0;
5149 }
5150
5151
5152 /* A subroutine of emit_store_flag only including "tricks" that do not
5153 need a recursive call. These are kept separate to avoid infinite
5154 loops. */
5155
5156 static rtx
5157 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5158 enum machine_mode mode, int unsignedp, int normalizep,
5159 enum machine_mode target_mode)
5160 {
5161 rtx subtarget;
5162 enum insn_code icode;
5163 enum machine_mode compare_mode;
5164 enum mode_class mclass;
5165 enum rtx_code scode;
5166 rtx tem;
5167
5168 if (unsignedp)
5169 code = unsigned_condition (code);
5170 scode = swap_condition (code);
5171
5172 /* If one operand is constant, make it the second one. Only do this
5173 if the other operand is not constant as well. */
5174
5175 if (swap_commutative_operands_p (op0, op1))
5176 {
5177 tem = op0;
5178 op0 = op1;
5179 op1 = tem;
5180 code = swap_condition (code);
5181 }
5182
5183 if (mode == VOIDmode)
5184 mode = GET_MODE (op0);
5185
5186 /* For some comparisons with 1 and -1, we can convert this to
5187 comparisons with zero. This will often produce more opportunities for
5188 store-flag insns. */
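/* Illustration only, summarizing the rewrites performed by the switch
   below (signed comparisons unless noted):

       x <   1   becomes   x <= 0
       x <= -1   becomes   x <  0
       x >=  1   becomes   x >  0
       x >  -1   becomes   x >= 0
       x >=  1   (unsigned) becomes   x != 0
       x <   1   (unsigned) becomes   x == 0   */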
5189
5190 switch (code)
5191 {
5192 case LT:
5193 if (op1 == const1_rtx)
5194 op1 = const0_rtx, code = LE;
5195 break;
5196 case LE:
5197 if (op1 == constm1_rtx)
5198 op1 = const0_rtx, code = LT;
5199 break;
5200 case GE:
5201 if (op1 == const1_rtx)
5202 op1 = const0_rtx, code = GT;
5203 break;
5204 case GT:
5205 if (op1 == constm1_rtx)
5206 op1 = const0_rtx, code = GE;
5207 break;
5208 case GEU:
5209 if (op1 == const1_rtx)
5210 op1 = const0_rtx, code = NE;
5211 break;
5212 case LTU:
5213 if (op1 == const1_rtx)
5214 op1 = const0_rtx, code = EQ;
5215 break;
5216 default:
5217 break;
5218 }
5219
5220 /* If we are comparing a double-word integer with zero or -1, we can
5221 convert the comparison into one involving a single word. */
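/* Illustration only: for a DImode X on a 32-bit target, with LO and HI
   its two word-sized halves,

       X == 0    is computed as   (LO | HI) == 0
       X == -1   is computed as   (LO & HI) == -1
       X <  0    tests only the sign bit of HI
       X >= 0    likewise tests only HI.  */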
5222 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5223 && GET_MODE_CLASS (mode) == MODE_INT
5224 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5225 {
5226 if ((code == EQ || code == NE)
5227 && (op1 == const0_rtx || op1 == constm1_rtx))
5228 {
5229 rtx op00, op01;
5230
5231 /* Do a logical OR or AND of the two words and compare the
5232 result. */
5233 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5234 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5235 tem = expand_binop (word_mode,
5236 op1 == const0_rtx ? ior_optab : and_optab,
5237 op00, op01, NULL_RTX, unsignedp,
5238 OPTAB_DIRECT);
5239
5240 if (tem != 0)
5241 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5242 unsignedp, normalizep);
5243 }
5244 else if ((code == LT || code == GE) && op1 == const0_rtx)
5245 {
5246 rtx op0h;
5247
5248 /* If testing the sign bit, we can just test the high word. */
5249 op0h = simplify_gen_subreg (word_mode, op0, mode,
5250 subreg_highpart_offset (word_mode,
5251 mode));
5252 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5253 unsignedp, normalizep);
5254 }
5255 else
5256 tem = NULL_RTX;
5257
5258 if (tem)
5259 {
5260 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5261 return tem;
5262 if (!target)
5263 target = gen_reg_rtx (target_mode);
5264
5265 convert_move (target, tem,
5266 !val_signbit_known_set_p (word_mode,
5267 (normalizep ? normalizep
5268 : STORE_FLAG_VALUE)));
5269 return target;
5270 }
5271 }
5272
5273 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5274 complement of A (for GE) and shifting the sign bit to the low bit. */
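/* Illustration only, in C terms for a 32-bit mode with
   STORE_FLAG_VALUE == 1:

       (A <  0)   is   (unsigned) A  >> 31
       (A >= 0)   is   (unsigned) ~A >> 31

   For a 0/-1 result (NORMALIZEP == -1) the shift is arithmetic rather
   than logical.  */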
5275 if (op1 == const0_rtx && (code == LT || code == GE)
5276 && GET_MODE_CLASS (mode) == MODE_INT
5277 && (normalizep || STORE_FLAG_VALUE == 1
5278 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5279 {
5280 subtarget = target;
5281
5282 if (!target)
5283 target_mode = mode;
5284
5285 /* If the result is to be wider than OP0, it is best to convert it
5286 first. If it is to be narrower, it is *incorrect* to convert it
5287 first. */
5288 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5289 {
5290 op0 = convert_modes (target_mode, mode, op0, 0);
5291 mode = target_mode;
5292 }
5293
5294 if (target_mode != mode)
5295 subtarget = 0;
5296
5297 if (code == GE)
5298 op0 = expand_unop (mode, one_cmpl_optab, op0,
5299 ((STORE_FLAG_VALUE == 1 || normalizep)
5300 ? 0 : subtarget), 0);
5301
5302 if (STORE_FLAG_VALUE == 1 || normalizep)
5303 /* If we are supposed to produce a 0/1 value, we want to do
5304 a logical shift from the sign bit to the low-order bit; for
5305 a -1/0 value, we do an arithmetic shift. */
5306 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5307 GET_MODE_BITSIZE (mode) - 1,
5308 subtarget, normalizep != -1);
5309
5310 if (mode != target_mode)
5311 op0 = convert_modes (target_mode, mode, op0, 0);
5312
5313 return op0;
5314 }
5315
5316 mclass = GET_MODE_CLASS (mode);
5317 for (compare_mode = mode; compare_mode != VOIDmode;
5318 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5319 {
5320 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5321 icode = optab_handler (cstore_optab, optab_mode);
5322 if (icode != CODE_FOR_nothing)
5323 {
5324 do_pending_stack_adjust ();
5325 tem = emit_cstore (target, icode, code, mode, compare_mode,
5326 unsignedp, op0, op1, normalizep, target_mode);
5327 if (tem)
5328 return tem;
5329
5330 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5331 {
5332 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5333 unsignedp, op1, op0, normalizep, target_mode);
5334 if (tem)
5335 return tem;
5336 }
5337 break;
5338 }
5339 }
5340
5341 return 0;
5342 }
5343
5344 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5345 and storing in TARGET. Normally return TARGET.
5346 Return 0 if that cannot be done.
5347
5348 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5349 it is VOIDmode, they cannot both be CONST_INT.
5350
5351 UNSIGNEDP is for the case where we have to widen the operands
5352 to perform the operation. It says to use zero-extension.
5353
5354 NORMALIZEP is 1 if we should convert the result to be either zero
5355 or one. NORMALIZEP is -1 if we should convert the result to be
5356 either zero or -1. If NORMALIZEP is zero, the result will be left
5357 "raw" out of the scc insn. */
5358
5359 rtx
5360 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5361 enum machine_mode mode, int unsignedp, int normalizep)
5362 {
5363 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5364 enum rtx_code rcode;
5365 rtx subtarget;
5366 rtx tem, last, trueval;
5367
5368 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5369 target_mode);
5370 if (tem)
5371 return tem;
5372
5373 /* If we reached here, we can't do this with a scc insn; however, there
5374 are some comparisons that can be done in other ways. Don't do any
5375 of these cases if branches are very cheap. */
5376 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5377 return 0;
5378
5379 /* See what we need to return. We can only return a 1, -1, or the
5380 sign bit. */
5381
5382 if (normalizep == 0)
5383 {
5384 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5385 normalizep = STORE_FLAG_VALUE;
5386
5387 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5388 ;
5389 else
5390 return 0;
5391 }
5392
5393 last = get_last_insn ();
5394
5395 /* If optimizing, use different pseudo registers for each insn, instead
5396 of reusing the same pseudo. This leads to better CSE, but slows
5397 down the compiler, since there are more pseudos. */
5398 subtarget = (!optimize
5399 && (target_mode == mode)) ? target : NULL_RTX;
5400 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5401
5402 /* For floating-point comparisons, try the reverse comparison or try
5403 changing the "orderedness" of the comparison. */
5404 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5405 {
5406 enum rtx_code first_code;
5407 bool and_them;
5408
5409 rcode = reverse_condition_maybe_unordered (code);
5410 if (can_compare_p (rcode, mode, ccp_store_flag)
5411 && (code == ORDERED || code == UNORDERED
5412 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5413 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5414 {
5415 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5416 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5417
5418 /* For the reverse comparison, use either an addition or an XOR. */
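/* Illustration only: when STORE_FLAG_VALUE == 1 and NORMALIZEP == -1,
   the reverse comparison REV is computed as a 0/1 value, and

       REV + (-1)  ==  -(original condition)

   i.e. adding NORMALIZEP turns 0 into -1 and 1 into 0, which is exactly
   the normalized result wanted for the original comparison.  */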
5419 if (want_add
5420 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5421 optimize_insn_for_speed_p ()) == 0)
5422 {
5423 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5424 STORE_FLAG_VALUE, target_mode);
5425 if (tem)
5426 return expand_binop (target_mode, add_optab, tem,
5427 GEN_INT (normalizep),
5428 target, 0, OPTAB_WIDEN);
5429 }
5430 else if (!want_add
5431 && rtx_cost (trueval, XOR, 1,
5432 optimize_insn_for_speed_p ()) == 0)
5433 {
5434 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5435 normalizep, target_mode);
5436 if (tem)
5437 return expand_binop (target_mode, xor_optab, tem, trueval,
5438 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5439 }
5440 }
5441
5442 delete_insns_since (last);
5443
5444 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5445 if (code == ORDERED || code == UNORDERED)
5446 return 0;
5447
5448 and_them = split_comparison (code, mode, &first_code, &code);
5449
5450 /* If there are no NaNs, the first comparison should always fall through.
5451 Effectively change the comparison to the other one. */
5452 if (!HONOR_NANS (mode))
5453 {
5454 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5455 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5456 target_mode);
5457 }
5458
5459 #ifdef HAVE_conditional_move
5460 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5461 conditional move. */
5462 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5463 normalizep, target_mode);
5464 if (tem == 0)
5465 return 0;
5466
5467 if (and_them)
5468 tem = emit_conditional_move (target, code, op0, op1, mode,
5469 tem, const0_rtx, GET_MODE (tem), 0);
5470 else
5471 tem = emit_conditional_move (target, code, op0, op1, mode,
5472 trueval, tem, GET_MODE (tem), 0);
5473
5474 if (tem == 0)
5475 delete_insns_since (last);
5476 return tem;
5477 #else
5478 return 0;
5479 #endif
5480 }
5481
5482 /* The remaining tricks only apply to integer comparisons. */
5483
5484 if (GET_MODE_CLASS (mode) != MODE_INT)
5485 return 0;
5486
5487 /* If this is an equality comparison of integers, we can try to exclusive-or
5488 (or subtract) the two operands and use a recursive call to try the
5489 comparison with zero. Don't do any of these cases if branches are
5490 very cheap. */
5491
5492 if ((code == EQ || code == NE) && op1 != const0_rtx)
5493 {
5494 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5495 OPTAB_WIDEN);
5496
5497 if (tem == 0)
5498 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5499 OPTAB_WIDEN);
5500 if (tem != 0)
5501 tem = emit_store_flag (target, code, tem, const0_rtx,
5502 mode, unsignedp, normalizep);
5503 if (tem != 0)
5504 return tem;
5505
5506 delete_insns_since (last);
5507 }
5508
5509 /* For integer comparisons, try the reverse comparison. However, for
5510 small X and if we'd have to extend anyway, implementing "X != 0"
5511 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5512 rcode = reverse_condition (code);
5513 if (can_compare_p (rcode, mode, ccp_store_flag)
5514 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5515 && code == NE
5516 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5517 && op1 == const0_rtx))
5518 {
5519 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5520 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5521
5522 /* Again, for the reverse comparison, use either an addition or an XOR. */
5523 if (want_add
5524 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5525 optimize_insn_for_speed_p ()) == 0)
5526 {
5527 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5528 STORE_FLAG_VALUE, target_mode);
5529 if (tem != 0)
5530 tem = expand_binop (target_mode, add_optab, tem,
5531 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5532 }
5533 else if (!want_add
5534 && rtx_cost (trueval, XOR, 1,
5535 optimize_insn_for_speed_p ()) == 0)
5536 {
5537 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5538 normalizep, target_mode);
5539 if (tem != 0)
5540 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5541 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5542 }
5543
5544 if (tem != 0)
5545 return tem;
5546 delete_insns_since (last);
5547 }
5548
5549 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5550 the constant zero. Reject all other comparisons at this point. Only
5551 do LE and GT if branches are expensive since they are expensive on
5552 2-operand machines. */
5553
5554 if (op1 != const0_rtx
5555 || (code != EQ && code != NE
5556 && (BRANCH_COST (optimize_insn_for_speed_p (),
5557 false) <= 1 || (code != LE && code != GT))))
5558 return 0;
5559
5560 /* Try to put the result of the comparison in the sign bit. Assume we can't
5561 do the necessary operation below. */
5562
5563 tem = 0;
5564
5565 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5566 the sign bit set. */
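/* Illustration only, for a 32-bit A:
       A =  5:   5 | 4   =  5    sign bit clear
       A =  0:   0 | -1  = -1    sign bit set
       A = -3:  -3 | -4  = -3    sign bit set  */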
5567
5568 if (code == LE)
5569 {
5570 /* This is destructive, so SUBTARGET can't be OP0. */
5571 if (rtx_equal_p (subtarget, op0))
5572 subtarget = 0;
5573
5574 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5575 OPTAB_WIDEN);
5576 if (tem)
5577 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5578 OPTAB_WIDEN);
5579 }
5580
5581 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5582 number of bits in the mode of OP0, minus one. */
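/* Illustration only, for a 32-bit A (the shift is arithmetic):
       A =  5:   (5 >> 31)  - 5    =  0 - 5  = -5   sign bit set
       A =  0:   (0 >> 31)  - 0    =  0             sign bit clear
       A = -3:   (-3 >> 31) - (-3) = -1 + 3  =  2   sign bit clear  */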
5583
5584 if (code == GT)
5585 {
5586 if (rtx_equal_p (subtarget, op0))
5587 subtarget = 0;
5588
5589 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5590 GET_MODE_BITSIZE (mode) - 1,
5591 subtarget, 0);
5592 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5593 OPTAB_WIDEN);
5594 }
5595
5596 if (code == EQ || code == NE)
5597 {
5598 /* For EQ or NE, one way to do the comparison is to apply an operation
5599 that converts the operand into a positive number if it is nonzero
5600 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5601 for NE we negate. This puts the result in the sign bit. Then we
5602 normalize with a shift, if needed.
5603
5604 Two operations that can do the above actions are ABS and FFS, so try
5605 them. If that doesn't work, and MODE is smaller than a full word,
5606 we can use zero-extension to the wider mode (an unsigned conversion)
5607 as the operation. */
5608
5609 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5610 that is compensated by the subsequent overflow when subtracting
5611 one / negating. */
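/* Illustration only, using ABS on a 32-bit X:
       X != 0   is the sign bit of   -abs (X)
       X == 0   is the sign bit of   abs (X) - 1
   (X == INT_MIN works out because abs (INT_MIN) is INT_MIN, and the
   subsequent negation / subtraction overflows back into a value with
   the expected sign bit.)  */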
5612
5613 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5614 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5615 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5616 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5617 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5618 {
5619 tem = convert_modes (word_mode, mode, op0, 1);
5620 mode = word_mode;
5621 }
5622
5623 if (tem != 0)
5624 {
5625 if (code == EQ)
5626 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5627 0, OPTAB_WIDEN);
5628 else
5629 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5630 }
5631
5632 /* If we couldn't do it that way, for NE we can "or" the two's complement
5633 of the value with itself. For EQ, we take the one's complement of
5634 that "or", which is an extra insn, so we only handle EQ if branches
5635 are expensive. */
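/* Illustration only, for a 32-bit X:
       X != 0   is the sign bit of   (-X) | X
       X == 0   is the sign bit of   ~((-X) | X)
   e.g. X = 6 gives (-6 | 6) = -2 (sign set), X = 0 gives 0 (clear).  */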
5636
5637 if (tem == 0
5638 && (code == NE
5639 || BRANCH_COST (optimize_insn_for_speed_p (),
5640 false) > 1))
5641 {
5642 if (rtx_equal_p (subtarget, op0))
5643 subtarget = 0;
5644
5645 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5646 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5648
5649 if (tem && code == EQ)
5650 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5651 }
5652 }
5653
5654 if (tem && normalizep)
5655 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5656 GET_MODE_BITSIZE (mode) - 1,
5657 subtarget, normalizep == 1);
5658
5659 if (tem)
5660 {
5661 if (!target)
5662 ;
5663 else if (GET_MODE (tem) != target_mode)
5664 {
5665 convert_move (target, tem, 0);
5666 tem = target;
5667 }
5668 else if (!subtarget)
5669 {
5670 emit_move_insn (target, tem);
5671 tem = target;
5672 }
5673 }
5674 else
5675 delete_insns_since (last);
5676
5677 return tem;
5678 }
5679
5680 /* Like emit_store_flag, but always succeeds. */
5681
5682 rtx
5683 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5684 enum machine_mode mode, int unsignedp, int normalizep)
5685 {
5686 rtx tem, label;
5687 rtx trueval, falseval;
5688
5689 /* First see if emit_store_flag can do the job. */
5690 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5691 if (tem != 0)
5692 return tem;
5693
5694 if (!target)
5695 target = gen_reg_rtx (word_mode);
5696
5697 /* If this failed, we have to do this with set/compare/jump/set code.
5698 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5699 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5700 if (code == NE
5701 && GET_MODE_CLASS (mode) == MODE_INT
5702 && REG_P (target)
5703 && op0 == target
5704 && op1 == const0_rtx)
5705 {
5706 label = gen_label_rtx ();
5707 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5708 mode, NULL_RTX, NULL_RTX, label, -1);
5709 emit_move_insn (target, trueval);
5710 emit_label (label);
5711 return target;
5712 }
5713
5714 if (!REG_P (target)
5715 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5716 target = gen_reg_rtx (GET_MODE (target));
5717
5718 /* Jump in the right direction if the target cannot implement CODE
5719 but can jump on its reverse condition. */
5720 falseval = const0_rtx;
5721 if (! can_compare_p (code, mode, ccp_jump)
5722 && (! FLOAT_MODE_P (mode)
5723 || code == ORDERED || code == UNORDERED
5724 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5725 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5726 {
5727 enum rtx_code rcode;
5728 if (FLOAT_MODE_P (mode))
5729 rcode = reverse_condition_maybe_unordered (code);
5730 else
5731 rcode = reverse_condition (code);
5732
5733 /* Canonicalize to UNORDERED for the libcall. */
5734 if (can_compare_p (rcode, mode, ccp_jump)
5735 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5736 {
5737 falseval = trueval;
5738 trueval = const0_rtx;
5739 code = rcode;
5740 }
5741 }
5742
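/* Illustration only: the code below emits, in effect,

       target = TRUEVAL;
       if (OP0 CODE OP1)
         goto done;
       target = FALSEVAL;
     done:

   where TRUEVAL and FALSEVAL may have been swapped above if only the
   reverse condition is directly usable.  */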
5743 emit_move_insn (target, trueval);
5744 label = gen_label_rtx ();
5745 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5746 NULL_RTX, label, -1);
5747
5748 emit_move_insn (target, falseval);
5749 emit_label (label);
5750
5751 return target;
5752 }
5753 \f
5754 /* Perform possibly multi-word comparison and conditional jump to LABEL
5755 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5756 now a thin wrapper around do_compare_rtx_and_jump. */
5757
5758 static void
5759 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5760 rtx label)
5761 {
5762 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5763 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5764 NULL_RTX, NULL_RTX, label, -1);
5765 }