re PR middle-end/53823 (FAIL: gcc.c-torture/execute/930921-1.c execution at -O0 and...
[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
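/* Informal illustration of the bit trick above: subtracting 1 from a power
   of two clears its single set bit, so the AND is zero exactly for zero or
   a power of two.  For example:
     EXACT_POWER_OF_2_OR_ZERO_P (8)   =>  (8  & 7)  == 0   -> true
     EXACT_POWER_OF_2_OR_ZERO_P (12)  =>  (12 & 11) == 8   -> false
     EXACT_POWER_OF_2_OR_ZERO_P (0)   =>  (0  & ~0) == 0   -> true  */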
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 struct init_expmed_rtl
97 {
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def zext;
105 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
106 struct rtx_def smod_32; rtunion smod_32_fld1;
107 struct rtx_def wide_mult; rtunion wide_mult_fld1;
108 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
109 struct rtx_def wide_trunc;
110 struct rtx_def shift; rtunion shift_fld1;
111 struct rtx_def shift_mult; rtunion shift_mult_fld1;
112 struct rtx_def shift_add; rtunion shift_add_fld1;
113 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
114 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
115 struct rtx_def convert;
116
117 rtx pow2[MAX_BITS_PER_WORD];
118 rtx cint[MAX_BITS_PER_WORD];
119 };
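/* Informal note: these fields are rtx skeletons that init_expmed below wires
   together once and then re-tags with each mode (via PUT_MODE) while measuring
   rtx costs.  For SImode, for instance, the add cost comes from costing the
   expression (plus:SI (reg:SI N) (reg:SI N)) built out of ALL->plus and
   ALL->reg, with N = LAST_VIRTUAL_REGISTER + 1.  */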
120
121 static void
122 init_expmed_one_mode (struct init_expmed_rtl *all,
123 enum machine_mode mode, int speed)
124 {
125 int m, n, mode_bitsize;
126 enum machine_mode mode_from;
127
128 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
129
130 PUT_MODE (&all->reg, mode);
131 PUT_MODE (&all->plus, mode);
132 PUT_MODE (&all->neg, mode);
133 PUT_MODE (&all->mult, mode);
134 PUT_MODE (&all->sdiv, mode);
135 PUT_MODE (&all->udiv, mode);
136 PUT_MODE (&all->sdiv_32, mode);
137 PUT_MODE (&all->smod_32, mode);
138 PUT_MODE (&all->wide_trunc, mode);
139 PUT_MODE (&all->shift, mode);
140 PUT_MODE (&all->shift_mult, mode);
141 PUT_MODE (&all->shift_add, mode);
142 PUT_MODE (&all->shift_sub0, mode);
143 PUT_MODE (&all->shift_sub1, mode);
144 PUT_MODE (&all->convert, mode);
145
146 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
147 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
148 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
149 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
150 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
151
152 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
153 <= 2 * add_cost (speed, mode)));
154 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
155 <= 4 * add_cost (speed, mode)));
156
157 set_shift_cost (speed, mode, 0, 0);
158 {
159 int cost = add_cost (speed, mode);
160 set_shiftadd_cost (speed, mode, 0, cost);
161 set_shiftsub0_cost (speed, mode, 0, cost);
162 set_shiftsub1_cost (speed, mode, 0, cost);
163 }
164
165 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
166 for (m = 1; m < n; m++)
167 {
168 XEXP (&all->shift, 1) = all->cint[m];
169 XEXP (&all->shift_mult, 1) = all->pow2[m];
170
171 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
172 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
173 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
174 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
175 }
176
177 if (SCALAR_INT_MODE_P (mode))
178 {
179 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
180
181 if (wider_mode != VOIDmode)
182 {
183 PUT_MODE (&all->zext, wider_mode);
184 PUT_MODE (&all->wide_mult, wider_mode);
185 PUT_MODE (&all->wide_lshr, wider_mode);
186 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
187
188 set_mul_widen_cost (speed, wider_mode, set_src_cost (&all->wide_mult, speed));
189 set_mul_highpart_cost (speed, mode, set_src_cost (&all->wide_trunc, speed));
190 }
191
192 for (mode_from = GET_CLASS_NARROWEST_MODE (MODE_INT);
193 mode_from != VOIDmode;
194 mode_from = GET_MODE_WIDER_MODE (mode_from))
195 if (mode != mode_from)
196 {
197 unsigned short size_to = GET_MODE_SIZE (mode);
198 unsigned short size_from = GET_MODE_SIZE (mode_from);
199 if (size_to < size_from)
200 {
201 PUT_CODE (&all->convert, TRUNCATE);
202 PUT_MODE (&all->reg, mode_from);
203 set_convert_cost (mode, mode_from, speed,
204 set_src_cost (&all->convert, speed));
205 }
206 else if (size_from < size_to)
207 {
208 /* Assume cost of zero-extend and sign-extend is the same. */
209 PUT_CODE (&all->convert, ZERO_EXTEND);
210 PUT_MODE (&all->reg, mode_from);
211 set_convert_cost (mode, mode_from, speed,
212 set_src_cost (&all->convert, speed));
213 }
214 }
215 }
216 }
217
218 void
219 init_expmed (void)
220 {
221 struct init_expmed_rtl all;
222 enum machine_mode mode;
223 int m, speed;
224
225 memset (&all, 0, sizeof all);
226 for (m = 1; m < MAX_BITS_PER_WORD; m++)
227 {
228 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
229 all.cint[m] = GEN_INT (m);
230 }
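/* For illustration: after this loop all.pow2[3] is GEN_INT (8) and all.cint[3]
   is GEN_INT (3), so init_expmed_one_mode can plug them directly into the
   shift skeletons (XEXP (&all->shift, 1) = all->cint[m], and likewise the
   power-of-two multiplier for shift_mult).  */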
231
232 PUT_CODE (&all.reg, REG);
233 /* Avoid using hard regs in ways which may be unsupported. */
234 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
235
236 PUT_CODE (&all.plus, PLUS);
237 XEXP (&all.plus, 0) = &all.reg;
238 XEXP (&all.plus, 1) = &all.reg;
239
240 PUT_CODE (&all.neg, NEG);
241 XEXP (&all.neg, 0) = &all.reg;
242
243 PUT_CODE (&all.mult, MULT);
244 XEXP (&all.mult, 0) = &all.reg;
245 XEXP (&all.mult, 1) = &all.reg;
246
247 PUT_CODE (&all.sdiv, DIV);
248 XEXP (&all.sdiv, 0) = &all.reg;
249 XEXP (&all.sdiv, 1) = &all.reg;
250
251 PUT_CODE (&all.udiv, UDIV);
252 XEXP (&all.udiv, 0) = &all.reg;
253 XEXP (&all.udiv, 1) = &all.reg;
254
255 PUT_CODE (&all.sdiv_32, DIV);
256 XEXP (&all.sdiv_32, 0) = &all.reg;
257 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
258
259 PUT_CODE (&all.smod_32, MOD);
260 XEXP (&all.smod_32, 0) = &all.reg;
261 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
262
263 PUT_CODE (&all.zext, ZERO_EXTEND);
264 XEXP (&all.zext, 0) = &all.reg;
265
266 PUT_CODE (&all.wide_mult, MULT);
267 XEXP (&all.wide_mult, 0) = &all.zext;
268 XEXP (&all.wide_mult, 1) = &all.zext;
269
270 PUT_CODE (&all.wide_lshr, LSHIFTRT);
271 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
272
273 PUT_CODE (&all.wide_trunc, TRUNCATE);
274 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
275
276 PUT_CODE (&all.shift, ASHIFT);
277 XEXP (&all.shift, 0) = &all.reg;
278
279 PUT_CODE (&all.shift_mult, MULT);
280 XEXP (&all.shift_mult, 0) = &all.reg;
281
282 PUT_CODE (&all.shift_add, PLUS);
283 XEXP (&all.shift_add, 0) = &all.shift_mult;
284 XEXP (&all.shift_add, 1) = &all.reg;
285
286 PUT_CODE (&all.shift_sub0, MINUS);
287 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
288 XEXP (&all.shift_sub0, 1) = &all.reg;
289
290 PUT_CODE (&all.shift_sub1, MINUS);
291 XEXP (&all.shift_sub1, 0) = &all.reg;
292 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
293
294 PUT_CODE (&all.convert, TRUNCATE);
295 XEXP (&all.convert, 0) = &all.reg;
296
297 for (speed = 0; speed < 2; speed++)
298 {
299 crtl->maybe_hot_insn_p = speed;
300 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
301
302 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
303 mode != VOIDmode;
304 mode = GET_MODE_WIDER_MODE (mode))
305 init_expmed_one_mode (&all, mode, speed);
306
307 for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
308 mode != VOIDmode;
309 mode = GET_MODE_WIDER_MODE (mode))
310 init_expmed_one_mode (&all, mode, speed);
311 }
312
313 if (alg_hash_used_p ())
314 {
315 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
316 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
317 }
318 else
319 set_alg_hash_used_p (true);
320 default_rtl_profile ();
321 }
322
323 /* Return an rtx representing minus the value of X.
324 MODE is the intended mode of the result,
325 useful if X is a CONST_INT. */
326
327 rtx
328 negate_rtx (enum machine_mode mode, rtx x)
329 {
330 rtx result = simplify_unary_operation (NEG, mode, x, mode);
331
332 if (result == 0)
333 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
334
335 return result;
336 }
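/* Informal usage sketch: negate_rtx (SImode, GEN_INT (5)) folds to a CONST_INT
   of -5 via simplify_unary_operation, whereas for a pseudo register the
   simplification fails and expand_unop emits an actual negation insn.  */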
337
338 /* Report on the availability of insv/extv/extzv and the desired mode
339 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
340 is false; else the mode of the specified operand. If OPNO is -1,
341 all the caller cares about is whether the insn is available. */
342 enum machine_mode
343 mode_for_extraction (enum extraction_pattern pattern, int opno)
344 {
345 const struct insn_data_d *data;
346
347 switch (pattern)
348 {
349 case EP_insv:
350 if (HAVE_insv)
351 {
352 data = &insn_data[CODE_FOR_insv];
353 break;
354 }
355 return MAX_MACHINE_MODE;
356
357 case EP_extv:
358 if (HAVE_extv)
359 {
360 data = &insn_data[CODE_FOR_extv];
361 break;
362 }
363 return MAX_MACHINE_MODE;
364
365 case EP_extzv:
366 if (HAVE_extzv)
367 {
368 data = &insn_data[CODE_FOR_extzv];
369 break;
370 }
371 return MAX_MACHINE_MODE;
372
373 default:
374 gcc_unreachable ();
375 }
376
377 if (opno == -1)
378 return VOIDmode;
379
380 /* Everyone who uses this function used to follow it with
381 if (result == VOIDmode) result = word_mode; */
382 if (data->operand[opno].mode == VOIDmode)
383 return word_mode;
384 return data->operand[opno].mode;
385 }
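/* Informal usage sketch: store_bit_field_1 below asks
     mode_for_extraction (EP_insv, 3)
   for the mode the insv pattern wants for operand 3 (the value being
   inserted); on a target with no insv pattern this returns
   MAX_MACHINE_MODE, which callers treat as "insertion insn not available".  */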
386 \f
387 /* A subroutine of store_bit_field, with the same arguments. Return true
388 if the operation could be implemented.
389
390 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
391 no other way of implementing the operation. If FALLBACK_P is false,
392 return false instead. */
393
394 static bool
395 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
396 unsigned HOST_WIDE_INT bitnum,
397 unsigned HOST_WIDE_INT bitregion_start,
398 unsigned HOST_WIDE_INT bitregion_end,
399 enum machine_mode fieldmode,
400 rtx value, bool fallback_p)
401 {
402 unsigned int unit
403 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
404 unsigned HOST_WIDE_INT offset, bitpos;
405 rtx op0 = str_rtx;
406 int byte_offset;
407 rtx orig_value;
408
409 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
410
411 while (GET_CODE (op0) == SUBREG)
412 {
413 /* The following line once was done only if WORDS_BIG_ENDIAN,
414 but I think that is a mistake. WORDS_BIG_ENDIAN is
415 meaningful at a much higher level; when structures are copied
416 between memory and regs, the higher-numbered regs
417 always get higher addresses. */
418 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
419 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
420
421 byte_offset = 0;
422
423 /* Paradoxical subregs need special handling on big endian machines. */
424 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
425 {
426 int difference = inner_mode_size - outer_mode_size;
427
428 if (WORDS_BIG_ENDIAN)
429 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
430 if (BYTES_BIG_ENDIAN)
431 byte_offset += difference % UNITS_PER_WORD;
432 }
433 else
434 byte_offset = SUBREG_BYTE (op0);
435
436 bitnum += byte_offset * BITS_PER_UNIT;
437 op0 = SUBREG_REG (op0);
438 }
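/* Worked example (informal): storing into (subreg:QI (reg:SI 100) 3) on a
   little-endian target takes the non-paradoxical branch, so byte_offset is
   SUBREG_BYTE = 3, bitnum grows by 24, and op0 becomes (reg:SI 100); the
   field is then located within the full SImode register.  */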
439
440 /* No action is needed if the target is a register and if the field
441 lies completely outside that register. This can occur if the source
442 code contains an out-of-bounds access to a small array. */
443 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
444 return true;
445
446 /* Use vec_set patterns for inserting parts of vectors whenever
447 available. */
448 if (VECTOR_MODE_P (GET_MODE (op0))
449 && !MEM_P (op0)
450 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
451 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
452 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
453 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
454 {
455 struct expand_operand ops[3];
456 enum machine_mode outermode = GET_MODE (op0);
457 enum machine_mode innermode = GET_MODE_INNER (outermode);
458 enum insn_code icode = optab_handler (vec_set_optab, outermode);
459 int pos = bitnum / GET_MODE_BITSIZE (innermode);
460
461 create_fixed_operand (&ops[0], op0);
462 create_input_operand (&ops[1], value, innermode);
463 create_integer_operand (&ops[2], pos);
464 if (maybe_expand_insn (icode, 3, ops))
465 return true;
466 }
467
468 /* If the target is a register, overwriting the entire object, or storing
469 a full-word or multi-word field can be done with just a SUBREG.
470
471 If the target is memory, storing any naturally aligned field can be
472 done with a simple store. For targets that support fast unaligned
473 memory, any naturally sized, unit aligned field can be done directly. */
474
475 offset = bitnum / unit;
476 bitpos = bitnum % unit;
477 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
478 + (offset * UNITS_PER_WORD);
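/* Worked example (informal): for a MEM, unit is BITS_PER_UNIT, so bitnum = 37
   splits into offset = 4 bytes and bitpos = 5 bits; for a REG, unit is
   BITS_PER_WORD, so on a 32-bit target the same bitnum gives offset = 1 word
   and bitpos = 5.  */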
479
480 if (bitpos == 0
481 && bitsize == GET_MODE_BITSIZE (fieldmode)
482 && (!MEM_P (op0)
483 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
484 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
485 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
486 || validate_subreg (fieldmode, GET_MODE (op0), op0,
487 byte_offset)))
488 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
489 || (offset * BITS_PER_UNIT % bitsize == 0
490 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
491 {
492 if (MEM_P (op0))
493 op0 = adjust_address (op0, fieldmode, offset);
494 else if (GET_MODE (op0) != fieldmode)
495 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
496 byte_offset);
497 emit_move_insn (op0, value);
498 return true;
499 }
500
501 /* Make sure we are playing with integral modes. Pun with subregs
502 if we aren't. This must come after the entire register case above,
503 since that case is valid for any mode. The following cases are only
504 valid for integral modes. */
505 {
506 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
507 if (imode != GET_MODE (op0))
508 {
509 if (MEM_P (op0))
510 op0 = adjust_address (op0, imode, 0);
511 else
512 {
513 gcc_assert (imode != BLKmode);
514 op0 = gen_lowpart (imode, op0);
515 }
516 }
517 }
518
519 /* We may be accessing data outside the field, which means
520 we can alias adjacent data. */
521 /* ?? not always for C++0x memory model ?? */
522 if (MEM_P (op0))
523 {
524 op0 = shallow_copy_rtx (op0);
525 set_mem_alias_set (op0, 0);
526 set_mem_expr (op0, 0);
527 }
528
529 /* If OP0 is a register, BITPOS must count within a word.
530 But as we have it, it counts within whatever size OP0 now has.
531 On a bigendian machine, these are not the same, so convert. */
532 if (BYTES_BIG_ENDIAN
533 && !MEM_P (op0)
534 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
535 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
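/* Worked example (informal): on a 32-bit BYTES_BIG_ENDIAN target, storing into
   a QImode pseudo gives unit = 32 and GET_MODE_BITSIZE (QImode) = 8, so bitpos
   is shifted up by 24 to count from the most significant end of the
   word-sized unit.  */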
536
537 /* Storing an lsb-aligned field in a register
538 can be done with a movestrict instruction. */
539
540 if (!MEM_P (op0)
541 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
542 && bitsize == GET_MODE_BITSIZE (fieldmode)
543 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
544 {
545 struct expand_operand ops[2];
546 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
547 rtx arg0 = op0;
548 unsigned HOST_WIDE_INT subreg_off;
549
550 if (GET_CODE (arg0) == SUBREG)
551 {
552 /* Else we've got some float mode source being extracted into
553 a different float mode destination -- this combination of
554 subregs results in Severe Tire Damage. */
555 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
556 || GET_MODE_CLASS (fieldmode) == MODE_INT
557 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
558 arg0 = SUBREG_REG (arg0);
559 }
560
561 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
562 + (offset * UNITS_PER_WORD);
563 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
564 {
565 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
566
567 create_fixed_operand (&ops[0], arg0);
568 /* Shrink the source operand to FIELDMODE. */
569 create_convert_operand_to (&ops[1], value, fieldmode, false);
570 if (maybe_expand_insn (icode, 2, ops))
571 return true;
572 }
573 }
574
575 /* Handle fields bigger than a word. */
576
577 if (bitsize > BITS_PER_WORD)
578 {
579 /* Here we transfer the words of the field
580 in the order least significant first.
581 This is because the most significant word is the one which may
582 be less than full.
583 However, only do that if the value is not BLKmode. */
584
585 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
586 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
587 unsigned int i;
588 rtx last;
589
590 /* This is the mode we must force value to, so that there will be enough
591 subwords to extract. Note that fieldmode will often (always?) be
592 VOIDmode, because that is what store_field uses to indicate that this
593 is a bit field, but passing VOIDmode to operand_subword_force
594 is not allowed. */
595 fieldmode = GET_MODE (value);
596 if (fieldmode == VOIDmode)
597 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
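/* Worked example (informal): storing a 64-bit field on a 32-bit little-endian
   target gives nwords = 2; iteration i = 0 stores the low word at bit_offset 0
   and i = 1 stores the high word at bit_offset 32, each through the recursive
   word_mode call in the loop below.  */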
598
599 last = get_last_insn ();
600 for (i = 0; i < nwords; i++)
601 {
602 /* If I is 0, use the low-order word in both field and target;
603 if I is 1, use the next to lowest word; and so on. */
604 unsigned int wordnum = (backwards
605 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
606 - i - 1
607 : i);
608 unsigned int bit_offset = (backwards
609 ? MAX ((int) bitsize - ((int) i + 1)
610 * BITS_PER_WORD,
611 0)
612 : (int) i * BITS_PER_WORD);
613 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
614 unsigned HOST_WIDE_INT new_bitsize =
615 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
616
617 /* If the remaining chunk doesn't have full wordsize we have
618 to make sure that for big endian machines the higher order
619 bits are used. */
620 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
621 value_word = simplify_expand_binop (word_mode, lshr_optab,
622 value_word,
623 GEN_INT (BITS_PER_WORD
624 - new_bitsize),
625 NULL_RTX, true,
626 OPTAB_LIB_WIDEN);
627
628 if (!store_bit_field_1 (op0, new_bitsize,
629 bitnum + bit_offset,
630 bitregion_start, bitregion_end,
631 word_mode,
632 value_word, fallback_p))
633 {
634 delete_insns_since (last);
635 return false;
636 }
637 }
638 return true;
639 }
640
641 /* From here on we can assume that the field being stored fits within
642 a single word, since the multi-word case was handled above. */
643
644 /* OFFSET is the number of words or bytes (UNIT says which)
645 from STR_RTX to the first word or byte containing part of the field. */
646
647 if (!MEM_P (op0))
648 {
649 if (offset != 0
650 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
651 {
652 if (!REG_P (op0))
653 {
654 /* Since this is a destination (lvalue), we can't copy
655 it to a pseudo. We can remove a SUBREG that does not
656 change the size of the operand. Such a SUBREG may
657 have been added above. */
658 gcc_assert (GET_CODE (op0) == SUBREG
659 && (GET_MODE_SIZE (GET_MODE (op0))
660 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
661 op0 = SUBREG_REG (op0);
662 }
663 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
664 op0, (offset * UNITS_PER_WORD));
665 }
666 offset = 0;
667 }
668
669 /* If VALUE has a floating-point or complex mode, access it as an
670 integer of the corresponding size. This can occur on a machine
671 with 64 bit registers that uses SFmode for float. It can also
672 occur for unaligned float or complex fields. */
673 orig_value = value;
674 if (GET_MODE (value) != VOIDmode
675 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
676 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
677 {
678 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
679 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
680 }
681
682 /* Now OFFSET is nonzero only if OP0 is memory
683 and is therefore always measured in bytes. */
684
685 if (HAVE_insv
686 && GET_MODE (value) != BLKmode
687 && bitsize > 0
688 && GET_MODE_BITSIZE (op_mode) >= bitsize
689 /* Do not use insv for volatile bitfields when
690 -fstrict-volatile-bitfields is in effect. */
691 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
692 && flag_strict_volatile_bitfields > 0)
693 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
694 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
695 /* Do not use insv if the bit region is restricted and an
696 op_mode integer at that offset doesn't fit into the
697 restricted region. */
698 && !(MEM_P (op0) && bitregion_end
699 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
700 > bitregion_end + 1))
701 {
702 struct expand_operand ops[4];
703 int xbitpos = bitpos;
704 rtx value1;
705 rtx xop0 = op0;
706 rtx last = get_last_insn ();
707 bool copy_back = false;
708
709 /* Add OFFSET into OP0's address. */
710 if (MEM_P (xop0))
711 xop0 = adjust_address (xop0, byte_mode, offset);
712
713 /* If xop0 is a register, we need it in OP_MODE
714 to make it acceptable to the format of insv. */
715 if (GET_CODE (xop0) == SUBREG)
716 /* We can't just change the mode, because this might clobber op0,
717 and we will need the original value of op0 if insv fails. */
718 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
719 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
720 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
721
722 /* If the destination is a paradoxical subreg such that we need a
723 truncate to the inner mode, perform the insertion on a temporary and
724 truncate the result to the original destination. Note that we can't
725 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
726 X) 0)) is (reg:N X). */
727 if (GET_CODE (xop0) == SUBREG
728 && REG_P (SUBREG_REG (xop0))
729 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
730 op_mode)))
731 {
732 rtx tem = gen_reg_rtx (op_mode);
733 emit_move_insn (tem, xop0);
734 xop0 = tem;
735 copy_back = true;
736 }
737
738 /* We have been counting XBITPOS within UNIT.
739 Count instead within the size of the register. */
740 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
741 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
742
743 unit = GET_MODE_BITSIZE (op_mode);
744
745 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
746 "backwards" from the size of the unit we are inserting into.
747 Otherwise, we count bits from the most significant on a
748 BYTES/BITS_BIG_ENDIAN machine. */
749
750 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
751 xbitpos = unit - bitsize - xbitpos;
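/* Worked example (informal): with unit = 32, bitsize = 5 and xbitpos = 3, a
   target where BITS_BIG_ENDIAN differs from BYTES_BIG_ENDIAN renumbers the
   position as 32 - 5 - 3 = 24, i.e. the same field counted from the other end
   of the unit.  */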
752
753 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
754 value1 = value;
755 if (GET_MODE (value) != op_mode)
756 {
757 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
758 {
759 /* Optimization: Don't bother really extending VALUE
760 if it has all the bits we will actually use. However,
761 if we must narrow it, be sure we do it correctly. */
762
763 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
764 {
765 rtx tmp;
766
767 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
768 if (! tmp)
769 tmp = simplify_gen_subreg (op_mode,
770 force_reg (GET_MODE (value),
771 value1),
772 GET_MODE (value), 0);
773 value1 = tmp;
774 }
775 else
776 value1 = gen_lowpart (op_mode, value1);
777 }
778 else if (CONST_INT_P (value))
779 value1 = gen_int_mode (INTVAL (value), op_mode);
780 else
781 /* Parse phase is supposed to make VALUE's data type
782 match that of the component reference, which is a type
783 at least as wide as the field; so VALUE should have
784 a mode that corresponds to that type. */
785 gcc_assert (CONSTANT_P (value));
786 }
787
788 create_fixed_operand (&ops[0], xop0);
789 create_integer_operand (&ops[1], bitsize);
790 create_integer_operand (&ops[2], xbitpos);
791 create_input_operand (&ops[3], value1, op_mode);
792 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
793 {
794 if (copy_back)
795 convert_move (op0, xop0, true);
796 return true;
797 }
798 delete_insns_since (last);
799 }
800
801 /* If OP0 is a memory, try copying it to a register and seeing if a
802 cheap register alternative is available. */
803 if (HAVE_insv && MEM_P (op0))
804 {
805 enum machine_mode bestmode;
806 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
807
808 if (bitregion_end)
809 maxbits = bitregion_end - bitregion_start + 1;
810
811 /* Get the mode to use for inserting into this field. If OP0 is
812 BLKmode, get the smallest mode consistent with the alignment. If
813 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
814 mode. Otherwise, use the smallest mode containing the field. */
815
816 if (GET_MODE (op0) == BLKmode
817 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
818 || (op_mode != MAX_MACHINE_MODE
819 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
820 bestmode = get_best_mode (bitsize, bitnum,
821 bitregion_start, bitregion_end,
822 MEM_ALIGN (op0),
823 (op_mode == MAX_MACHINE_MODE
824 ? VOIDmode : op_mode),
825 MEM_VOLATILE_P (op0));
826 else
827 bestmode = GET_MODE (op0);
828
829 if (bestmode != VOIDmode
830 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
831 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
832 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
833 {
834 rtx last, tempreg, xop0;
835 unsigned HOST_WIDE_INT xoffset, xbitpos;
836
837 last = get_last_insn ();
838
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 xbitpos = bitnum % unit;
845 xop0 = adjust_address (op0, bestmode, xoffset);
846
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
853 {
854 emit_move_insn (xop0, tempreg);
855 return true;
856 }
857 delete_insns_since (last);
858 }
859 }
860
861 if (!fallback_p)
862 return false;
863
864 store_fixed_bit_field (op0, offset, bitsize, bitpos,
865 bitregion_start, bitregion_end, value);
866 return true;
867 }
868
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
872
873 BITREGION_START is bitpos of the first bitfield in this region.
874 BITREGION_END is the bitpos of the ending bitfield in this region.
875 These two fields are 0, if the C++ memory model does not apply,
876 or we are not interested in keeping track of bitfield regions.
877
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
879
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
887 {
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
892 {
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
896
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
900
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
902
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
913 }
914
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
919 }
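/* Informal usage sketch (hypothetical values): to store a 3-bit field that
   starts at bit 2 of the byte addressed by MEM, with no C++0x bit region to
   respect, a caller would do

     store_bit_field (mem, 3, 2, 0, 0, QImode, value);

   and the routine picks between an insv pattern, a plain move, or the
   shift-and-mask fallback in store_fixed_bit_field.  */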
920 \f
921 /* Use shifts and boolean operations to store VALUE
922 into a bit field of width BITSIZE
923 in a memory location specified by OP0 except offset by OFFSET bytes.
924 (OFFSET must be 0 if OP0 is a register.)
925 The field starts at position BITPOS within the byte.
926 (If OP0 is a register, it may be a full word or a narrower mode,
927 but BITPOS still counts within a full word,
928 which is significant on bigendian machines.) */
929
930 static void
931 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
932 unsigned HOST_WIDE_INT bitsize,
933 unsigned HOST_WIDE_INT bitpos,
934 unsigned HOST_WIDE_INT bitregion_start,
935 unsigned HOST_WIDE_INT bitregion_end,
936 rtx value)
937 {
938 enum machine_mode mode;
939 unsigned int total_bits = BITS_PER_WORD;
940 rtx temp;
941 int all_zero = 0;
942 int all_one = 0;
943
944 /* There is a case not handled here:
945 a structure with a known alignment of just a halfword
946 and a field split across two aligned halfwords within the structure.
947 Or likewise a structure with a known alignment of just a byte
948 and a field split across two bytes.
949 Such cases are not supposed to be able to occur. */
950
951 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
952 {
953 gcc_assert (!offset);
954 /* Special treatment for a bit field split across two registers. */
955 if (bitsize + bitpos > BITS_PER_WORD)
956 {
957 store_split_bit_field (op0, bitsize, bitpos,
958 bitregion_start, bitregion_end,
959 value);
960 return;
961 }
962 }
963 else
964 {
965 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
966
967 if (bitregion_end)
968 maxbits = bitregion_end - bitregion_start + 1;
969
970 /* Get the proper mode to use for this field. We want a mode that
971 includes the entire field. If such a mode would be larger than
972 a word, we won't be doing the extraction the normal way.
973 We don't want a mode bigger than the destination. */
974
975 mode = GET_MODE (op0);
976 if (GET_MODE_BITSIZE (mode) == 0
977 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
978 mode = word_mode;
979
980 if (MEM_VOLATILE_P (op0)
981 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
982 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
983 && flag_strict_volatile_bitfields > 0)
984 mode = GET_MODE (op0);
985 else
986 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
987 bitregion_start, bitregion_end,
988 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
989
990 if (mode == VOIDmode)
991 {
992 /* The only way this should occur is if the field spans word
993 boundaries. */
994 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
995 bitregion_start, bitregion_end, value);
996 return;
997 }
998
999 total_bits = GET_MODE_BITSIZE (mode);
1000
1001 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1002 be in the range 0 to total_bits-1, and put any excess bytes in
1003 OFFSET. */
1004 if (bitpos >= total_bits)
1005 {
1006 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1007 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1008 * BITS_PER_UNIT);
1009 }
1010
1011 /* Get ref to an aligned byte, halfword, or word containing the field.
1012 Adjust BITPOS to be position within a word,
1013 and OFFSET to be the offset of that word.
1014 Then alter OP0 to refer to that word. */
1015 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1016 offset -= (offset % (total_bits / BITS_PER_UNIT));
1017 op0 = adjust_address (op0, mode, offset);
1018 }
1019
1020 mode = GET_MODE (op0);
1021
1022 /* Now MODE is either some integral mode for a MEM as OP0,
1023 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1024 The bit field is contained entirely within OP0.
1025 BITPOS is the starting bit number within OP0.
1026 (OP0's mode may actually be narrower than MODE.) */
1027
1028 if (BYTES_BIG_ENDIAN)
1029 /* BITPOS is the distance between our msb
1030 and that of the containing datum.
1031 Convert it to the distance from the lsb. */
1032 bitpos = total_bits - bitsize - bitpos;
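/* Worked example (informal): with total_bits = 32, an 8-bit field at
   msb-relative bitpos 0 becomes lsb-relative bitpos 24, so the shift and mask
   arithmetic below can be written once for both endiannesses.  */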
1033
1034 /* Now BITPOS is always the distance between our lsb
1035 and that of OP0. */
1036
1037 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1038 we must first convert its mode to MODE. */
1039
1040 if (CONST_INT_P (value))
1041 {
1042 HOST_WIDE_INT v = INTVAL (value);
1043
1044 if (bitsize < HOST_BITS_PER_WIDE_INT)
1045 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1046
1047 if (v == 0)
1048 all_zero = 1;
1049 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1050 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1051 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1052 all_one = 1;
1053
1054 value = lshift_value (mode, value, bitpos, bitsize);
1055 }
1056 else
1057 {
1058 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1059 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1060
1061 if (GET_MODE (value) != mode)
1062 value = convert_to_mode (mode, value, 1);
1063
1064 if (must_and)
1065 value = expand_binop (mode, and_optab, value,
1066 mask_rtx (mode, 0, bitsize, 0),
1067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1068 if (bitpos > 0)
1069 value = expand_shift (LSHIFT_EXPR, mode, value,
1070 bitpos, NULL_RTX, 1);
1071 }
1072
1073 /* Now clear the chosen bits in OP0,
1074 except that if VALUE is -1 we need not bother. */
1075 /* We keep the intermediates in registers to allow CSE to combine
1076 consecutive bitfield assignments. */
1077
1078 temp = force_reg (mode, op0);
1079
1080 if (! all_one)
1081 {
1082 temp = expand_binop (mode, and_optab, temp,
1083 mask_rtx (mode, bitpos, bitsize, 1),
1084 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1085 temp = force_reg (mode, temp);
1086 }
1087
1088 /* Now logical-or VALUE into OP0, unless it is zero. */
1089
1090 if (! all_zero)
1091 {
1092 temp = expand_binop (mode, ior_optab, temp, value,
1093 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1094 temp = force_reg (mode, temp);
1095 }
1096
1097 if (op0 != temp)
1098 {
1099 op0 = copy_rtx (op0);
1100 emit_move_insn (op0, temp);
1101 }
1102 }
1103 \f
1104 /* Store a bit field that is split across multiple accessible memory objects.
1105
1106 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1107 BITSIZE is the field width; BITPOS the position of its first bit
1108 (within the word).
1109 VALUE is the value to store.
1110
1111 This does not yet handle fields wider than BITS_PER_WORD. */
1112
1113 static void
1114 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1115 unsigned HOST_WIDE_INT bitpos,
1116 unsigned HOST_WIDE_INT bitregion_start,
1117 unsigned HOST_WIDE_INT bitregion_end,
1118 rtx value)
1119 {
1120 unsigned int unit;
1121 unsigned int bitsdone = 0;
1122
1123 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1124 much at a time. */
1125 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1126 unit = BITS_PER_WORD;
1127 else
1128 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1129
1130 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1131 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1132 that VALUE might be a floating-point constant. */
1133 if (CONSTANT_P (value) && !CONST_INT_P (value))
1134 {
1135 rtx word = gen_lowpart_common (word_mode, value);
1136
1137 if (word && (value != word))
1138 value = word;
1139 else
1140 value = gen_lowpart_common (word_mode,
1141 force_reg (GET_MODE (value) != VOIDmode
1142 ? GET_MODE (value)
1143 : word_mode, value));
1144 }
1145
1146 while (bitsdone < bitsize)
1147 {
1148 unsigned HOST_WIDE_INT thissize;
1149 rtx part, word;
1150 unsigned HOST_WIDE_INT thispos;
1151 unsigned HOST_WIDE_INT offset;
1152
1153 offset = (bitpos + bitsdone) / unit;
1154 thispos = (bitpos + bitsdone) % unit;
1155
1156 /* When the region of bytes we can touch is restricted, decrease
1157 UNIT as needed close to the end of that region. */
1158 if (bitregion_end
1159 && unit > BITS_PER_UNIT
1160 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1161 {
1162 unit = unit / 2;
1163 continue;
1164 }
1165
1166 /* THISSIZE must not overrun a word boundary. Otherwise,
1167 store_fixed_bit_field will call us again, and we will mutually
1168 recurse forever. */
1169 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1170 thissize = MIN (thissize, unit - thispos);
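/* Worked example (informal): storing a 10-bit field at bitpos 28 with
   unit = 32 takes two iterations: the first stores thissize = 4 bits at
   thispos 28, the second stores the remaining 6 bits at thispos 0 of the
   following word.  */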
1171
1172 if (BYTES_BIG_ENDIAN)
1173 {
1174 int total_bits;
1175
1176 /* We must do an endian conversion exactly the same way as it is
1177 done in extract_bit_field, so that the two calls to
1178 extract_fixed_bit_field will have comparable arguments. */
1179 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1180 total_bits = BITS_PER_WORD;
1181 else
1182 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1183
1184 /* Fetch successively less significant portions. */
1185 if (CONST_INT_P (value))
1186 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1187 >> (bitsize - bitsdone - thissize))
1188 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1189 else
1190 /* The args are chosen so that the last part includes the
1191 lsb. Give extract_bit_field the value it needs (with
1192 endianness compensation) to fetch the piece we want. */
1193 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1194 total_bits - bitsize + bitsdone,
1195 NULL_RTX, 1, false);
1196 }
1197 else
1198 {
1199 /* Fetch successively more significant portions. */
1200 if (CONST_INT_P (value))
1201 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1202 >> bitsdone)
1203 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1204 else
1205 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1206 bitsdone, NULL_RTX, 1, false);
1207 }
1208
1209 /* If OP0 is a register, then handle OFFSET here.
1210
1211 When handling multiword bitfields, extract_bit_field may pass
1212 down a word_mode SUBREG of a larger REG for a bitfield that actually
1213 crosses a word boundary. Thus, for a SUBREG, we must find
1214 the current word starting from the base register. */
1215 if (GET_CODE (op0) == SUBREG)
1216 {
1217 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1218 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1219 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1220 word = word_offset ? const0_rtx : op0;
1221 else
1222 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1223 GET_MODE (SUBREG_REG (op0)));
1224 offset = 0;
1225 }
1226 else if (REG_P (op0))
1227 {
1228 enum machine_mode op0_mode = GET_MODE (op0);
1229 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1230 word = offset ? const0_rtx : op0;
1231 else
1232 word = operand_subword_force (op0, offset, GET_MODE (op0));
1233 offset = 0;
1234 }
1235 else
1236 word = op0;
1237
1238 /* OFFSET is in UNITs, and UNIT is in bits.
1239 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1240 it is just an out-of-bounds access. Ignore it. */
1241 if (word != const0_rtx)
1242 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1243 thispos, bitregion_start, bitregion_end, part);
1244 bitsdone += thissize;
1245 }
1246 }
1247 \f
1248 /* A subroutine of extract_bit_field_1 that converts return value X
1249 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1250 to extract_bit_field. */
1251
1252 static rtx
1253 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1254 enum machine_mode tmode, bool unsignedp)
1255 {
1256 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1257 return x;
1258
1259 /* If the x mode is not a scalar integral, first convert to the
1260 integer mode of that size and then access it as a floating-point
1261 value via a SUBREG. */
1262 if (!SCALAR_INT_MODE_P (tmode))
1263 {
1264 enum machine_mode smode;
1265
1266 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1267 x = convert_to_mode (smode, x, unsignedp);
1268 x = force_reg (smode, x);
1269 return gen_lowpart (tmode, x);
1270 }
1271
1272 return convert_to_mode (tmode, x, unsignedp);
1273 }
1274
1275 /* A subroutine of extract_bit_field, with the same arguments.
1276 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1277 if we can find no other means of implementing the operation.
1278 If FALLBACK_P is false, return NULL instead. */
1279
1280 static rtx
1281 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1282 unsigned HOST_WIDE_INT bitnum,
1283 int unsignedp, bool packedp, rtx target,
1284 enum machine_mode mode, enum machine_mode tmode,
1285 bool fallback_p)
1286 {
1287 unsigned int unit
1288 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1289 unsigned HOST_WIDE_INT offset, bitpos;
1290 rtx op0 = str_rtx;
1291 enum machine_mode int_mode;
1292 enum machine_mode ext_mode;
1293 enum machine_mode mode1;
1294 int byte_offset;
1295
1296 if (tmode == VOIDmode)
1297 tmode = mode;
1298
1299 while (GET_CODE (op0) == SUBREG)
1300 {
1301 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1302 op0 = SUBREG_REG (op0);
1303 }
1304
1305 /* If we have an out-of-bounds access to a register, just return an
1306 uninitialized register of the required mode. This can occur if the
1307 source code contains an out-of-bounds access to a small array. */
1308 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1309 return gen_reg_rtx (tmode);
1310
1311 if (REG_P (op0)
1312 && mode == GET_MODE (op0)
1313 && bitnum == 0
1314 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1315 {
1316 /* We're trying to extract a full register from itself. */
1317 return op0;
1318 }
1319
1320 /* See if we can get a better vector mode before extracting. */
1321 if (VECTOR_MODE_P (GET_MODE (op0))
1322 && !MEM_P (op0)
1323 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1324 {
1325 enum machine_mode new_mode;
1326
1327 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1328 new_mode = MIN_MODE_VECTOR_FLOAT;
1329 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1330 new_mode = MIN_MODE_VECTOR_FRACT;
1331 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1332 new_mode = MIN_MODE_VECTOR_UFRACT;
1333 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1334 new_mode = MIN_MODE_VECTOR_ACCUM;
1335 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1336 new_mode = MIN_MODE_VECTOR_UACCUM;
1337 else
1338 new_mode = MIN_MODE_VECTOR_INT;
1339
1340 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1341 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1342 && targetm.vector_mode_supported_p (new_mode))
1343 break;
1344 if (new_mode != VOIDmode)
1345 op0 = gen_lowpart (new_mode, op0);
1346 }
1347
1348 /* Use vec_extract patterns for extracting parts of vectors whenever
1349 available. */
1350 if (VECTOR_MODE_P (GET_MODE (op0))
1351 && !MEM_P (op0)
1352 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1353 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1354 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1355 {
1356 struct expand_operand ops[3];
1357 enum machine_mode outermode = GET_MODE (op0);
1358 enum machine_mode innermode = GET_MODE_INNER (outermode);
1359 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1360 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1361
1362 create_output_operand (&ops[0], target, innermode);
1363 create_input_operand (&ops[1], op0, outermode);
1364 create_integer_operand (&ops[2], pos);
1365 if (maybe_expand_insn (icode, 3, ops))
1366 {
1367 target = ops[0].value;
1368 if (GET_MODE (target) != mode)
1369 return gen_lowpart (tmode, target);
1370 return target;
1371 }
1372 }
1373
1374 /* Make sure we are playing with integral modes. Pun with subregs
1375 if we aren't. */
1376 {
1377 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1378 if (imode != GET_MODE (op0))
1379 {
1380 if (MEM_P (op0))
1381 op0 = adjust_address (op0, imode, 0);
1382 else if (imode != BLKmode)
1383 {
1384 op0 = gen_lowpart (imode, op0);
1385
1386 /* If we got a SUBREG, force it into a register since we
1387 aren't going to be able to do another SUBREG on it. */
1388 if (GET_CODE (op0) == SUBREG)
1389 op0 = force_reg (imode, op0);
1390 }
1391 else if (REG_P (op0))
1392 {
1393 rtx reg, subreg;
1394 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1395 MODE_INT);
1396 reg = gen_reg_rtx (imode);
1397 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1398 emit_move_insn (subreg, op0);
1399 op0 = reg;
1400 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1401 }
1402 else
1403 {
1404 rtx mem = assign_stack_temp (GET_MODE (op0),
1405 GET_MODE_SIZE (GET_MODE (op0)));
1406 emit_move_insn (mem, op0);
1407 op0 = adjust_address (mem, BLKmode, 0);
1408 }
1409 }
1410 }
1411
1412 /* We may be accessing data outside the field, which means
1413 we can alias adjacent data. */
1414 if (MEM_P (op0))
1415 {
1416 op0 = shallow_copy_rtx (op0);
1417 set_mem_alias_set (op0, 0);
1418 set_mem_expr (op0, 0);
1419 }
1420
1421 /* Extraction of a full-word or multi-word value from a structure
1422 in a register or aligned memory can be done with just a SUBREG.
1423 A subword value in the least significant part of a register
1424 can also be extracted with a SUBREG. For this, we need the
1425 byte offset of the value in op0. */
1426
1427 bitpos = bitnum % unit;
1428 offset = bitnum / unit;
1429 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1430
1431 /* If OP0 is a register, BITPOS must count within a word.
1432 But as we have it, it counts within whatever size OP0 now has.
1433 On a bigendian machine, these are not the same, so convert. */
1434 if (BYTES_BIG_ENDIAN
1435 && !MEM_P (op0)
1436 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1437 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1438
1439 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1440 If that's wrong, the solution is to test for it and set TARGET to 0
1441 if needed. */
1442
1443 /* Only scalar integer modes can be converted via subregs. There is an
1444 additional problem for FP modes here in that they can have a precision
1445 which is different from the size. mode_for_size uses precision, but
1446 we want a mode based on the size, so we must avoid calling it for FP
1447 modes. */
1448 mode1 = (SCALAR_INT_MODE_P (tmode)
1449 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1450 : mode);
1451
1452 /* If the bitfield is volatile, we need to make sure the access
1453 remains on a type-aligned boundary. */
1454 if (GET_CODE (op0) == MEM
1455 && MEM_VOLATILE_P (op0)
1456 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1457 && flag_strict_volatile_bitfields > 0)
1458 goto no_subreg_mode_swap;
1459
1460 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1461 && bitpos % BITS_PER_WORD == 0)
1462 || (mode1 != BLKmode
1463 /* ??? The big endian test here is wrong. This is correct
1464 if the value is in a register, and if mode_for_size is not
1465 the same mode as op0. This causes us to get unnecessarily
1466 inefficient code from the Thumb port when -mbig-endian. */
1467 && (BYTES_BIG_ENDIAN
1468 ? bitpos + bitsize == BITS_PER_WORD
1469 : bitpos == 0)))
1470 && ((!MEM_P (op0)
1471 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1472 && GET_MODE_SIZE (mode1) != 0
1473 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1474 || (MEM_P (op0)
1475 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1476 || (offset * BITS_PER_UNIT % bitsize == 0
1477 && MEM_ALIGN (op0) % bitsize == 0)))))
1478 {
1479 if (MEM_P (op0))
1480 op0 = adjust_address (op0, mode1, offset);
1481 else if (mode1 != GET_MODE (op0))
1482 {
1483 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1484 byte_offset);
1485 if (sub == NULL)
1486 goto no_subreg_mode_swap;
1487 op0 = sub;
1488 }
1489 if (mode1 != mode)
1490 return convert_to_mode (tmode, op0, unsignedp);
1491 return op0;
1492 }
1493 no_subreg_mode_swap:
1494
1495 /* Handle fields bigger than a word. */
1496
1497 if (bitsize > BITS_PER_WORD)
1498 {
1499 /* Here we transfer the words of the field
1500 in the order least significant first.
1501 This is because the most significant word is the one which may
1502 be less than full. */
1503
1504 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1505 unsigned int i;
1506
1507 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1508 target = gen_reg_rtx (mode);
1509
1510 /* Indicate for flow that the entire target reg is being set. */
1511 emit_clobber (target);
1512
1513 for (i = 0; i < nwords; i++)
1514 {
1515 /* If I is 0, use the low-order word in both field and target;
1516 if I is 1, use the next to lowest word; and so on. */
1517 /* Word number in TARGET to use. */
1518 unsigned int wordnum
1519 = (WORDS_BIG_ENDIAN
1520 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1521 : i);
1522 /* Offset from start of field in OP0. */
1523 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1524 ? MAX (0, ((int) bitsize - ((int) i + 1)
1525 * (int) BITS_PER_WORD))
1526 : (int) i * BITS_PER_WORD);
1527 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1528 rtx result_part
1529 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1530 bitsize - i * BITS_PER_WORD),
1531 bitnum + bit_offset, 1, false, target_part, mode,
1532 word_mode);
1533
1534 gcc_assert (target_part);
1535
1536 if (result_part != target_part)
1537 emit_move_insn (target_part, result_part);
1538 }
1539
1540 if (unsignedp)
1541 {
1542 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1543 need to be zero'd out. */
1544 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1545 {
1546 unsigned int i, total_words;
1547
1548 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1549 for (i = nwords; i < total_words; i++)
1550 emit_move_insn
1551 (operand_subword (target,
1552 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1553 1, VOIDmode),
1554 const0_rtx);
1555 }
1556 return target;
1557 }
1558
1559 /* Signed bit field: sign-extend with two arithmetic shifts. */
1560 target = expand_shift (LSHIFT_EXPR, mode, target,
1561 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1562 return expand_shift (RSHIFT_EXPR, mode, target,
1563 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1564 }
1565
1566 /* From here on we know the desired field is smaller than a word. */
1567
1568 /* Check if there is a correspondingly-sized integer field, so we can
1569 safely extract it as one size of integer, if necessary; then
1570 truncate or extend to the size that is wanted; then use SUBREGs or
1571 convert_to_mode to get one of the modes we really wanted. */
1572
1573 int_mode = int_mode_for_mode (tmode);
1574 if (int_mode == BLKmode)
1575 int_mode = int_mode_for_mode (mode);
1576 /* Should probably push op0 out to memory and then do a load. */
1577 gcc_assert (int_mode != BLKmode);
1578
1579 /* OFFSET is the number of words or bytes (UNIT says which)
1580 from STR_RTX to the first word or byte containing part of the field. */
1581 if (!MEM_P (op0))
1582 {
1583 if (offset != 0
1584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1585 {
1586 if (!REG_P (op0))
1587 op0 = copy_to_reg (op0);
1588 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1589 op0, (offset * UNITS_PER_WORD));
1590 }
1591 offset = 0;
1592 }
1593
1594 /* Now OFFSET is nonzero only for memory operands. */
1595 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1596 if (ext_mode != MAX_MACHINE_MODE
1597 && bitsize > 0
1598 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1599 /* Do not use extv/extzv for volatile bitfields when
1600 -fstrict-volatile-bitfields is in effect. */
1601 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1602 && flag_strict_volatile_bitfields > 0)
1603 /* If op0 is a register, we need it in EXT_MODE to make it
1604 acceptable to the format of ext(z)v. */
1605 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1606 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1607 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1608 {
1609 struct expand_operand ops[4];
1610 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1611 rtx xop0 = op0;
1612 rtx xtarget = target;
1613 rtx xspec_target = target;
1614 rtx xspec_target_subreg = 0;
1615
1616 /* If op0 is a register, we need it in EXT_MODE to make it
1617 acceptable to the format of ext(z)v. */
1618 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1619 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1620 if (MEM_P (xop0))
1621 /* Get ref to first byte containing part of the field. */
1622 xop0 = adjust_address (xop0, byte_mode, xoffset);
1623
1624 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1625 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1626 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1627
1628 unit = GET_MODE_BITSIZE (ext_mode);
1629
1630 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1631 "backwards" from the size of the unit we are extracting from.
1632 Otherwise, we count bits from the most significant on a
1633 BYTES/BITS_BIG_ENDIAN machine. */
1634
1635 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1636 xbitpos = unit - bitsize - xbitpos;
1637
1638 if (xtarget == 0)
1639 xtarget = xspec_target = gen_reg_rtx (tmode);
1640
1641 if (GET_MODE (xtarget) != ext_mode)
1642 {
1643 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1644 between the mode of the extraction (word_mode) and the target
1645 mode. Instead, create a temporary and use convert_move to set
1646 the target. */
1647 if (REG_P (xtarget)
1648 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1649 {
1650 xtarget = gen_lowpart (ext_mode, xtarget);
1651 if (GET_MODE_PRECISION (ext_mode)
1652 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1653 xspec_target_subreg = xtarget;
1654 }
1655 else
1656 xtarget = gen_reg_rtx (ext_mode);
1657 }
1658
1659 create_output_operand (&ops[0], xtarget, ext_mode);
1660 create_fixed_operand (&ops[1], xop0);
1661 create_integer_operand (&ops[2], bitsize);
1662 create_integer_operand (&ops[3], xbitpos);
1663 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1664 4, ops))
1665 {
1666 xtarget = ops[0].value;
1667 if (xtarget == xspec_target)
1668 return xtarget;
1669 if (xtarget == xspec_target_subreg)
1670 return xspec_target;
1671 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1672 }
1673 }
1674
1675 /* If OP0 is a memory, try copying it to a register and seeing if a
1676 cheap register alternative is available. */
1677 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1678 {
1679 enum machine_mode bestmode;
1680
1681 /* Get the mode to use for extracting this field. If
1682 OP0 is BLKmode, get the smallest mode consistent with the
1683 alignment. If OP0 is a non-BLKmode object that is no
1684 wider than EXT_MODE, use its mode. Otherwise, use the
1685 smallest mode containing the field. */
1686
1687 if (GET_MODE (op0) == BLKmode
1688 || (ext_mode != MAX_MACHINE_MODE
1689 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1690 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1691 (ext_mode == MAX_MACHINE_MODE
1692 ? VOIDmode : ext_mode),
1693 MEM_VOLATILE_P (op0));
1694 else
1695 bestmode = GET_MODE (op0);
1696
1697 if (bestmode != VOIDmode
1698 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1699 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1700 {
1701 unsigned HOST_WIDE_INT xoffset, xbitpos;
1702
1703 /* Compute the offset as a multiple of this unit,
1704 counting in bytes. */
1705 unit = GET_MODE_BITSIZE (bestmode);
1706 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1707 xbitpos = bitnum % unit;
1708
1709 /* Make sure the register is big enough for the whole field. */
1710 if (xoffset * BITS_PER_UNIT + unit
1711 >= offset * BITS_PER_UNIT + bitsize)
1712 {
1713 rtx last, result, xop0;
1714
1715 last = get_last_insn ();
1716
1717 /* Fetch it to a register in that size. */
1718 xop0 = adjust_address (op0, bestmode, xoffset);
1719 xop0 = force_reg (bestmode, xop0);
1720 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1721 unsignedp, packedp, target,
1722 mode, tmode, false);
1723 if (result)
1724 return result;
1725
1726 delete_insns_since (last);
1727 }
1728 }
1729 }
1730
1731 if (!fallback_p)
1732 return NULL;
1733
1734 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1735 bitpos, target, unsignedp, packedp);
1736 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1737 }
1738
1739 /* Generate code to extract a byte-field from STR_RTX
1740 containing BITSIZE bits, starting at BITNUM,
1741 and put it in TARGET if possible (if TARGET is nonzero).
1742 Regardless of TARGET, we return the rtx for where the value is placed.
1743
1744 STR_RTX is the structure containing the byte (a REG or MEM).
1745 UNSIGNEDP is nonzero if this is an unsigned bit field.
1746 PACKEDP is nonzero if the field has the packed attribute.
1747 MODE is the natural mode of the field value once extracted.
1748 TMODE is the mode the caller would like the value to have;
1749 but the value may be returned with type MODE instead.
1750
1751 If a TARGET is specified and we can store in it at no extra cost,
1752 we do so, and return TARGET.
1753 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1754 if they are equally easy. */
1755
1756 rtx
1757 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1758 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1759 rtx target, enum machine_mode mode, enum machine_mode tmode)
1760 {
1761 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1762 target, mode, tmode, true);
1763 }
1764 \f
1765 /* Extract a bit field using shifts and boolean operations.
1766 Returns an rtx to represent the value.
1767 OP0 addresses a register (word) or memory (byte).
1768 BITPOS says which bit within the word or byte the bit field starts in.
1769 OFFSET says how many bytes farther the bit field starts;
1770 it is 0 if OP0 is a register.
1771 BITSIZE says how many bits long the bit field is.
1772 (If OP0 is a register, it may be narrower than a full word,
1773 but BITPOS still counts within a full word,
1774 which is significant on bigendian machines.)
1775
1776 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1777 PACKEDP is true if the field has the packed attribute.
1778
1779 If TARGET is nonzero, attempts to store the value there
1780 and return TARGET, but this is not guaranteed.
1781 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1782
1783 static rtx
1784 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1785 unsigned HOST_WIDE_INT offset,
1786 unsigned HOST_WIDE_INT bitsize,
1787 unsigned HOST_WIDE_INT bitpos, rtx target,
1788 int unsignedp, bool packedp)
1789 {
1790 unsigned int total_bits = BITS_PER_WORD;
1791 enum machine_mode mode;
1792
1793 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1794 {
1795 /* Special treatment for a bit field split across two registers. */
1796 if (bitsize + bitpos > BITS_PER_WORD)
1797 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1798 }
1799 else
1800 {
1801 /* Get the proper mode to use for this field. We want a mode that
1802 includes the entire field. If such a mode would be larger than
1803 a word, we won't be doing the extraction the normal way. */
1804
1805 if (MEM_VOLATILE_P (op0)
1806 && flag_strict_volatile_bitfields > 0)
1807 {
1808 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1809 mode = GET_MODE (op0);
1810 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1811 mode = GET_MODE (target);
1812 else
1813 mode = tmode;
1814 }
1815 else
1816 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1817 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1818
1819 if (mode == VOIDmode)
1820 /* The only way this should occur is if the field spans word
1821 boundaries. */
1822 return extract_split_bit_field (op0, bitsize,
1823 bitpos + offset * BITS_PER_UNIT,
1824 unsignedp);
1825
1826 total_bits = GET_MODE_BITSIZE (mode);
1827
1828 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1829 be in the range 0 to total_bits-1, and put any excess bytes in
1830 OFFSET. */
1831 if (bitpos >= total_bits)
1832 {
1833 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1834 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1835 * BITS_PER_UNIT);
1836 }
1837
1838 /* If we're accessing a volatile MEM, we can't do the next
1839 alignment step if it results in a multi-word access where we
1840 otherwise wouldn't have one. So, check for that case
1841 here. */
1842 if (MEM_P (op0)
1843 && MEM_VOLATILE_P (op0)
1844 && flag_strict_volatile_bitfields > 0
1845 && bitpos + bitsize <= total_bits
1846 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1847 {
1848 if (STRICT_ALIGNMENT)
1849 {
1850 static bool informed_about_misalignment = false;
1851 bool warned;
1852
1853 if (packedp)
1854 {
1855 if (bitsize == total_bits)
1856 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1857 "multiple accesses to volatile structure member"
1858 " because of packed attribute");
1859 else
1860 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1861 "multiple accesses to volatile structure bitfield"
1862 " because of packed attribute");
1863
1864 return extract_split_bit_field (op0, bitsize,
1865 bitpos + offset * BITS_PER_UNIT,
1866 unsignedp);
1867 }
1868
1869 if (bitsize == total_bits)
1870 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1871 "mis-aligned access used for structure member");
1872 else
1873 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1874 "mis-aligned access used for structure bitfield");
1875
1876 if (! informed_about_misalignment && warned)
1877 {
1878 informed_about_misalignment = true;
1879 inform (input_location,
1880 "when a volatile object spans multiple type-sized locations,"
1881 " the compiler must choose between using a single mis-aligned access to"
1882 " preserve the volatility, or using multiple aligned accesses to avoid"
1883 " runtime faults; this code may fail at runtime if the hardware does"
1884 " not allow this access");
1885 }
1886 }
1887 }
1888 else
1889 {
1890
1891 /* Get ref to an aligned byte, halfword, or word containing the field.
1892 Adjust BITPOS to be position within a word,
1893 and OFFSET to be the offset of that word.
1894 Then alter OP0 to refer to that word. */
1895 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1896 offset -= (offset % (total_bits / BITS_PER_UNIT));
1897 }
1898
1899 op0 = adjust_address (op0, mode, offset);
1900 }
1901
1902 mode = GET_MODE (op0);
1903
1904 if (BYTES_BIG_ENDIAN)
1905 /* BITPOS is the distance between our msb and that of OP0.
1906 Convert it to the distance from the lsb. */
1907 bitpos = total_bits - bitsize - bitpos;
1908
1909 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1910 We have reduced the big-endian case to the little-endian case. */
1911
1912 if (unsignedp)
1913 {
1914 if (bitpos)
1915 {
1916 /* If the field does not already start at the lsb,
1917 shift it so it does. */
1918 /* Maybe propagate the target for the shift. */
1919 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1920 if (tmode != mode)
1921 subtarget = 0;
1922 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1923 }
1924 /* Convert the value to the desired mode. */
1925 if (mode != tmode)
1926 op0 = convert_to_mode (tmode, op0, 1);
1927
1928 /* Unless the msb of the field used to be the msb when we shifted,
1929 mask out the upper bits. */
1930
1931 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1932 return expand_binop (GET_MODE (op0), and_optab, op0,
1933 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1934 target, 1, OPTAB_LIB_WIDEN);
1935 return op0;
1936 }
1937
1938 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1939 then arithmetic-shift its lsb to the lsb of the word. */
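 /* Illustrative sketch (not part of the original comment): for an 8-bit
    signed field at the lsb of a 32-bit word this amounts to
        value = ((int) (word << 24)) >> 24;
    the left shift moves the field's sign bit into the word's sign bit and
    the arithmetic right shift then replicates it downwards.  */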
1940 op0 = force_reg (mode, op0);
1941
1942 /* Find the narrowest integer mode that contains the field. */
1943
1944 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1945 mode = GET_MODE_WIDER_MODE (mode))
1946 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1947 {
1948 op0 = convert_to_mode (mode, op0, 0);
1949 break;
1950 }
1951
1952 if (mode != tmode)
1953 target = 0;
1954
1955 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1956 {
1957 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1958 /* Maybe propagate the target for the shift. */
1959 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1960 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1961 }
1962
1963 return expand_shift (RSHIFT_EXPR, mode, op0,
1964 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1965 }
1966 \f
1967 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1968 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1969 complement of that if COMPLEMENT. The mask is truncated if
1970 necessary to the width of mode MODE. The mask is zero-extended if
1971 BITSIZE+BITPOS is too small for MODE. */
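/* Illustration (not part of the original comment): mask_rtx (SImode, 2, 3, 0)
   yields the constant 0x1c (three ones shifted left by two bits), while the
   complemented form mask_rtx (SImode, 2, 3, 1) yields 0xffffffe3.  */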
1972
1973 static rtx
1974 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1975 {
1976 double_int mask;
1977
1978 mask = double_int_mask (bitsize);
1979 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1980
1981 if (complement)
1982 mask = double_int_not (mask);
1983
1984 return immed_double_int_const (mask, mode);
1985 }
1986
1987 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1988 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1989
1990 static rtx
1991 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1992 {
1993 double_int val;
1994
1995 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1996 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1997
1998 return immed_double_int_const (val, mode);
1999 }
2000 \f
2001 /* Extract a bit field that is split across two words
2002 and return an RTX for the result.
2003
2004 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2005 BITSIZE is the field width; BITPOS is the position of its first bit in the word.
2006 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
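/* Illustration (not part of the original comment): for a register operand on
   a 32-bit little-endian target, a 16-bit field at BITPOS 24 is assembled
   from two pieces: 8 bits at position 24 of the first word and 8 bits at
   position 0 of the next word, which the loop below shifts into place and
   ORs together.  */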
2007
2008 static rtx
2009 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2010 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2011 {
2012 unsigned int unit;
2013 unsigned int bitsdone = 0;
2014 rtx result = NULL_RTX;
2015 int first = 1;
2016
2017 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2018 much at a time. */
2019 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2020 unit = BITS_PER_WORD;
2021 else
2022 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2023
2024 while (bitsdone < bitsize)
2025 {
2026 unsigned HOST_WIDE_INT thissize;
2027 rtx part, word;
2028 unsigned HOST_WIDE_INT thispos;
2029 unsigned HOST_WIDE_INT offset;
2030
2031 offset = (bitpos + bitsdone) / unit;
2032 thispos = (bitpos + bitsdone) % unit;
2033
2034 /* THISSIZE must not overrun a word boundary. Otherwise,
2035 extract_fixed_bit_field will call us again, and we will mutually
2036 recurse forever. */
2037 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2038 thissize = MIN (thissize, unit - thispos);
2039
2040 /* If OP0 is a register, then handle OFFSET here.
2041
2042 When handling multiword bitfields, extract_bit_field may pass
2043 down a word_mode SUBREG of a larger REG for a bitfield that actually
2044 crosses a word boundary. Thus, for a SUBREG, we must find
2045 the current word starting from the base register. */
2046 if (GET_CODE (op0) == SUBREG)
2047 {
2048 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2049 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2050 GET_MODE (SUBREG_REG (op0)));
2051 offset = 0;
2052 }
2053 else if (REG_P (op0))
2054 {
2055 word = operand_subword_force (op0, offset, GET_MODE (op0));
2056 offset = 0;
2057 }
2058 else
2059 word = op0;
2060
2061 /* Extract the parts in bit-counting order,
2062 whose meaning is determined by BYTES_PER_UNIT.
2063 OFFSET is in UNITs, and UNIT is in bits.
2064 extract_fixed_bit_field wants offset in bytes. */
2065 part = extract_fixed_bit_field (word_mode, word,
2066 offset * unit / BITS_PER_UNIT,
2067 thissize, thispos, 0, 1, false);
2068 bitsdone += thissize;
2069
2070 /* Shift this part into place for the result. */
2071 if (BYTES_BIG_ENDIAN)
2072 {
2073 if (bitsize != bitsdone)
2074 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2075 bitsize - bitsdone, 0, 1);
2076 }
2077 else
2078 {
2079 if (bitsdone != thissize)
2080 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2081 bitsdone - thissize, 0, 1);
2082 }
2083
2084 if (first)
2085 result = part;
2086 else
2087 /* Combine the parts with bitwise or. This works
2088 because we extracted each part as an unsigned bit field. */
2089 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2090 OPTAB_LIB_WIDEN);
2091
2092 first = 0;
2093 }
2094
2095 /* Unsigned bit field: we are done. */
2096 if (unsignedp)
2097 return result;
2098 /* Signed bit field: sign-extend with two arithmetic shifts. */
2099 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2100 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2101 return expand_shift (RSHIFT_EXPR, word_mode, result,
2102 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2103 }
2104 \f
2105 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2106 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2107 MODE, fill the upper bits with zeros. Fail if the layout of either
2108 mode is unknown (as for CC modes) or if the extraction would involve
2109 unprofitable mode punning. Return the value on success, otherwise
2110 return null.
2111
2112 This is different from gen_lowpart* in these respects:
2113
2114 - the returned value must always be considered an rvalue
2115
2116 - when MODE is wider than SRC_MODE, the extraction involves
2117 a zero extension
2118
2119 - when MODE is smaller than SRC_MODE, the extraction involves
2120 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2121
2122 In other words, this routine performs a computation, whereas the
2123 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2124 operations. */
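/* E.g. (illustrative): extract_low_bits (HImode, SImode, x) behaves like a
   truncation of X to 16 bits, extract_low_bits (SImode, HImode, x) like a
   zero extension to 32 bits, and for a non-integral SRC_MODE such as SFmode
   the bits are first reinterpreted in the corresponding integer mode.  */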
2125
2126 rtx
2127 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2128 {
2129 enum machine_mode int_mode, src_int_mode;
2130
2131 if (mode == src_mode)
2132 return src;
2133
2134 if (CONSTANT_P (src))
2135 {
2136 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2137 fails, it will happily create (subreg (symbol_ref)) or similar
2138 invalid SUBREGs. */
2139 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2140 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2141 if (ret)
2142 return ret;
2143
2144 if (GET_MODE (src) == VOIDmode
2145 || !validate_subreg (mode, src_mode, src, byte))
2146 return NULL_RTX;
2147
2148 src = force_reg (GET_MODE (src), src);
2149 return gen_rtx_SUBREG (mode, src, byte);
2150 }
2151
2152 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2153 return NULL_RTX;
2154
2155 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2156 && MODES_TIEABLE_P (mode, src_mode))
2157 {
2158 rtx x = gen_lowpart_common (mode, src);
2159 if (x)
2160 return x;
2161 }
2162
2163 src_int_mode = int_mode_for_mode (src_mode);
2164 int_mode = int_mode_for_mode (mode);
2165 if (src_int_mode == BLKmode || int_mode == BLKmode)
2166 return NULL_RTX;
2167
2168 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2169 return NULL_RTX;
2170 if (!MODES_TIEABLE_P (int_mode, mode))
2171 return NULL_RTX;
2172
2173 src = gen_lowpart (src_int_mode, src);
2174 src = convert_modes (int_mode, src_int_mode, src, true);
2175 src = gen_lowpart (mode, src);
2176 return src;
2177 }
2178 \f
2179 /* Add INC into TARGET. */
2180
2181 void
2182 expand_inc (rtx target, rtx inc)
2183 {
2184 rtx value = expand_binop (GET_MODE (target), add_optab,
2185 target, inc,
2186 target, 0, OPTAB_LIB_WIDEN);
2187 if (value != target)
2188 emit_move_insn (target, value);
2189 }
2190
2191 /* Subtract DEC from TARGET. */
2192
2193 void
2194 expand_dec (rtx target, rtx dec)
2195 {
2196 rtx value = expand_binop (GET_MODE (target), sub_optab,
2197 target, dec,
2198 target, 0, OPTAB_LIB_WIDEN);
2199 if (value != target)
2200 emit_move_insn (target, value);
2201 }
2202 \f
2203 /* Output a shift instruction for expression code CODE,
2204 with SHIFTED being the rtx for the value to shift,
2205 and AMOUNT the rtx for the amount to shift by.
2206 Store the result in the rtx TARGET, if that is convenient.
2207 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2208 Return the rtx for where the value is. */
2209
2210 static rtx
2211 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2212 rtx amount, rtx target, int unsignedp)
2213 {
2214 rtx op1, temp = 0;
2215 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2216 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2217 optab lshift_optab = ashl_optab;
2218 optab rshift_arith_optab = ashr_optab;
2219 optab rshift_uns_optab = lshr_optab;
2220 optab lrotate_optab = rotl_optab;
2221 optab rrotate_optab = rotr_optab;
2222 enum machine_mode op1_mode;
2223 int attempt;
2224 bool speed = optimize_insn_for_speed_p ();
2225
2226 op1 = amount;
2227 op1_mode = GET_MODE (op1);
2228
2229 /* Determine whether the shift/rotate amount is a vector or scalar. If the
2230 shift amount is a vector, use the vector/vector shift patterns. */
2231 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2232 {
2233 lshift_optab = vashl_optab;
2234 rshift_arith_optab = vashr_optab;
2235 rshift_uns_optab = vlshr_optab;
2236 lrotate_optab = vrotl_optab;
2237 rrotate_optab = vrotr_optab;
2238 }
2239
2240 /* We previously detected shift counts computed by NEGATE_EXPR
2241 and shifted in the other direction, but that does not work
2242 on all machines. */
2243
2244 if (SHIFT_COUNT_TRUNCATED)
2245 {
2246 if (CONST_INT_P (op1)
2247 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2248 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2249 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2250 % GET_MODE_BITSIZE (mode));
2251 else if (GET_CODE (op1) == SUBREG
2252 && subreg_lowpart_p (op1)
2253 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2254 op1 = SUBREG_REG (op1);
2255 }
2256
2257 if (op1 == const0_rtx)
2258 return shifted;
2259
2260 /* Check whether it's cheaper to implement a left shift by a constant
2261 bit count by a sequence of additions. */
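  /* E.g. (illustration only): when two additions are cheaper than one shift,
     x << 2 is emitted as t = x + x; t = t + t;.  */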
2262 if (code == LSHIFT_EXPR
2263 && CONST_INT_P (op1)
2264 && INTVAL (op1) > 0
2265 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2266 && INTVAL (op1) < MAX_BITS_PER_WORD
2267 && (shift_cost (speed, mode, INTVAL (op1))
2268 > INTVAL (op1) * add_cost (speed, mode))
2269 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2270 {
2271 int i;
2272 for (i = 0; i < INTVAL (op1); i++)
2273 {
2274 temp = force_reg (mode, shifted);
2275 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2276 unsignedp, OPTAB_LIB_WIDEN);
2277 }
2278 return shifted;
2279 }
2280
2281 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2282 {
2283 enum optab_methods methods;
2284
2285 if (attempt == 0)
2286 methods = OPTAB_DIRECT;
2287 else if (attempt == 1)
2288 methods = OPTAB_WIDEN;
2289 else
2290 methods = OPTAB_LIB_WIDEN;
2291
2292 if (rotate)
2293 {
2294 /* Widening does not work for rotation. */
2295 if (methods == OPTAB_WIDEN)
2296 continue;
2297 else if (methods == OPTAB_LIB_WIDEN)
2298 {
2299 /* If we have been unable to open-code this by a rotation,
2300 do it as the IOR of two shifts. I.e., to rotate A
2301 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2302 where C is the bitsize of A.
2303
2304 It is theoretically possible that the target machine might
2305 not be able to perform either shift and hence we would
2306 be making two libcalls rather than just the one for the
2307 shift (similarly if IOR could not be done). We will allow
2308 this extremely unlikely lossage to avoid complicating the
2309 code below. */
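          /* Sketch of the identity used (illustrative values, not from the
             original source): rotating a 32-bit value A left by 8 becomes
                 (A << 8) | ((unsigned) A >> (32 - 8));
             a right rotate swaps the two shift directions.  */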
2310
2311 rtx subtarget = target == shifted ? 0 : target;
2312 rtx new_amount, other_amount;
2313 rtx temp1;
2314
2315 new_amount = op1;
2316 if (CONST_INT_P (op1))
2317 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2318 - INTVAL (op1));
2319 else
2320 other_amount
2321 = simplify_gen_binary (MINUS, GET_MODE (op1),
2322 GEN_INT (GET_MODE_PRECISION (mode)),
2323 op1);
2324
2325 shifted = force_reg (mode, shifted);
2326
2327 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2328 mode, shifted, new_amount, 0, 1);
2329 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2330 mode, shifted, other_amount,
2331 subtarget, 1);
2332 return expand_binop (mode, ior_optab, temp, temp1, target,
2333 unsignedp, methods);
2334 }
2335
2336 temp = expand_binop (mode,
2337 left ? lrotate_optab : rrotate_optab,
2338 shifted, op1, target, unsignedp, methods);
2339 }
2340 else if (unsignedp)
2341 temp = expand_binop (mode,
2342 left ? lshift_optab : rshift_uns_optab,
2343 shifted, op1, target, unsignedp, methods);
2344
2345 /* Do arithmetic shifts.
2346 Also, if we are going to widen the operand, we can just as well
2347 use an arithmetic right-shift instead of a logical one. */
2348 if (temp == 0 && ! rotate
2349 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2350 {
2351 enum optab_methods methods1 = methods;
2352
2353 /* If trying to widen a log shift to an arithmetic shift,
2354 don't accept an arithmetic shift of the same size. */
2355 if (unsignedp)
2356 methods1 = OPTAB_MUST_WIDEN;
2357
2358 /* Arithmetic shift */
2359
2360 temp = expand_binop (mode,
2361 left ? lshift_optab : rshift_arith_optab,
2362 shifted, op1, target, unsignedp, methods1);
2363 }
2364
2365 /* We used to try extzv here for logical right shifts, but that was
2366 only useful for one machine, the VAX, and caused poor code
2367 generation there for lshrdi3, so the code was deleted and a
2368 define_expand for lshrsi3 was added to vax.md. */
2369 }
2370
2371 gcc_assert (temp);
2372 return temp;
2373 }
2374
2375 /* Output a shift instruction for expression code CODE,
2376 with SHIFTED being the rtx for the value to shift,
2377 and AMOUNT the amount to shift by.
2378 Store the result in the rtx TARGET, if that is convenient.
2379 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2380 Return the rtx for where the value is. */
2381
2382 rtx
2383 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2384 int amount, rtx target, int unsignedp)
2385 {
2386 return expand_shift_1 (code, mode,
2387 shifted, GEN_INT (amount), target, unsignedp);
2388 }
2389
2390 /* Output a shift instruction for expression code CODE,
2391 with SHIFTED being the rtx for the value to shift,
2392 and AMOUNT the tree for the amount to shift by.
2393 Store the result in the rtx TARGET, if that is convenient.
2394 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2395 Return the rtx for where the value is. */
2396
2397 rtx
2398 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2399 tree amount, rtx target, int unsignedp)
2400 {
2401 return expand_shift_1 (code, mode,
2402 shifted, expand_normal (amount), target, unsignedp);
2403 }
2404
2405 \f
2406 /* Indicates the type of fixup needed after a constant multiplication.
2407 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2408 the result should be negated, and ADD_VARIANT means that the
2409 multiplicand should be added to the result. */
2410 enum mult_variant {basic_variant, negate_variant, add_variant};
2411
2412 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2413 const struct mult_cost *, enum machine_mode mode);
2414 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2415 struct algorithm *, enum mult_variant *, int);
2416 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2417 const struct algorithm *, enum mult_variant);
2418 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2419 static rtx extract_high_half (enum machine_mode, rtx);
2420 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2421 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2422 int, int);
2423 /* Compute and return the best algorithm for multiplying by T.
2424 The algorithm must cost less than COST_LIMIT.
2425 If retval.cost >= COST_LIMIT, no algorithm was found and all
2426 other fields of the returned struct are undefined.
2427 MODE is the machine mode of the multiplication. */
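/* Worked example (illustration only, not part of the original comment): for
   T == 10 the search strips the low zero bit (alg_shift by 1) and recurses
   on 5, which is handled as 4 + 1, so one natural result is computing
   x * 10 as ((x << 2) + x) << 1.  */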
2428
2429 static void
2430 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2431 const struct mult_cost *cost_limit, enum machine_mode mode)
2432 {
2433 int m;
2434 struct algorithm *alg_in, *best_alg;
2435 struct mult_cost best_cost;
2436 struct mult_cost new_limit;
2437 int op_cost, op_latency;
2438 unsigned HOST_WIDE_INT orig_t = t;
2439 unsigned HOST_WIDE_INT q;
2440 int maxm, hash_index;
2441 bool cache_hit = false;
2442 enum alg_code cache_alg = alg_zero;
2443 bool speed = optimize_insn_for_speed_p ();
2444 enum machine_mode imode;
2445 struct alg_hash_entry *entry_ptr;
2446
2447 /* Indicate that no algorithm is yet found. If no algorithm
2448 is found, this value will be returned and indicate failure. */
2449 alg_out->cost.cost = cost_limit->cost + 1;
2450 alg_out->cost.latency = cost_limit->latency + 1;
2451
2452 if (cost_limit->cost < 0
2453 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2454 return;
2455
2456 /* Be prepared for vector modes. */
2457 imode = GET_MODE_INNER (mode);
2458 if (imode == VOIDmode)
2459 imode = mode;
2460
2461 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2462
2463 /* Restrict the bits of "t" to the multiplication's mode. */
2464 t &= GET_MODE_MASK (imode);
2465
2466 /* t == 1 can be done in zero cost. */
2467 if (t == 1)
2468 {
2469 alg_out->ops = 1;
2470 alg_out->cost.cost = 0;
2471 alg_out->cost.latency = 0;
2472 alg_out->op[0] = alg_m;
2473 return;
2474 }
2475
2476 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2477 fail now. */
2478 if (t == 0)
2479 {
2480 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2481 return;
2482 else
2483 {
2484 alg_out->ops = 1;
2485 alg_out->cost.cost = zero_cost (speed);
2486 alg_out->cost.latency = zero_cost (speed);
2487 alg_out->op[0] = alg_zero;
2488 return;
2489 }
2490 }
2491
2492 /* We'll be needing a couple extra algorithm structures now. */
2493
2494 alg_in = XALLOCA (struct algorithm);
2495 best_alg = XALLOCA (struct algorithm);
2496 best_cost = *cost_limit;
2497
2498 /* Compute the hash index. */
2499 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2500
2501 /* See if we already know what to do for T. */
2502 entry_ptr = alg_hash_entry_ptr (hash_index);
2503 if (entry_ptr->t == t
2504 && entry_ptr->mode == mode
2506 && entry_ptr->speed == speed
2507 && entry_ptr->alg != alg_unknown)
2508 {
2509 cache_alg = entry_ptr->alg;
2510
2511 if (cache_alg == alg_impossible)
2512 {
2513 /* The cache tells us that it's impossible to synthesize
2514 multiplication by T within entry_ptr->cost. */
2515 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2516 /* COST_LIMIT is at least as restrictive as the one
2517 recorded in the hash table, in which case we have no
2518 hope of synthesizing a multiplication. Just
2519 return. */
2520 return;
2521
2522 /* If we get here, COST_LIMIT is less restrictive than the
2523 one recorded in the hash table, so we may be able to
2524 synthesize a multiplication. Proceed as if we didn't
2525 have the cache entry. */
2526 }
2527 else
2528 {
2529 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2530 /* The cached algorithm shows that this multiplication
2531 requires more cost than COST_LIMIT. Just return. This
2532 way, we don't clobber this cache entry with
2533 alg_impossible but retain useful information. */
2534 return;
2535
2536 cache_hit = true;
2537
2538 switch (cache_alg)
2539 {
2540 case alg_shift:
2541 goto do_alg_shift;
2542
2543 case alg_add_t_m2:
2544 case alg_sub_t_m2:
2545 goto do_alg_addsub_t_m2;
2546
2547 case alg_add_factor:
2548 case alg_sub_factor:
2549 goto do_alg_addsub_factor;
2550
2551 case alg_add_t2_m:
2552 goto do_alg_add_t2_m;
2553
2554 case alg_sub_t2_m:
2555 goto do_alg_sub_t2_m;
2556
2557 default:
2558 gcc_unreachable ();
2559 }
2560 }
2561 }
2562
2563 /* If we have a group of zero bits at the low-order part of T, try
2564 multiplying by the remaining bits and then doing a shift. */
2565
2566 if ((t & 1) == 0)
2567 {
2568 do_alg_shift:
2569 m = floor_log2 (t & -t); /* m = number of low zero bits */
2570 if (m < maxm)
2571 {
2572 q = t >> m;
2573 /* The function expand_shift will choose between a shift and
2574 a sequence of additions, so the observed cost is given as
2575 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2576 op_cost = m * add_cost (speed, mode);
2577 if (shift_cost (speed, mode, m) < op_cost)
2578 op_cost = shift_cost (speed, mode, m);
2579 new_limit.cost = best_cost.cost - op_cost;
2580 new_limit.latency = best_cost.latency - op_cost;
2581 synth_mult (alg_in, q, &new_limit, mode);
2582
2583 alg_in->cost.cost += op_cost;
2584 alg_in->cost.latency += op_cost;
2585 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2586 {
2587 struct algorithm *x;
2588 best_cost = alg_in->cost;
2589 x = alg_in, alg_in = best_alg, best_alg = x;
2590 best_alg->log[best_alg->ops] = m;
2591 best_alg->op[best_alg->ops] = alg_shift;
2592 }
2593
2594 /* See if treating ORIG_T as a signed number yields a better
2595 sequence. Try this sequence only for a negative ORIG_T
2596 as it would be useless for a non-negative ORIG_T. */
2597 if ((HOST_WIDE_INT) orig_t < 0)
2598 {
2599 /* Shift ORIG_T as follows because a right shift of a
2600 negative-valued signed type is implementation
2601 defined. */
2602 q = ~(~orig_t >> m);
2603 /* The function expand_shift will choose between a shift
2604 and a sequence of additions, so the observed cost is
2605 given as MIN (m * add_cost(speed, mode),
2606 shift_cost(speed, mode, m)). */
2607 op_cost = m * add_cost (speed, mode);
2608 if (shift_cost (speed, mode, m) < op_cost)
2609 op_cost = shift_cost (speed, mode, m);
2610 new_limit.cost = best_cost.cost - op_cost;
2611 new_limit.latency = best_cost.latency - op_cost;
2612 synth_mult (alg_in, q, &new_limit, mode);
2613
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_cost;
2616 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2617 {
2618 struct algorithm *x;
2619 best_cost = alg_in->cost;
2620 x = alg_in, alg_in = best_alg, best_alg = x;
2621 best_alg->log[best_alg->ops] = m;
2622 best_alg->op[best_alg->ops] = alg_shift;
2623 }
2624 }
2625 }
2626 if (cache_hit)
2627 goto done;
2628 }
2629
2630 /* If we have an odd number, add or subtract one. */
2631 if ((t & 1) != 0)
2632 {
2633 unsigned HOST_WIDE_INT w;
2634
2635 do_alg_addsub_t_m2:
2636 for (w = 1; (w & t) != 0; w <<= 1)
2637 ;
2638 /* If T was -1, then W will be zero after the loop. This is another
2639 case where T ends with ...111. Handling this by multiplying by (T + 1)
2640 and subtracting 1 produces slightly better code and makes algorithm
2641 selection much faster than treating it like the ...0111 case
2642 below. */
2643 if (w == 0
2644 || (w > 2
2645 /* Reject the case where t is 3.
2646 Thus we prefer addition in that case. */
2647 && t != 3))
2648 {
2649 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
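          /* E.g. (illustration only): for T == 7 this recurses on 8, a pure
             shift, so x * 7 is synthesized as (x << 3) - x.  */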
2650
2651 op_cost = add_cost (speed, mode);
2652 new_limit.cost = best_cost.cost - op_cost;
2653 new_limit.latency = best_cost.latency - op_cost;
2654 synth_mult (alg_in, t + 1, &new_limit, mode);
2655
2656 alg_in->cost.cost += op_cost;
2657 alg_in->cost.latency += op_cost;
2658 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2659 {
2660 struct algorithm *x;
2661 best_cost = alg_in->cost;
2662 x = alg_in, alg_in = best_alg, best_alg = x;
2663 best_alg->log[best_alg->ops] = 0;
2664 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2665 }
2666 }
2667 else
2668 {
2669 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2670
2671 op_cost = add_cost (speed, mode);
2672 new_limit.cost = best_cost.cost - op_cost;
2673 new_limit.latency = best_cost.latency - op_cost;
2674 synth_mult (alg_in, t - 1, &new_limit, mode);
2675
2676 alg_in->cost.cost += op_cost;
2677 alg_in->cost.latency += op_cost;
2678 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 {
2680 struct algorithm *x;
2681 best_cost = alg_in->cost;
2682 x = alg_in, alg_in = best_alg, best_alg = x;
2683 best_alg->log[best_alg->ops] = 0;
2684 best_alg->op[best_alg->ops] = alg_add_t_m2;
2685 }
2686 }
2687
2688 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2689 quickly with a - a * n for some appropriate constant n. */
2690 m = exact_log2 (-orig_t + 1);
2691 if (m >= 0 && m < maxm)
2692 {
2693 op_cost = shiftsub1_cost (speed, mode, m);
2694 new_limit.cost = best_cost.cost - op_cost;
2695 new_limit.latency = best_cost.latency - op_cost;
2696 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2697 &new_limit, mode);
2698
2699 alg_in->cost.cost += op_cost;
2700 alg_in->cost.latency += op_cost;
2701 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702 {
2703 struct algorithm *x;
2704 best_cost = alg_in->cost;
2705 x = alg_in, alg_in = best_alg, best_alg = x;
2706 best_alg->log[best_alg->ops] = m;
2707 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2708 }
2709 }
2710
2711 if (cache_hit)
2712 goto done;
2713 }
2714
2715 /* Look for factors of t of the form
2716 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2717 If we find such a factor, we can multiply by t using an algorithm that
2718 multiplies by q, shift the result by m and add/subtract it to itself.
2719
2720 We search for large factors first and loop down, even if large factors
2721 are less probable than small; if we find a large factor we will find a
2722 good sequence quickly, and therefore be able to prune (by decreasing
2723 COST_LIMIT) the search. */
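  /* Worked example (illustrative, not from the original comment): T == 45
     factors as 9 * (2**2 + 1), so once x * 9 has been synthesized as
     (x << 3) + x, the factor step computes x * 45 as
     ((x * 9) << 2) + x * 9.  */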
2724
2725 do_alg_addsub_factor:
2726 for (m = floor_log2 (t - 1); m >= 2; m--)
2727 {
2728 unsigned HOST_WIDE_INT d;
2729
2730 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2731 if (t % d == 0 && t > d && m < maxm
2732 && (!cache_hit || cache_alg == alg_add_factor))
2733 {
2734 /* If the target has a cheap shift-and-add instruction use
2735 that in preference to a shift insn followed by an add insn.
2736 Assume that the shift-and-add is "atomic" with a latency
2737 equal to its cost, otherwise assume that on superscalar
2738 hardware the shift may be executed concurrently with the
2739 earlier steps in the algorithm. */
2740 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2741 if (shiftadd_cost (speed, mode, m) < op_cost)
2742 {
2743 op_cost = shiftadd_cost (speed, mode, m);
2744 op_latency = op_cost;
2745 }
2746 else
2747 op_latency = add_cost (speed, mode);
2748
2749 new_limit.cost = best_cost.cost - op_cost;
2750 new_limit.latency = best_cost.latency - op_latency;
2751 synth_mult (alg_in, t / d, &new_limit, mode);
2752
2753 alg_in->cost.cost += op_cost;
2754 alg_in->cost.latency += op_latency;
2755 if (alg_in->cost.latency < op_cost)
2756 alg_in->cost.latency = op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758 {
2759 struct algorithm *x;
2760 best_cost = alg_in->cost;
2761 x = alg_in, alg_in = best_alg, best_alg = x;
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_add_factor;
2764 }
2765 /* Other factors will have been taken care of in the recursion. */
2766 break;
2767 }
2768
2769 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2770 if (t % d == 0 && t > d && m < maxm
2771 && (!cache_hit || cache_alg == alg_sub_factor))
2772 {
2773 /* If the target has a cheap shift-and-subtract insn use
2774 that in preference to a shift insn followed by a sub insn.
2775 Assume that the shift-and-sub is "atomic" with a latency
2776 equal to its cost, otherwise assume that on superscalar
2777 hardware the shift may be executed concurrently with the
2778 earlier steps in the algorithm. */
2779 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2780 if (shiftsub0_cost (speed, mode, m) < op_cost)
2781 {
2782 op_cost = shiftsub0_cost (speed, mode, m);
2783 op_latency = op_cost;
2784 }
2785 else
2786 op_latency = add_cost (speed, mode);
2787
2788 new_limit.cost = best_cost.cost - op_cost;
2789 new_limit.latency = best_cost.latency - op_latency;
2790 synth_mult (alg_in, t / d, &new_limit, mode);
2791
2792 alg_in->cost.cost += op_cost;
2793 alg_in->cost.latency += op_latency;
2794 if (alg_in->cost.latency < op_cost)
2795 alg_in->cost.latency = op_cost;
2796 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2797 {
2798 struct algorithm *x;
2799 best_cost = alg_in->cost;
2800 x = alg_in, alg_in = best_alg, best_alg = x;
2801 best_alg->log[best_alg->ops] = m;
2802 best_alg->op[best_alg->ops] = alg_sub_factor;
2803 }
2804 break;
2805 }
2806 }
2807 if (cache_hit)
2808 goto done;
2809
2810 /* Try shift-and-add (load effective address) instructions,
2811 i.e. do a*3, a*5, a*9. */
2812 if ((t & 1) != 0)
2813 {
2814 do_alg_add_t2_m:
2815 q = t - 1;
2816 q = q & -q;
2817 m = exact_log2 (q);
2818 if (m >= 0 && m < maxm)
2819 {
2820 op_cost = shiftadd_cost (speed, mode, m);
2821 new_limit.cost = best_cost.cost - op_cost;
2822 new_limit.latency = best_cost.latency - op_cost;
2823 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2824
2825 alg_in->cost.cost += op_cost;
2826 alg_in->cost.latency += op_cost;
2827 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2828 {
2829 struct algorithm *x;
2830 best_cost = alg_in->cost;
2831 x = alg_in, alg_in = best_alg, best_alg = x;
2832 best_alg->log[best_alg->ops] = m;
2833 best_alg->op[best_alg->ops] = alg_add_t2_m;
2834 }
2835 }
2836 if (cache_hit)
2837 goto done;
2838
2839 do_alg_sub_t2_m:
2840 q = t + 1;
2841 q = q & -q;
2842 m = exact_log2 (q);
2843 if (m >= 0 && m < maxm)
2844 {
2845 op_cost = shiftsub0_cost (speed, mode, m);
2846 new_limit.cost = best_cost.cost - op_cost;
2847 new_limit.latency = best_cost.latency - op_cost;
2848 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2849
2850 alg_in->cost.cost += op_cost;
2851 alg_in->cost.latency += op_cost;
2852 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2853 {
2854 struct algorithm *x;
2855 best_cost = alg_in->cost;
2856 x = alg_in, alg_in = best_alg, best_alg = x;
2857 best_alg->log[best_alg->ops] = m;
2858 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2859 }
2860 }
2861 if (cache_hit)
2862 goto done;
2863 }
2864
2865 done:
2866 /* If best_cost has not decreased, we have not found any algorithm. */
2867 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2868 {
2869 /* We failed to find an algorithm. Record alg_impossible for
2870 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2871 we are asked to find an algorithm for T within the same or
2872 lower COST_LIMIT, we can immediately return to the
2873 caller. */
2874 entry_ptr->t = t;
2875 entry_ptr->mode = mode;
2876 entry_ptr->speed = speed;
2877 entry_ptr->alg = alg_impossible;
2878 entry_ptr->cost = *cost_limit;
2879 return;
2880 }
2881
2882 /* Cache the result. */
2883 if (!cache_hit)
2884 {
2885 entry_ptr->t = t;
2886 entry_ptr->mode = mode;
2887 entry_ptr->speed = speed;
2888 entry_ptr->alg = best_alg->op[best_alg->ops];
2889 entry_ptr->cost.cost = best_cost.cost;
2890 entry_ptr->cost.latency = best_cost.latency;
2891 }
2892
2893 /* If we are getting too long a sequence for `struct algorithm'
2894 to record, make this search fail. */
2895 if (best_alg->ops == MAX_BITS_PER_WORD)
2896 return;
2897
2898 /* Copy the algorithm from temporary space to the space at alg_out.
2899 We avoid using structure assignment because the majority of
2900 best_alg is normally undefined, and this is a critical function. */
2901 alg_out->ops = best_alg->ops + 1;
2902 alg_out->cost = best_cost;
2903 memcpy (alg_out->op, best_alg->op,
2904 alg_out->ops * sizeof *alg_out->op);
2905 memcpy (alg_out->log, best_alg->log,
2906 alg_out->ops * sizeof *alg_out->log);
2907 }
2908 \f
2909 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2910 Try three variations:
2911
2912 - a shift/add sequence based on VAL itself
2913 - a shift/add sequence based on -VAL, followed by a negation
2914 - a shift/add sequence based on VAL - 1, followed by an addition.
2915
2916 Return true if the cheapest of these costs less than MULT_COST,
2917 describing the algorithm in *ALG and final fixup in *VARIANT. */
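/* Illustration (not part of the original comment): for VAL == -3 the
   negate_variant may win, synthesizing x * 3 as (x << 1) + x and then
   negating; for VAL == 7 the add_variant corresponds to synthesizing
   x * 6 and then adding x once more.  */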
2918
2919 static bool
2920 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2921 struct algorithm *alg, enum mult_variant *variant,
2922 int mult_cost)
2923 {
2924 struct algorithm alg2;
2925 struct mult_cost limit;
2926 int op_cost;
2927 bool speed = optimize_insn_for_speed_p ();
2928
2929 /* Fail quickly for impossible bounds. */
2930 if (mult_cost < 0)
2931 return false;
2932
2933 /* Ensure that mult_cost provides a reasonable upper bound.
2934 Any constant multiplication can be performed with less
2935 than 2 * bits additions. */
2936 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2937 if (mult_cost > op_cost)
2938 mult_cost = op_cost;
2939
2940 *variant = basic_variant;
2941 limit.cost = mult_cost;
2942 limit.latency = mult_cost;
2943 synth_mult (alg, val, &limit, mode);
2944
2945 /* This works only if the inverted value actually fits in an
2946 `unsigned int'. */
2947 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2948 {
2949 op_cost = neg_cost(speed, mode);
2950 if (MULT_COST_LESS (&alg->cost, mult_cost))
2951 {
2952 limit.cost = alg->cost.cost - op_cost;
2953 limit.latency = alg->cost.latency - op_cost;
2954 }
2955 else
2956 {
2957 limit.cost = mult_cost - op_cost;
2958 limit.latency = mult_cost - op_cost;
2959 }
2960
2961 synth_mult (&alg2, -val, &limit, mode);
2962 alg2.cost.cost += op_cost;
2963 alg2.cost.latency += op_cost;
2964 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2965 *alg = alg2, *variant = negate_variant;
2966 }
2967
2968 /* This proves very useful for division-by-constant. */
2969 op_cost = add_cost (speed, mode);
2970 if (MULT_COST_LESS (&alg->cost, mult_cost))
2971 {
2972 limit.cost = alg->cost.cost - op_cost;
2973 limit.latency = alg->cost.latency - op_cost;
2974 }
2975 else
2976 {
2977 limit.cost = mult_cost - op_cost;
2978 limit.latency = mult_cost - op_cost;
2979 }
2980
2981 synth_mult (&alg2, val - 1, &limit, mode);
2982 alg2.cost.cost += op_cost;
2983 alg2.cost.latency += op_cost;
2984 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2985 *alg = alg2, *variant = add_variant;
2986
2987 return MULT_COST_LESS (&alg->cost, mult_cost);
2988 }
2989
2990 /* A subroutine of expand_mult, used for constant multiplications.
2991 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2992 convenient. Use the shift/add sequence described by ALG and apply
2993 the final fixup specified by VARIANT. */
2994
2995 static rtx
2996 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2997 rtx target, const struct algorithm *alg,
2998 enum mult_variant variant)
2999 {
3000 HOST_WIDE_INT val_so_far;
3001 rtx insn, accum, tem;
3002 int opno;
3003 enum machine_mode nmode;
3004
3005 /* Avoid referencing memory over and over and invalid sharing
3006 on SUBREGs. */
3007 op0 = force_reg (mode, op0);
3008
3009 /* ACCUM starts out either as OP0 or as a zero, depending on
3010 the first operation. */
3011
3012 if (alg->op[0] == alg_zero)
3013 {
3014 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3015 val_so_far = 0;
3016 }
3017 else if (alg->op[0] == alg_m)
3018 {
3019 accum = copy_to_mode_reg (mode, op0);
3020 val_so_far = 1;
3021 }
3022 else
3023 gcc_unreachable ();
3024
3025 for (opno = 1; opno < alg->ops; opno++)
3026 {
3027 int log = alg->log[opno];
3028 rtx shift_subtarget = optimize ? 0 : accum;
3029 rtx add_target
3030 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3031 && !optimize)
3032 ? target : 0;
3033 rtx accum_target = optimize ? 0 : accum;
3034 rtx accum_inner;
3035
3036 switch (alg->op[opno])
3037 {
3038 case alg_shift:
3039 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3040 /* REG_EQUAL note will be attached to the following insn. */
3041 emit_move_insn (accum, tem);
3042 val_so_far <<= log;
3043 break;
3044
3045 case alg_add_t_m2:
3046 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3047 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3048 add_target ? add_target : accum_target);
3049 val_so_far += (HOST_WIDE_INT) 1 << log;
3050 break;
3051
3052 case alg_sub_t_m2:
3053 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3054 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3055 add_target ? add_target : accum_target);
3056 val_so_far -= (HOST_WIDE_INT) 1 << log;
3057 break;
3058
3059 case alg_add_t2_m:
3060 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3061 log, shift_subtarget, 0);
3062 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3063 add_target ? add_target : accum_target);
3064 val_so_far = (val_so_far << log) + 1;
3065 break;
3066
3067 case alg_sub_t2_m:
3068 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3069 log, shift_subtarget, 0);
3070 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3071 add_target ? add_target : accum_target);
3072 val_so_far = (val_so_far << log) - 1;
3073 break;
3074
3075 case alg_add_factor:
3076 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3077 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3078 add_target ? add_target : accum_target);
3079 val_so_far += val_so_far << log;
3080 break;
3081
3082 case alg_sub_factor:
3083 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3084 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3085 (add_target
3086 ? add_target : (optimize ? 0 : tem)));
3087 val_so_far = (val_so_far << log) - val_so_far;
3088 break;
3089
3090 default:
3091 gcc_unreachable ();
3092 }
3093
3094 if (SCALAR_INT_MODE_P (mode))
3095 {
3096 /* Write a REG_EQUAL note on the last insn so that we can cse
3097 multiplication sequences. Note that if ACCUM is a SUBREG,
3098 we've set the inner register and must properly indicate that. */
3099 tem = op0, nmode = mode;
3100 accum_inner = accum;
3101 if (GET_CODE (accum) == SUBREG)
3102 {
3103 accum_inner = SUBREG_REG (accum);
3104 nmode = GET_MODE (accum_inner);
3105 tem = gen_lowpart (nmode, op0);
3106 }
3107
3108 insn = get_last_insn ();
3109 set_dst_reg_note (insn, REG_EQUAL,
3110 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3111 accum_inner);
3112 }
3113 }
3114
3115 if (variant == negate_variant)
3116 {
3117 val_so_far = -val_so_far;
3118 accum = expand_unop (mode, neg_optab, accum, target, 0);
3119 }
3120 else if (variant == add_variant)
3121 {
3122 val_so_far = val_so_far + 1;
3123 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3124 }
3125
3126 /* Compare only the bits of val and val_so_far that are significant
3127 in the result mode, to avoid sign-/zero-extension confusion. */
3128 nmode = GET_MODE_INNER (mode);
3129 if (nmode == VOIDmode)
3130 nmode = mode;
3131 val &= GET_MODE_MASK (nmode);
3132 val_so_far &= GET_MODE_MASK (nmode);
3133 gcc_assert (val == val_so_far);
3134
3135 return accum;
3136 }
3137
3138 /* Perform a multiplication and return an rtx for the result.
3139 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3140 TARGET is a suggestion for where to store the result (an rtx).
3141
3142 We check specially for a constant integer as OP1.
3143 If you want this check for OP0 as well, then before calling
3144 you should swap the two operands if OP0 would be constant. */
3145
3146 rtx
3147 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3148 int unsignedp)
3149 {
3150 enum mult_variant variant;
3151 struct algorithm algorithm;
3152 rtx scalar_op1;
3153 int max_cost;
3154 bool speed = optimize_insn_for_speed_p ();
3155 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3156
3157 if (CONSTANT_P (op0))
3158 {
3159 rtx temp = op0;
3160 op0 = op1;
3161 op1 = temp;
3162 }
3163
3164 /* For vectors, there are several simplifications that can be made if
3165 all elements of the vector constant are identical. */
3166 scalar_op1 = op1;
3167 if (GET_CODE (op1) == CONST_VECTOR)
3168 {
3169 int i, n = CONST_VECTOR_NUNITS (op1);
3170 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3171 for (i = 1; i < n; ++i)
3172 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3173 goto skip_scalar;
3174 }
3175
3176 if (INTEGRAL_MODE_P (mode))
3177 {
3178 rtx fake_reg;
3179 HOST_WIDE_INT coeff;
3180 bool is_neg;
3181 int mode_bitsize;
3182
3183 if (op1 == CONST0_RTX (mode))
3184 return op1;
3185 if (op1 == CONST1_RTX (mode))
3186 return op0;
3187 if (op1 == CONSTM1_RTX (mode))
3188 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3189 op0, target, 0);
3190
3191 if (do_trapv)
3192 goto skip_synth;
3193
3194 /* These are the operations that are potentially turned into
3195 a sequence of shifts and additions. */
3196 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3197
3198 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3199 less than or equal in size to `unsigned int' this doesn't matter.
3200 If the mode is larger than `unsigned int', then synth_mult works
3201 only if the constant value exactly fits in an `unsigned int' without
3202 any truncation. This means that multiplying by negative values does
3203 not work; results are off by 2^32 on a 32 bit machine. */
3204
3205 if (CONST_INT_P (scalar_op1))
3206 {
3207 coeff = INTVAL (scalar_op1);
3208 is_neg = coeff < 0;
3209 }
3210 else if (CONST_DOUBLE_P (scalar_op1))
3211 {
3212 /* If we are multiplying in DImode, it may still be a win
3213 to try to work with shifts and adds. */
3214 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3215 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3216 {
3217 coeff = CONST_DOUBLE_LOW (scalar_op1);
3218 is_neg = false;
3219 }
3220 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3221 {
3222 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3223 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3224 {
3225 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3226 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3227 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3228 return expand_shift (LSHIFT_EXPR, mode, op0,
3229 shift, target, unsignedp);
3230 }
3231 goto skip_synth;
3232 }
3233 else
3234 goto skip_synth;
3235 }
3236 else
3237 goto skip_synth;
3238
3239 /* We used to test optimize here, on the grounds that it's better to
3240 produce a smaller program when -O is not used. But this causes
3241 such a terrible slowdown sometimes that it seems better to always
3242 use synth_mult. */
3243
3244 /* Special case powers of two. */
3245 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3246 return expand_shift (LSHIFT_EXPR, mode, op0,
3247 floor_log2 (coeff), target, unsignedp);
3248
3249 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3250
3251 /* Attempt to handle multiplication of DImode values by negative
3252 coefficients, by performing the multiplication by a positive
3253 multiplier and then inverting the result. */
3254 /* ??? How is this not slightly redundant with the neg variant? */
3255 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3256 {
3257 /* It's safe to use -coeff even for INT_MIN, as the
3258 result is interpreted as an unsigned coefficient.
3259 Exclude cost of op0 from max_cost to match the cost
3260 calculation of the synth_mult. */
3261 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3262 - neg_cost(speed, mode));
3263 if (max_cost > 0
3264 && choose_mult_variant (mode, -coeff, &algorithm,
3265 &variant, max_cost))
3266 {
3267 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3268 &algorithm, variant);
3269 return expand_unop (mode, neg_optab, temp, target, 0);
3270 }
3271 }
3272
3273 /* Exclude cost of op0 from max_cost to match the cost
3274 calculation of the synth_mult. */
3275 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3276 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3277 return expand_mult_const (mode, op0, coeff, target,
3278 &algorithm, variant);
3279 }
3280 skip_synth:
3281
3282 /* Expand x*2.0 as x+x. */
3283 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
3284 {
3285 REAL_VALUE_TYPE d;
3286 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3287
3288 if (REAL_VALUES_EQUAL (d, dconst2))
3289 {
3290 op0 = force_reg (GET_MODE (op0), op0);
3291 return expand_binop (mode, add_optab, op0, op0,
3292 target, unsignedp, OPTAB_LIB_WIDEN);
3293 }
3294 }
3295 skip_scalar:
3296
3297 /* This used to use umul_optab if unsigned, but for non-widening multiply
3298 there is no difference between signed and unsigned. */
3299 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3300 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3301 gcc_assert (op0);
3302 return op0;
3303 }
3304
3305 /* Return a cost estimate for multiplying a register by the given
3306 COEFFicient in the given MODE and SPEED. */
3307
3308 int
3309 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3310 {
3311 int max_cost;
3312 struct algorithm algorithm;
3313 enum mult_variant variant;
3314
3315 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3316 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3317 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3318 return algorithm.cost.cost;
3319 else
3320 return max_cost;
3321 }
3322
3323 /* Perform a widening multiplication and return an rtx for the result.
3324 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3325 TARGET is a suggestion for where to store the result (an rtx).
3326 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3327 or smul_widen_optab.
3328
3329 We check specially for a constant integer as OP1, comparing the
3330 cost of a widening multiply against the cost of a sequence of shifts
3331 and adds. */
3332
3333 rtx
3334 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3335 int unsignedp, optab this_optab)
3336 {
3337 bool speed = optimize_insn_for_speed_p ();
3338 rtx cop1;
3339
3340 if (CONST_INT_P (op1)
3341 && GET_MODE (op0) != VOIDmode
3342 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3343 this_optab == umul_widen_optab))
3344 && CONST_INT_P (cop1)
3345 && (INTVAL (cop1) >= 0
3346 || HWI_COMPUTABLE_MODE_P (mode)))
3347 {
3348 HOST_WIDE_INT coeff = INTVAL (cop1);
3349 int max_cost;
3350 enum mult_variant variant;
3351 struct algorithm algorithm;
3352
3353 /* Special case powers of two. */
3354 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3355 {
3356 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3357 return expand_shift (LSHIFT_EXPR, mode, op0,
3358 floor_log2 (coeff), target, unsignedp);
3359 }
3360
3361 /* Exclude cost of op0 from max_cost to match the cost
3362 calculation of synth_mult. */
3363 max_cost = mul_widen_cost (speed, mode);
3364 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3365 max_cost))
3366 {
3367 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3368 return expand_mult_const (mode, op0, coeff, target,
3369 &algorithm, variant);
3370 }
3371 }
3372 return expand_binop (mode, this_optab, op0, op1, target,
3373 unsignedp, OPTAB_LIB_WIDEN);
3374 }
3375 \f
3376 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3377 replace division by D, and put the least significant N bits of the result
3378 in *MULTIPLIER_PTR and return the most significant bit.
3379
3380 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3381 needed precision is in PRECISION (should be <= N).
3382
3383 PRECISION should be as small as possible so this function can choose
3384 a multiplier more freely.
3385
3386 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3387 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3388
3389 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3390 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3391
3392 unsigned HOST_WIDE_INT
3393 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3394 unsigned HOST_WIDE_INT *multiplier_ptr,
3395 int *post_shift_ptr, int *lgup_ptr)
3396 {
3397 HOST_WIDE_INT mhigh_hi, mlow_hi;
3398 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3399 int lgup, post_shift;
3400 int pow, pow2;
3401 unsigned HOST_WIDE_INT nl, dummy1;
3402 HOST_WIDE_INT nh, dummy2;
3403
3404 /* lgup = ceil(log2(divisor)); */
3405 lgup = ceil_log2 (d);
3406
3407 gcc_assert (lgup <= n);
3408
3409 pow = n + lgup;
3410 pow2 = n + lgup - precision;
3411
3412 /* We could handle this with some effort, but this case is much
3413 better handled directly with a scc insn, so rely on caller using
3414 that. */
3415 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3416
3417 /* mlow = 2^(N + lgup)/d */
3418 if (pow >= HOST_BITS_PER_WIDE_INT)
3419 {
3420 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3421 nl = 0;
3422 }
3423 else
3424 {
3425 nh = 0;
3426 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3427 }
3428 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3429 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3430
3431 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3432 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3433 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3434 else
3435 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3436 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3437 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3438
3439 gcc_assert (!mhigh_hi || nh - d < d);
3440 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3441 /* Assert that mlow < mhigh. */
3442 gcc_assert (mlow_hi < mhigh_hi
3443 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3444
3445 /* If precision == N, then mlow, mhigh exceed 2^N
3446 (but they do not exceed 2^(N+1)). */
3447
3448 /* Reduce to lowest terms. */
3449 for (post_shift = lgup; post_shift > 0; post_shift--)
3450 {
3451 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3452 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3453 if (ml_lo >= mh_lo)
3454 break;
3455
3456 mlow_hi = 0;
3457 mlow_lo = ml_lo;
3458 mhigh_hi = 0;
3459 mhigh_lo = mh_lo;
3460 }
3461
3462 *post_shift_ptr = post_shift;
3463 *lgup_ptr = lgup;
3464 if (n < HOST_BITS_PER_WIDE_INT)
3465 {
3466 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3467 *multiplier_ptr = mhigh_lo & mask;
3468 return mhigh_lo >= mask;
3469 }
3470 else
3471 {
3472 *multiplier_ptr = mhigh_lo;
3473 return mhigh_hi;
3474 }
3475 }
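
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build, plain C types assumed) of how the callers below use the result
   of choose_multiplier: for unsigned 32-bit division by 3 the multiplier
   is 0xAAAAAAAB with a post-shift of 1, i.e. the double-width product is
   shifted right by the implicit N = 32 plus the post-shift of 1.  */
#if 0
static unsigned int
udiv_by_3 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0xAAAAAAABu;
  return (unsigned int) (prod >> 33);	/* == x / 3 for every 32-bit x.  */
}
#endif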
3476
3477 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3478 congruent to 1 (mod 2**N). */
3479
3480 static unsigned HOST_WIDE_INT
3481 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3482 {
3483 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3484
3485 /* The algorithm notes that the choice y = x satisfies
3486 x*y == 1 mod 2^3, since x is assumed odd.
3487 Each iteration doubles the number of bits of significance in y. */
3488
3489 unsigned HOST_WIDE_INT mask;
3490 unsigned HOST_WIDE_INT y = x;
3491 int nbit = 3;
3492
3493 mask = (n == HOST_BITS_PER_WIDE_INT
3494 ? ~(unsigned HOST_WIDE_INT) 0
3495 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3496
3497 while (nbit < n)
3498 {
3499 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3500 nbit *= 2;
3501 }
3502 return y;
3503 }
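
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build, plain C types assumed) of the iteration above for n == 32:
   starting from y = x, which is correct to 3 bits for odd x, each
   y *= 2 - x*y step doubles the number of correct low-order bits.
   For example the inverse of 3 is 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 2**33 + 1 == 1 (mod 2**32).  */
#if 0
static unsigned int
invert_mod2_32 (unsigned int x)		/* X must be odd.  */
{
  unsigned int y = x;			/* 3 correct bits.  */
  y *= 2 - x * y;			/* 6 bits.  */
  y *= 2 - x * y;			/* 12 bits.  */
  y *= 2 - x * y;			/* 24 bits.  */
  y *= 2 - x * y;			/* 48 >= 32 bits.  */
  return y;
}
#endif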
3504
3505 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3506 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3507 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3508 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3509 become signed.
3510
3511 The result is put in TARGET if that is convenient.
3512
3513 MODE is the mode of operation. */
3514
3515 rtx
3516 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3517 rtx op1, rtx target, int unsignedp)
3518 {
3519 rtx tem;
3520 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3521
3522 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3523 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3524 tem = expand_and (mode, tem, op1, NULL_RTX);
3525 adj_operand
3526 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3527 adj_operand);
3528
3529 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3530 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3531 tem = expand_and (mode, tem, op0, NULL_RTX);
3532 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3533 target);
3534
3535 return target;
3536 }
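
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build; plain C types and two's complement assumed) of the identity
   used above, for 32-bit operands: reading a negative X as unsigned
   adds 2**32 to it, so the signed high part equals the unsigned high
   part minus Y when X is negative and minus X when Y is negative,
   all modulo 2**32.  */
#if 0
static int
smul_highpart_from_umul_highpart (int x, int y)
{
  unsigned int hi = (unsigned int)
    (((unsigned long long) (unsigned int) x * (unsigned int) y) >> 32);
  if (x < 0)
    hi -= (unsigned int) y;
  if (y < 0)
    hi -= (unsigned int) x;
  return (int) hi;	/* == (int) (((long long) x * y) >> 32).  */
}
#endif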
3537
3538 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3539
3540 static rtx
3541 extract_high_half (enum machine_mode mode, rtx op)
3542 {
3543 enum machine_mode wider_mode;
3544
3545 if (mode == word_mode)
3546 return gen_highpart (mode, op);
3547
3548 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3549
3550 wider_mode = GET_MODE_WIDER_MODE (mode);
3551 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3552 GET_MODE_BITSIZE (mode), 0, 1);
3553 return convert_modes (mode, wider_mode, op, 0);
3554 }
3555
3556 /* Like expmed_mult_highpart, but only consider using a multiplication
3557 optab. OP1 is an rtx for the constant operand. */
3558
3559 static rtx
3560 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3561 rtx target, int unsignedp, int max_cost)
3562 {
3563 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3564 enum machine_mode wider_mode;
3565 optab moptab;
3566 rtx tem;
3567 int size;
3568 bool speed = optimize_insn_for_speed_p ();
3569
3570 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3571
3572 wider_mode = GET_MODE_WIDER_MODE (mode);
3573 size = GET_MODE_BITSIZE (mode);
3574
3575 /* Firstly, try using a multiplication insn that only generates the needed
3576 high part of the product, and in the sign flavor of unsignedp. */
3577 if (mul_highpart_cost (speed, mode) < max_cost)
3578 {
3579 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3580 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3581 unsignedp, OPTAB_DIRECT);
3582 if (tem)
3583 return tem;
3584 }
3585
3586 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3587 Need to adjust the result after the multiplication. */
3588 if (size - 1 < BITS_PER_WORD
3589 && (mul_highpart_cost (speed, mode)
3590 + 2 * shift_cost (speed, mode, size-1)
3591 + 4 * add_cost (speed, mode) < max_cost))
3592 {
3593 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3594 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3595 unsignedp, OPTAB_DIRECT);
3596 if (tem)
3597 /* We used the wrong signedness. Adjust the result. */
3598 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3599 tem, unsignedp);
3600 }
3601
3602 /* Try widening multiplication. */
3603 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3604 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3605 && mul_widen_cost (speed, wider_mode) < max_cost)
3606 {
3607 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3608 unsignedp, OPTAB_WIDEN);
3609 if (tem)
3610 return extract_high_half (mode, tem);
3611 }
3612
3613 /* Try widening the mode and performing a non-widening multiplication. */
3614 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3615 && size - 1 < BITS_PER_WORD
3616 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3617 < max_cost))
3618 {
3619 rtx insns, wop0, wop1;
3620
3621 /* We need to widen the operands, for example to ensure the
3622 constant multiplier is correctly sign or zero extended.
3623 Use a sequence to clean up any instructions emitted by
3624 the conversions if things don't work out. */
3625 start_sequence ();
3626 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3627 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3628 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3629 unsignedp, OPTAB_WIDEN);
3630 insns = get_insns ();
3631 end_sequence ();
3632
3633 if (tem)
3634 {
3635 emit_insn (insns);
3636 return extract_high_half (mode, tem);
3637 }
3638 }
3639
3640 /* Try widening multiplication of opposite signedness, and adjust. */
3641 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3642 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3643 && size - 1 < BITS_PER_WORD
3644 && (mul_widen_cost (speed, wider_mode)
3645 + 2 * shift_cost (speed, mode, size-1)
3646 + 4 * add_cost (speed, mode) < max_cost))
3647 {
3648 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3649 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3650 if (tem != 0)
3651 {
3652 tem = extract_high_half (mode, tem);
3653 /* We used the wrong signedness. Adjust the result. */
3654 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3655 target, unsignedp);
3656 }
3657 }
3658
3659 return 0;
3660 }
3661
3662 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3663 putting the high half of the result in TARGET if that is convenient,
3664 and return where the result is. If the operation cannot be performed,
3665 0 is returned.
3666
3667 MODE is the mode of operation and result.
3668
3669 UNSIGNEDP nonzero means unsigned multiply.
3670
3671 MAX_COST is the total allowed cost for the expanded RTL. */
3672
3673 static rtx
3674 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3675 rtx target, int unsignedp, int max_cost)
3676 {
3677 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3678 unsigned HOST_WIDE_INT cnst1;
3679 int extra_cost;
3680 bool sign_adjust = false;
3681 enum mult_variant variant;
3682 struct algorithm alg;
3683 rtx tem;
3684 bool speed = optimize_insn_for_speed_p ();
3685
3686 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3687 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3688 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3689
3690 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3691
3692 /* We can't optimize modes wider than BITS_PER_WORD.
3693 ??? We might be able to perform double-word arithmetic if
3694 mode == word_mode, however all the cost calculations in
3695 synth_mult etc. assume single-word operations. */
3696 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3697 return expmed_mult_highpart_optab (mode, op0, op1, target,
3698 unsignedp, max_cost);
3699
3700 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3701
3702 /* Check whether we try to multiply by a negative constant. */
3703 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3704 {
3705 sign_adjust = true;
3706 extra_cost += add_cost (speed, mode);
3707 }
3708
3709 /* See whether shift/add multiplication is cheap enough. */
3710 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3711 max_cost - extra_cost))
3712 {
3713 /* See whether the specialized multiplication optabs are
3714 cheaper than the shift/add version. */
3715 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3716 alg.cost.cost + extra_cost);
3717 if (tem)
3718 return tem;
3719
3720 tem = convert_to_mode (wider_mode, op0, unsignedp);
3721 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3722 tem = extract_high_half (mode, tem);
3723
3724 /* Adjust result for signedness. */
3725 if (sign_adjust)
3726 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3727
3728 return tem;
3729 }
3730 return expmed_mult_highpart_optab (mode, op0, op1, target,
3731 unsignedp, max_cost);
3732 }
3733
3734
3735 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3736
3737 static rtx
3738 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3739 {
3740 unsigned HOST_WIDE_INT masklow, maskhigh;
3741 rtx result, temp, shift, label;
3742 int logd;
3743
3744 logd = floor_log2 (d);
3745 result = gen_reg_rtx (mode);
3746
3747 /* Avoid conditional branches when they're expensive. */
3748 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3749 && optimize_insn_for_speed_p ())
3750 {
3751 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3752 mode, 0, -1);
3753 if (signmask)
3754 {
3755 signmask = force_reg (mode, signmask);
3756 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3757 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3758
3759 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3760 which instruction sequence to use. If logical right shifts
3761 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3762 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3763
3764 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3765 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3766 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3767 > COSTS_N_INSNS (2)))
3768 {
3769 temp = expand_binop (mode, xor_optab, op0, signmask,
3770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3771 temp = expand_binop (mode, sub_optab, temp, signmask,
3772 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3773 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3774 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3775 temp = expand_binop (mode, xor_optab, temp, signmask,
3776 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3777 temp = expand_binop (mode, sub_optab, temp, signmask,
3778 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3779 }
3780 else
3781 {
3782 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3783 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3784 signmask = force_reg (mode, signmask);
3785
3786 temp = expand_binop (mode, add_optab, op0, signmask,
3787 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3788 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3790 temp = expand_binop (mode, sub_optab, temp, signmask,
3791 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3792 }
3793 return temp;
3794 }
3795 }
3796
3797 /* Mask contains the mode's signbit and the significant bits of the
3798 modulus. By including the signbit in the operation, many targets
3799 can avoid an explicit compare operation in the following comparison
3800 against zero. */
3801
3802 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3803 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3804 {
3805 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3806 maskhigh = -1;
3807 }
3808 else
3809 maskhigh = (HOST_WIDE_INT) -1
3810 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3811
3812 temp = expand_binop (mode, and_optab, op0,
3813 immed_double_const (masklow, maskhigh, mode),
3814 result, 1, OPTAB_LIB_WIDEN);
3815 if (temp != result)
3816 emit_move_insn (result, temp);
3817
3818 label = gen_label_rtx ();
3819 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3820
3821 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3822 0, OPTAB_LIB_WIDEN);
3823 masklow = (HOST_WIDE_INT) -1 << logd;
3824 maskhigh = -1;
3825 temp = expand_binop (mode, ior_optab, temp,
3826 immed_double_const (masklow, maskhigh, mode),
3827 result, 1, OPTAB_LIB_WIDEN);
3828 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3829 0, OPTAB_LIB_WIDEN);
3830 if (temp != result)
3831 emit_move_insn (result, temp);
3832 emit_label (label);
3833 return result;
3834 }
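
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build, plain C types assumed) of the cheaper branch-free sequence
   chosen above: bias a negative dividend by D - 1, mask, then remove
   the bias again.  This reproduces the truncating semantics of the C
   % operator for a power-of-two divisor D = 2**LOGD.  */
#if 0
static int
smod_pow2_branchless (int x, int logd)
{
  unsigned int d_minus_1 = (1u << logd) - 1;
  unsigned int bias = x < 0 ? d_minus_1 : 0;	/* store-flag + shift above.  */
  int masked = (int) (((unsigned int) x + bias) & d_minus_1);
  return masked - (int) bias;			/* == x % (1 << logd).  */
}
#endif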
3835
3836 /* Expand signed division of OP0 by a power of two D in mode MODE.
3837 This routine is only called for positive values of D. */
3838
3839 static rtx
3840 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3841 {
3842 rtx temp, label;
3843 int logd;
3844
3845 logd = floor_log2 (d);
3846
3847 if (d == 2
3848 && BRANCH_COST (optimize_insn_for_speed_p (),
3849 false) >= 1)
3850 {
3851 temp = gen_reg_rtx (mode);
3852 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3853 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3854 0, OPTAB_LIB_WIDEN);
3855 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3856 }
3857
3858 #ifdef HAVE_conditional_move
3859 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3860 >= 2)
3861 {
3862 rtx temp2;
3863
3864 /* ??? emit_conditional_move forces a stack adjustment via
3865 compare_from_rtx so, if the sequence is discarded, it will
3866 be lost. Do it now instead. */
3867 do_pending_stack_adjust ();
3868
3869 start_sequence ();
3870 temp2 = copy_to_mode_reg (mode, op0);
3871 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3872 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3873 temp = force_reg (mode, temp);
3874
3875 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3876 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3877 mode, temp, temp2, mode, 0);
3878 if (temp2)
3879 {
3880 rtx seq = get_insns ();
3881 end_sequence ();
3882 emit_insn (seq);
3883 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3884 }
3885 end_sequence ();
3886 }
3887 #endif
3888
3889 if (BRANCH_COST (optimize_insn_for_speed_p (),
3890 false) >= 2)
3891 {
3892 int ushift = GET_MODE_BITSIZE (mode) - logd;
3893
3894 temp = gen_reg_rtx (mode);
3895 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3896 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3897 > COSTS_N_INSNS (1))
3898 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3899 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3900 else
3901 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3902 ushift, NULL_RTX, 1);
3903 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3904 0, OPTAB_LIB_WIDEN);
3905 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3906 }
3907
3908 label = gen_label_rtx ();
3909 temp = copy_to_mode_reg (mode, op0);
3910 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3911 expand_inc (temp, GEN_INT (d - 1));
3912 emit_label (label);
3913 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3914 }
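
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build; plain C types and arithmetic >> on int assumed) of the
   branch-free case above: an arithmetic right shift alone would round
   towards minus infinity, so negative dividends are first biased by
   D - 1 to get the truncating rounding of C division by D = 2**LOGD.  */
#if 0
static int
sdiv_pow2_branchless (int x, int logd)
{
  int bias = x < 0 ? (1 << logd) - 1 : 0;
  return (x + bias) >> logd;	/* == x / (1 << logd), truncating.  */
}
#endif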
3915 \f
3916 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3917 if that is convenient, and returning where the result is.
3918 You may request either the quotient or the remainder as the result;
3919 specify REM_FLAG nonzero to get the remainder.
3920
3921 CODE is the expression code for which kind of division this is;
3922 it controls how rounding is done. MODE is the machine mode to use.
3923 UNSIGNEDP nonzero means do unsigned division. */
3924
3925 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3926 and then correct it by or'ing in missing high bits
3927 if result of ANDI is nonzero.
3928 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3929 This could optimize to a bfexts instruction.
3930 But C doesn't use these operations, so their optimizations are
3931 left for later. */
3932 /* ??? For modulo, we don't actually need the highpart of the first product,
3933 the low part will do nicely. And for small divisors, the second multiply
3934 can also be a low-part only multiply or even be completely left out.
3935 E.g. to calculate the remainder of a division by 3 with a 32 bit
3936 multiply, multiply with 0x55555556 and extract the upper two bits;
3937 the result is exact for inputs up to 0x1fffffff.
3938 The input range can be reduced by using cross-sum rules.
3939 For odd divisors >= 3, the following table gives right shift counts
3940 so that if a number is shifted by an integer multiple of the given
3941 amount, the remainder stays the same:
3942 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3943 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3944 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3945 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3946 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3947
3948 Cross-sum rules for even numbers can be derived by leaving as many bits
3949 to the right alone as the divisor has zeros to the right.
3950 E.g. if x is an unsigned 32 bit number:
3951 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3952 */
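
/* A minimal illustrative sketch (hypothetical helper, not part of the
   build, plain C types assumed) of the remainder-by-3 trick mentioned
   in the comment above: 0x55555556 is (2**32 + 2) / 3, so for x up to
   0x1fffffff the top two bits of the wrapped 32-bit product
   x * 0x55555556 are exactly x mod 3.  */
#if 0
static unsigned int
umod_3_small (unsigned int x)	/* Exact for x <= 0x1fffffff.  */
{
  return (x * 0x55555556u) >> 30;
}
#endif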
3953
3954 rtx
3955 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3956 rtx op0, rtx op1, rtx target, int unsignedp)
3957 {
3958 enum machine_mode compute_mode;
3959 rtx tquotient;
3960 rtx quotient = 0, remainder = 0;
3961 rtx last;
3962 int size;
3963 rtx insn;
3964 optab optab1, optab2;
3965 int op1_is_constant, op1_is_pow2 = 0;
3966 int max_cost, extra_cost;
3967 static HOST_WIDE_INT last_div_const = 0;
3968 static HOST_WIDE_INT ext_op1;
3969 bool speed = optimize_insn_for_speed_p ();
3970
3971 op1_is_constant = CONST_INT_P (op1);
3972 if (op1_is_constant)
3973 {
3974 ext_op1 = INTVAL (op1);
3975 if (unsignedp)
3976 ext_op1 &= GET_MODE_MASK (mode);
3977 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3978 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3979 }
3980
3981 /*
3982 This is the structure of expand_divmod:
3983
3984 First comes code to fix up the operands so we can perform the operations
3985 correctly and efficiently.
3986
3987 Second comes a switch statement with code specific for each rounding mode.
3988 For some special operands this code emits all RTL for the desired
3989 operation, for other cases, it generates only a quotient and stores it in
3990 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3991 to indicate that it has not done anything.
3992
3993 Last comes code that finishes the operation. If QUOTIENT is set and
3994 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3995 QUOTIENT is not set, it is computed using trunc rounding.
3996
3997 We try to generate special code for division and remainder when OP1 is a
3998 constant. If |OP1| = 2**n we can use shifts and some other fast
3999 operations. For other values of OP1, we compute a carefully selected
4000 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4001 by m.
4002
4003 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4004 half of the product. Different strategies for generating the product are
4005 implemented in expmed_mult_highpart.
4006
4007 If what we actually want is the remainder, we generate that by another
4008 by-constant multiplication and a subtraction. */
4009
4010 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4011 code below will malfunction if we are, so check here and handle
4012 the special case if so. */
4013 if (op1 == const1_rtx)
4014 return rem_flag ? const0_rtx : op0;
4015
4016 /* When dividing by -1, we could get an overflow.
4017 negv_optab can handle overflows. */
4018 if (! unsignedp && op1 == constm1_rtx)
4019 {
4020 if (rem_flag)
4021 return const0_rtx;
4022 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
4023 ? negv_optab : neg_optab, op0, target, 0);
4024 }
4025
4026 if (target
4027 /* Don't use the function value register as a target
4028 since we have to read it as well as write it,
4029 and function-inlining gets confused by this. */
4030 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4031 /* Don't clobber an operand while doing a multi-step calculation. */
4032 || ((rem_flag || op1_is_constant)
4033 && (reg_mentioned_p (target, op0)
4034 || (MEM_P (op0) && MEM_P (target))))
4035 || reg_mentioned_p (target, op1)
4036 || (MEM_P (op1) && MEM_P (target))))
4037 target = 0;
4038
4039 /* Get the mode in which to perform this computation. Normally it will
4040 be MODE, but sometimes we can't do the desired operation in MODE.
4041 If so, pick a wider mode in which we can do the operation. Convert
4042 to that mode at the start to avoid repeated conversions.
4043
4044 First see what operations we need. These depend on the expression
4045 we are evaluating. (We assume that divxx3 insns exist under the
4046 same conditions that modxx3 insns and that these insns don't normally
4047 fail. If these assumptions are not correct, we may generate less
4048 efficient code in some cases.)
4049
4050 Then see if we find a mode in which we can open-code that operation
4051 (either a division, modulus, or shift). Finally, check for the smallest
4052 mode for which we can do the operation with a library call. */
4053
4054 /* We might want to refine this now that we have division-by-constant
4055 optimization. Since expmed_mult_highpart tries so many variants, it is
4056 not straightforward to generalize this. Maybe we should make an array
4057 of possible modes in init_expmed? Save this for GCC 2.7. */
4058
4059 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4060 ? (unsignedp ? lshr_optab : ashr_optab)
4061 : (unsignedp ? udiv_optab : sdiv_optab));
4062 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4063 ? optab1
4064 : (unsignedp ? udivmod_optab : sdivmod_optab));
4065
4066 for (compute_mode = mode; compute_mode != VOIDmode;
4067 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4068 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4069 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4070 break;
4071
4072 if (compute_mode == VOIDmode)
4073 for (compute_mode = mode; compute_mode != VOIDmode;
4074 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4075 if (optab_libfunc (optab1, compute_mode)
4076 || optab_libfunc (optab2, compute_mode))
4077 break;
4078
4079 /* If we still couldn't find a mode, use MODE, but expand_binop will
4080 probably die. */
4081 if (compute_mode == VOIDmode)
4082 compute_mode = mode;
4083
4084 if (target && GET_MODE (target) == compute_mode)
4085 tquotient = target;
4086 else
4087 tquotient = gen_reg_rtx (compute_mode);
4088
4089 size = GET_MODE_BITSIZE (compute_mode);
4090 #if 0
4091 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4092 (mode), and thereby get better code when OP1 is a constant. Do that
4093 later. It will require going over all usages of SIZE below. */
4094 size = GET_MODE_BITSIZE (mode);
4095 #endif
4096
4097 /* Only deduct something for a REM if the last divide done was
4098 for a different constant. Then set the constant of the last
4099 divide. */
4100 max_cost = (unsignedp
4101 ? udiv_cost (speed, compute_mode)
4102 : sdiv_cost (speed, compute_mode));
4103 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4104 && INTVAL (op1) == last_div_const))
4105 max_cost -= (mul_cost (speed, compute_mode)
4106 + add_cost (speed, compute_mode));
4107
4108 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4109
4110 /* Now convert to the best mode to use. */
4111 if (compute_mode != mode)
4112 {
4113 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4114 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4115
4116 /* convert_modes may have placed op1 into a register, so we
4117 must recompute the following. */
4118 op1_is_constant = CONST_INT_P (op1);
4119 op1_is_pow2 = (op1_is_constant
4120 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4121 || (! unsignedp
4122 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4123 }
4124
4125 /* If one of the operands is a volatile MEM, copy it into a register. */
4126
4127 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4128 op0 = force_reg (compute_mode, op0);
4129 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4130 op1 = force_reg (compute_mode, op1);
4131
4132 /* If we need the remainder or if OP1 is constant, we need to
4133 put OP0 in a register in case it has any queued subexpressions. */
4134 if (rem_flag || op1_is_constant)
4135 op0 = force_reg (compute_mode, op0);
4136
4137 last = get_last_insn ();
4138
4139 /* Promote floor rounding to trunc rounding for unsigned operations. */
4140 if (unsignedp)
4141 {
4142 if (code == FLOOR_DIV_EXPR)
4143 code = TRUNC_DIV_EXPR;
4144 if (code == FLOOR_MOD_EXPR)
4145 code = TRUNC_MOD_EXPR;
4146 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4147 code = TRUNC_DIV_EXPR;
4148 }
4149
4150 if (op1 != const0_rtx)
4151 switch (code)
4152 {
4153 case TRUNC_MOD_EXPR:
4154 case TRUNC_DIV_EXPR:
4155 if (op1_is_constant)
4156 {
4157 if (unsignedp)
4158 {
4159 unsigned HOST_WIDE_INT mh, ml;
4160 int pre_shift, post_shift;
4161 int dummy;
4162 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4163 & GET_MODE_MASK (compute_mode));
4164
4165 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4166 {
4167 pre_shift = floor_log2 (d);
4168 if (rem_flag)
4169 {
4170 remainder
4171 = expand_binop (compute_mode, and_optab, op0,
4172 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4173 remainder, 1,
4174 OPTAB_LIB_WIDEN);
4175 if (remainder)
4176 return gen_lowpart (mode, remainder);
4177 }
4178 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4179 pre_shift, tquotient, 1);
4180 }
4181 else if (size <= HOST_BITS_PER_WIDE_INT)
4182 {
4183 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4184 {
4185 /* Most significant bit of divisor is set; emit an scc
4186 insn. */
4187 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4188 compute_mode, 1, 1);
4189 }
4190 else
4191 {
4192 /* Find a suitable multiplier and right shift count
4193 instead of multiplying with D. */
4194
4195 mh = choose_multiplier (d, size, size,
4196 &ml, &post_shift, &dummy);
4197
4198 /* If the suggested multiplier is more than SIZE bits,
4199 we can do better for even divisors, using an
4200 initial right shift. */
4201 if (mh != 0 && (d & 1) == 0)
4202 {
4203 pre_shift = floor_log2 (d & -d);
4204 mh = choose_multiplier (d >> pre_shift, size,
4205 size - pre_shift,
4206 &ml, &post_shift, &dummy);
4207 gcc_assert (!mh);
4208 }
4209 else
4210 pre_shift = 0;
4211
4212 if (mh != 0)
4213 {
4214 rtx t1, t2, t3, t4;
4215
4216 if (post_shift - 1 >= BITS_PER_WORD)
4217 goto fail1;
4218
4219 extra_cost
4220 = (shift_cost (speed, compute_mode, post_shift - 1)
4221 + shift_cost (speed, compute_mode, 1)
4222 + 2 * add_cost (speed, compute_mode));
4223 t1 = expmed_mult_highpart (compute_mode, op0,
4224 GEN_INT (ml),
4225 NULL_RTX, 1,
4226 max_cost - extra_cost);
4227 if (t1 == 0)
4228 goto fail1;
4229 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4230 op0, t1),
4231 NULL_RTX);
4232 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4233 t2, 1, NULL_RTX, 1);
4234 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4235 t1, t3),
4236 NULL_RTX);
4237 quotient = expand_shift
4238 (RSHIFT_EXPR, compute_mode, t4,
4239 post_shift - 1, tquotient, 1);
4240 }
4241 else
4242 {
4243 rtx t1, t2;
4244
4245 if (pre_shift >= BITS_PER_WORD
4246 || post_shift >= BITS_PER_WORD)
4247 goto fail1;
4248
4249 t1 = expand_shift
4250 (RSHIFT_EXPR, compute_mode, op0,
4251 pre_shift, NULL_RTX, 1);
4252 extra_cost
4253 = (shift_cost (speed, compute_mode, pre_shift)
4254 + shift_cost (speed, compute_mode, post_shift));
4255 t2 = expmed_mult_highpart (compute_mode, t1,
4256 GEN_INT (ml),
4257 NULL_RTX, 1,
4258 max_cost - extra_cost);
4259 if (t2 == 0)
4260 goto fail1;
4261 quotient = expand_shift
4262 (RSHIFT_EXPR, compute_mode, t2,
4263 post_shift, tquotient, 1);
4264 }
4265 }
4266 }
4267 else /* Too wide mode to use tricky code */
4268 break;
4269
4270 insn = get_last_insn ();
4271 if (insn != last)
4272 set_dst_reg_note (insn, REG_EQUAL,
4273 gen_rtx_UDIV (compute_mode, op0, op1),
4274 quotient);
4275 }
4276 else /* TRUNC_DIV, signed */
4277 {
4278 unsigned HOST_WIDE_INT ml;
4279 int lgup, post_shift;
4280 rtx mlr;
4281 HOST_WIDE_INT d = INTVAL (op1);
4282 unsigned HOST_WIDE_INT abs_d;
4283
4284 /* Since d might be INT_MIN, we have to cast to
4285 unsigned HOST_WIDE_INT before negating to avoid
4286 undefined signed overflow. */
4287 abs_d = (d >= 0
4288 ? (unsigned HOST_WIDE_INT) d
4289 : - (unsigned HOST_WIDE_INT) d);
4290
4291 /* n rem d = n rem -d */
4292 if (rem_flag && d < 0)
4293 {
4294 d = abs_d;
4295 op1 = gen_int_mode (abs_d, compute_mode);
4296 }
4297
4298 if (d == 1)
4299 quotient = op0;
4300 else if (d == -1)
4301 quotient = expand_unop (compute_mode, neg_optab, op0,
4302 tquotient, 0);
4303 else if (HOST_BITS_PER_WIDE_INT >= size
4304 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4305 {
4306 /* This case is not handled correctly below. */
4307 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4308 compute_mode, 1, 1);
4309 if (quotient == 0)
4310 goto fail1;
4311 }
4312 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4313 && (rem_flag
4314 ? smod_pow2_cheap (speed, compute_mode)
4315 : sdiv_pow2_cheap (speed, compute_mode))
4316 /* We assume that cheap metric is true if the
4317 optab has an expander for this mode. */
4318 && ((optab_handler ((rem_flag ? smod_optab
4319 : sdiv_optab),
4320 compute_mode)
4321 != CODE_FOR_nothing)
4322 || (optab_handler (sdivmod_optab,
4323 compute_mode)
4324 != CODE_FOR_nothing)))
4325 ;
4326 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4327 {
4328 if (rem_flag)
4329 {
4330 remainder = expand_smod_pow2 (compute_mode, op0, d);
4331 if (remainder)
4332 return gen_lowpart (mode, remainder);
4333 }
4334
4335 if (sdiv_pow2_cheap (speed, compute_mode)
4336 && ((optab_handler (sdiv_optab, compute_mode)
4337 != CODE_FOR_nothing)
4338 || (optab_handler (sdivmod_optab, compute_mode)
4339 != CODE_FOR_nothing)))
4340 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4341 compute_mode, op0,
4342 gen_int_mode (abs_d,
4343 compute_mode),
4344 NULL_RTX, 0);
4345 else
4346 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4347
4348 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4349 negate the quotient. */
4350 if (d < 0)
4351 {
4352 insn = get_last_insn ();
4353 if (insn != last
4354 && abs_d < ((unsigned HOST_WIDE_INT) 1
4355 << (HOST_BITS_PER_WIDE_INT - 1)))
4356 set_dst_reg_note (insn, REG_EQUAL,
4357 gen_rtx_DIV (compute_mode, op0,
4358 gen_int_mode
4359 (abs_d,
4360 compute_mode)),
4361 quotient);
4362
4363 quotient = expand_unop (compute_mode, neg_optab,
4364 quotient, quotient, 0);
4365 }
4366 }
4367 else if (size <= HOST_BITS_PER_WIDE_INT)
4368 {
4369 choose_multiplier (abs_d, size, size - 1,
4370 &ml, &post_shift, &lgup);
4371 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4372 {
4373 rtx t1, t2, t3;
4374
4375 if (post_shift >= BITS_PER_WORD
4376 || size - 1 >= BITS_PER_WORD)
4377 goto fail1;
4378
4379 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4380 + shift_cost (speed, compute_mode, size - 1)
4381 + add_cost (speed, compute_mode));
4382 t1 = expmed_mult_highpart (compute_mode, op0,
4383 GEN_INT (ml), NULL_RTX, 0,
4384 max_cost - extra_cost);
4385 if (t1 == 0)
4386 goto fail1;
4387 t2 = expand_shift
4388 (RSHIFT_EXPR, compute_mode, t1,
4389 post_shift, NULL_RTX, 0);
4390 t3 = expand_shift
4391 (RSHIFT_EXPR, compute_mode, op0,
4392 size - 1, NULL_RTX, 0);
4393 if (d < 0)
4394 quotient
4395 = force_operand (gen_rtx_MINUS (compute_mode,
4396 t3, t2),
4397 tquotient);
4398 else
4399 quotient
4400 = force_operand (gen_rtx_MINUS (compute_mode,
4401 t2, t3),
4402 tquotient);
4403 }
4404 else
4405 {
4406 rtx t1, t2, t3, t4;
4407
4408 if (post_shift >= BITS_PER_WORD
4409 || size - 1 >= BITS_PER_WORD)
4410 goto fail1;
4411
4412 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4413 mlr = gen_int_mode (ml, compute_mode);
4414 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4415 + shift_cost (speed, compute_mode, size - 1)
4416 + 2 * add_cost (speed, compute_mode));
4417 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4418 NULL_RTX, 0,
4419 max_cost - extra_cost);
4420 if (t1 == 0)
4421 goto fail1;
4422 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4423 t1, op0),
4424 NULL_RTX);
4425 t3 = expand_shift
4426 (RSHIFT_EXPR, compute_mode, t2,
4427 post_shift, NULL_RTX, 0);
4428 t4 = expand_shift
4429 (RSHIFT_EXPR, compute_mode, op0,
4430 size - 1, NULL_RTX, 0);
4431 if (d < 0)
4432 quotient
4433 = force_operand (gen_rtx_MINUS (compute_mode,
4434 t4, t3),
4435 tquotient);
4436 else
4437 quotient
4438 = force_operand (gen_rtx_MINUS (compute_mode,
4439 t3, t4),
4440 tquotient);
4441 }
4442 }
4443 else /* Too wide mode to use tricky code */
4444 break;
4445
4446 insn = get_last_insn ();
4447 if (insn != last)
4448 set_dst_reg_note (insn, REG_EQUAL,
4449 gen_rtx_DIV (compute_mode, op0, op1),
4450 quotient);
4451 }
4452 break;
4453 }
4454 fail1:
4455 delete_insns_since (last);
4456 break;
4457
4458 case FLOOR_DIV_EXPR:
4459 case FLOOR_MOD_EXPR:
4460 /* We will come here only for signed operations. */
4461 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4462 {
4463 unsigned HOST_WIDE_INT mh, ml;
4464 int pre_shift, lgup, post_shift;
4465 HOST_WIDE_INT d = INTVAL (op1);
4466
4467 if (d > 0)
4468 {
4469 /* We could just as easily deal with negative constants here,
4470 but it does not seem worth the trouble for GCC 2.6. */
4471 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4472 {
4473 pre_shift = floor_log2 (d);
4474 if (rem_flag)
4475 {
4476 remainder = expand_binop (compute_mode, and_optab, op0,
4477 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4478 remainder, 0, OPTAB_LIB_WIDEN);
4479 if (remainder)
4480 return gen_lowpart (mode, remainder);
4481 }
4482 quotient = expand_shift
4483 (RSHIFT_EXPR, compute_mode, op0,
4484 pre_shift, tquotient, 0);
4485 }
4486 else
4487 {
4488 rtx t1, t2, t3, t4;
4489
4490 mh = choose_multiplier (d, size, size - 1,
4491 &ml, &post_shift, &lgup);
4492 gcc_assert (!mh);
4493
4494 if (post_shift < BITS_PER_WORD
4495 && size - 1 < BITS_PER_WORD)
4496 {
4497 t1 = expand_shift
4498 (RSHIFT_EXPR, compute_mode, op0,
4499 size - 1, NULL_RTX, 0);
4500 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4501 NULL_RTX, 0, OPTAB_WIDEN);
4502 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4503 + shift_cost (speed, compute_mode, size - 1)
4504 + 2 * add_cost (speed, compute_mode));
4505 t3 = expmed_mult_highpart (compute_mode, t2,
4506 GEN_INT (ml), NULL_RTX, 1,
4507 max_cost - extra_cost);
4508 if (t3 != 0)
4509 {
4510 t4 = expand_shift
4511 (RSHIFT_EXPR, compute_mode, t3,
4512 post_shift, NULL_RTX, 1);
4513 quotient = expand_binop (compute_mode, xor_optab,
4514 t4, t1, tquotient, 0,
4515 OPTAB_WIDEN);
4516 }
4517 }
4518 }
4519 }
4520 else
4521 {
4522 rtx nsign, t1, t2, t3, t4;
4523 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4524 op0, constm1_rtx), NULL_RTX);
4525 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4526 0, OPTAB_WIDEN);
4527 nsign = expand_shift
4528 (RSHIFT_EXPR, compute_mode, t2,
4529 size - 1, NULL_RTX, 0);
4530 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4531 NULL_RTX);
4532 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4533 NULL_RTX, 0);
4534 if (t4)
4535 {
4536 rtx t5;
4537 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4538 NULL_RTX, 0);
4539 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4540 t4, t5),
4541 tquotient);
4542 }
4543 }
4544 }
4545
4546 if (quotient != 0)
4547 break;
4548 delete_insns_since (last);
4549
4550 /* Try using an instruction that produces both the quotient and
4551 remainder, using truncation. We can easily compensate the quotient
4552 or remainder to get floor rounding, once we have the remainder.
4553 Notice that we compute also the final remainder value here,
4554 and return the result right away. */
4555 if (target == 0 || GET_MODE (target) != compute_mode)
4556 target = gen_reg_rtx (compute_mode);
4557
4558 if (rem_flag)
4559 {
4560 remainder
4561 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4562 quotient = gen_reg_rtx (compute_mode);
4563 }
4564 else
4565 {
4566 quotient
4567 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4568 remainder = gen_reg_rtx (compute_mode);
4569 }
4570
4571 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4572 quotient, remainder, 0))
4573 {
4574 /* This could be computed with a branch-less sequence.
4575 Save that for later. */
4576 rtx tem;
4577 rtx label = gen_label_rtx ();
4578 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4579 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4580 NULL_RTX, 0, OPTAB_WIDEN);
4581 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4582 expand_dec (quotient, const1_rtx);
4583 expand_inc (remainder, op1);
4584 emit_label (label);
4585 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4586 }
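
/* A minimal illustrative sketch (hypothetical helper, shown next to the
   code it explains but kept out of the build, plain C types assumed) of
   the compensation performed just above: truncating division rounds
   towards zero, so when the remainder is nonzero and the operands have
   opposite signs the quotient is decremented and the remainder adjusted
   by the divisor to obtain floor rounding.  */
#if 0
static void
floor_divmod (int x, int y, int *q, int *r)
{
  *q = x / y;
  *r = x % y;
  if (*r != 0 && (x ^ y) < 0)
    {
      *q -= 1;
      *r += y;
    }
}
#endif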
4587
4588 /* No luck with division elimination or divmod. Have to do it
4589 by conditionally adjusting op0 *and* the result. */
4590 {
4591 rtx label1, label2, label3, label4, label5;
4592 rtx adjusted_op0;
4593 rtx tem;
4594
4595 quotient = gen_reg_rtx (compute_mode);
4596 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4597 label1 = gen_label_rtx ();
4598 label2 = gen_label_rtx ();
4599 label3 = gen_label_rtx ();
4600 label4 = gen_label_rtx ();
4601 label5 = gen_label_rtx ();
4602 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4603 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4604 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4605 quotient, 0, OPTAB_LIB_WIDEN);
4606 if (tem != quotient)
4607 emit_move_insn (quotient, tem);
4608 emit_jump_insn (gen_jump (label5));
4609 emit_barrier ();
4610 emit_label (label1);
4611 expand_inc (adjusted_op0, const1_rtx);
4612 emit_jump_insn (gen_jump (label4));
4613 emit_barrier ();
4614 emit_label (label2);
4615 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4616 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4617 quotient, 0, OPTAB_LIB_WIDEN);
4618 if (tem != quotient)
4619 emit_move_insn (quotient, tem);
4620 emit_jump_insn (gen_jump (label5));
4621 emit_barrier ();
4622 emit_label (label3);
4623 expand_dec (adjusted_op0, const1_rtx);
4624 emit_label (label4);
4625 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4626 quotient, 0, OPTAB_LIB_WIDEN);
4627 if (tem != quotient)
4628 emit_move_insn (quotient, tem);
4629 expand_dec (quotient, const1_rtx);
4630 emit_label (label5);
4631 }
4632 break;
4633
4634 case CEIL_DIV_EXPR:
4635 case CEIL_MOD_EXPR:
4636 if (unsignedp)
4637 {
4638 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4639 {
4640 rtx t1, t2, t3;
4641 unsigned HOST_WIDE_INT d = INTVAL (op1);
4642 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4643 floor_log2 (d), tquotient, 1);
4644 t2 = expand_binop (compute_mode, and_optab, op0,
4645 GEN_INT (d - 1),
4646 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4647 t3 = gen_reg_rtx (compute_mode);
4648 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4649 compute_mode, 1, 1);
4650 if (t3 == 0)
4651 {
4652 rtx lab;
4653 lab = gen_label_rtx ();
4654 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4655 expand_inc (t1, const1_rtx);
4656 emit_label (lab);
4657 quotient = t1;
4658 }
4659 else
4660 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4661 t1, t3),
4662 tquotient);
4663 break;
4664 }
4665
4666 /* Try using an instruction that produces both the quotient and
4667 remainder, using truncation. We can easily compensate the
4668 quotient or remainder to get ceiling rounding, once we have the
4669 remainder. Notice that we compute also the final remainder
4670 value here, and return the result right away. */
4671 if (target == 0 || GET_MODE (target) != compute_mode)
4672 target = gen_reg_rtx (compute_mode);
4673
4674 if (rem_flag)
4675 {
4676 remainder = (REG_P (target)
4677 ? target : gen_reg_rtx (compute_mode));
4678 quotient = gen_reg_rtx (compute_mode);
4679 }
4680 else
4681 {
4682 quotient = (REG_P (target)
4683 ? target : gen_reg_rtx (compute_mode));
4684 remainder = gen_reg_rtx (compute_mode);
4685 }
4686
4687 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4688 remainder, 1))
4689 {
4690 /* This could be computed with a branch-less sequence.
4691 Save that for later. */
4692 rtx label = gen_label_rtx ();
4693 do_cmp_and_jump (remainder, const0_rtx, EQ,
4694 compute_mode, label);
4695 expand_inc (quotient, const1_rtx);
4696 expand_dec (remainder, op1);
4697 emit_label (label);
4698 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4699 }
4700
4701 /* No luck with division elimination or divmod. Have to do it
4702 by conditionally adjusting op0 *and* the result. */
4703 {
4704 rtx label1, label2;
4705 rtx adjusted_op0, tem;
4706
4707 quotient = gen_reg_rtx (compute_mode);
4708 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4709 label1 = gen_label_rtx ();
4710 label2 = gen_label_rtx ();
4711 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4712 compute_mode, label1);
4713 emit_move_insn (quotient, const0_rtx);
4714 emit_jump_insn (gen_jump (label2));
4715 emit_barrier ();
4716 emit_label (label1);
4717 expand_dec (adjusted_op0, const1_rtx);
4718 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4719 quotient, 1, OPTAB_LIB_WIDEN);
4720 if (tem != quotient)
4721 emit_move_insn (quotient, tem);
4722 expand_inc (quotient, const1_rtx);
4723 emit_label (label2);
4724 }
4725 }
4726 else /* signed */
4727 {
4728 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4729 && INTVAL (op1) >= 0)
4730 {
4731 /* This is extremely similar to the code for the unsigned case
4732 above. For 2.7 we should merge these variants, but for
4733 2.6.1 I don't want to touch the code for unsigned since that
4734 gets used in C. The signed case will only be used by other
4735 languages (Ada). */
4736
4737 rtx t1, t2, t3;
4738 unsigned HOST_WIDE_INT d = INTVAL (op1);
4739 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4740 floor_log2 (d), tquotient, 0);
4741 t2 = expand_binop (compute_mode, and_optab, op0,
4742 GEN_INT (d - 1),
4743 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4744 t3 = gen_reg_rtx (compute_mode);
4745 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4746 compute_mode, 1, 1);
4747 if (t3 == 0)
4748 {
4749 rtx lab;
4750 lab = gen_label_rtx ();
4751 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4752 expand_inc (t1, const1_rtx);
4753 emit_label (lab);
4754 quotient = t1;
4755 }
4756 else
4757 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4758 t1, t3),
4759 tquotient);
4760 break;
4761 }
4762
4763 /* Try using an instruction that produces both the quotient and
4764 remainder, using truncation. We can easily compensate the
4765 quotient or remainder to get ceiling rounding, once we have the
4766 remainder. Notice that we compute also the final remainder
4767 value here, and return the result right away. */
4768 if (target == 0 || GET_MODE (target) != compute_mode)
4769 target = gen_reg_rtx (compute_mode);
4770 if (rem_flag)
4771 {
4772 remainder = (REG_P (target)
4773 ? target : gen_reg_rtx (compute_mode));
4774 quotient = gen_reg_rtx (compute_mode);
4775 }
4776 else
4777 {
4778 quotient = (REG_P (target)
4779 ? target : gen_reg_rtx (compute_mode));
4780 remainder = gen_reg_rtx (compute_mode);
4781 }
4782
4783 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4784 remainder, 0))
4785 {
4786 /* This could be computed with a branch-less sequence.
4787 Save that for later. */
4788 rtx tem;
4789 rtx label = gen_label_rtx ();
4790 do_cmp_and_jump (remainder, const0_rtx, EQ,
4791 compute_mode, label);
4792 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4793 NULL_RTX, 0, OPTAB_WIDEN);
4794 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4795 expand_inc (quotient, const1_rtx);
4796 expand_dec (remainder, op1);
4797 emit_label (label);
4798 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4799 }
4800
4801 /* No luck with division elimination or divmod. Have to do it
4802 by conditionally adjusting op0 *and* the result. */
4803 {
4804 rtx label1, label2, label3, label4, label5;
4805 rtx adjusted_op0;
4806 rtx tem;
4807
4808 quotient = gen_reg_rtx (compute_mode);
4809 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4810 label1 = gen_label_rtx ();
4811 label2 = gen_label_rtx ();
4812 label3 = gen_label_rtx ();
4813 label4 = gen_label_rtx ();
4814 label5 = gen_label_rtx ();
4815 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4816 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4817 compute_mode, label1);
4818 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4819 quotient, 0, OPTAB_LIB_WIDEN);
4820 if (tem != quotient)
4821 emit_move_insn (quotient, tem);
4822 emit_jump_insn (gen_jump (label5));
4823 emit_barrier ();
4824 emit_label (label1);
4825 expand_dec (adjusted_op0, const1_rtx);
4826 emit_jump_insn (gen_jump (label4));
4827 emit_barrier ();
4828 emit_label (label2);
4829 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4830 compute_mode, label3);
4831 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4832 quotient, 0, OPTAB_LIB_WIDEN);
4833 if (tem != quotient)
4834 emit_move_insn (quotient, tem);
4835 emit_jump_insn (gen_jump (label5));
4836 emit_barrier ();
4837 emit_label (label3);
4838 expand_inc (adjusted_op0, const1_rtx);
4839 emit_label (label4);
4840 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4841 quotient, 0, OPTAB_LIB_WIDEN);
4842 if (tem != quotient)
4843 emit_move_insn (quotient, tem);
4844 expand_inc (quotient, const1_rtx);
4845 emit_label (label5);
4846 }
4847 }
4848 break;
4849
4850 case EXACT_DIV_EXPR:
4851 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4852 {
4853 HOST_WIDE_INT d = INTVAL (op1);
4854 unsigned HOST_WIDE_INT ml;
4855 int pre_shift;
4856 rtx t1;
4857
4858 pre_shift = floor_log2 (d & -d);
4859 ml = invert_mod2n (d >> pre_shift, size);
4860 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4861 pre_shift, NULL_RTX, unsignedp);
4862 quotient = expand_mult (compute_mode, t1,
4863 gen_int_mode (ml, compute_mode),
4864 NULL_RTX, 1);
4865
4866 insn = get_last_insn ();
4867 set_dst_reg_note (insn, REG_EQUAL,
4868 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4869 compute_mode, op0, op1),
4870 quotient);
4871 }
4872 break;
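
/* A minimal illustrative sketch (hypothetical helper, shown next to the
   case it explains but kept out of the build, plain C types assumed) of
   the EXACT_DIV_EXPR strategy above: when X is known to be a multiple of
   an odd constant D, X / D is a plain multiplication by the inverse of D
   modulo 2**32 (as computed by invert_mod2n), with no division
   instruction.  For D == 9 the inverse is 954437177, since
   9 * 954437177 == 2**33 + 1 == 1 (mod 2**32).  */
#if 0
static unsigned int
exact_udiv_by_9 (unsigned int x)	/* X must be a multiple of 9.  */
{
  return x * 954437177u;
}
#endif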
4873
4874 case ROUND_DIV_EXPR:
4875 case ROUND_MOD_EXPR:
4876 if (unsignedp)
4877 {
4878 rtx tem;
4879 rtx label;
4880 label = gen_label_rtx ();
4881 quotient = gen_reg_rtx (compute_mode);
4882 remainder = gen_reg_rtx (compute_mode);
4883 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4884 {
4885 rtx tem;
4886 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4887 quotient, 1, OPTAB_LIB_WIDEN);
4888 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4889 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4890 remainder, 1, OPTAB_LIB_WIDEN);
4891 }
4892 tem = plus_constant (compute_mode, op1, -1);
4893 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4894 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4895 expand_inc (quotient, const1_rtx);
4896 expand_dec (remainder, op1);
4897 emit_label (label);
4898 }
4899 else
4900 {
4901 rtx abs_rem, abs_op1, tem, mask;
4902 rtx label;
4903 label = gen_label_rtx ();
4904 quotient = gen_reg_rtx (compute_mode);
4905 remainder = gen_reg_rtx (compute_mode);
4906 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4907 {
4908 rtx tem;
4909 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4910 quotient, 0, OPTAB_LIB_WIDEN);
4911 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4912 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4913 remainder, 0, OPTAB_LIB_WIDEN);
4914 }
4915 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4916 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4917 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4918 1, NULL_RTX, 1);
4919 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4920 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4921 NULL_RTX, 0, OPTAB_WIDEN);
4922 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4923 size - 1, NULL_RTX, 0);
4924 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4925 NULL_RTX, 0, OPTAB_WIDEN);
4926 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4927 NULL_RTX, 0, OPTAB_WIDEN);
4928 expand_inc (quotient, tem);
4929 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4930 NULL_RTX, 0, OPTAB_WIDEN);
4931 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4932 NULL_RTX, 0, OPTAB_WIDEN);
4933 expand_dec (remainder, tem);
4934 emit_label (label);
4935 }
4936 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4937
4938 default:
4939 gcc_unreachable ();
4940 }
4941
4942 if (quotient == 0)
4943 {
4944 if (target && GET_MODE (target) != compute_mode)
4945 target = 0;
4946
4947 if (rem_flag)
4948 {
4949 /* Try to produce the remainder without producing the quotient.
4950 If we seem to have a divmod pattern that does not require widening,
4951 don't try widening here. We should really have a WIDEN argument
4952 to expand_twoval_binop, since what we'd really like to do here is
4953 1) try a mod insn in compute_mode
4954 2) try a divmod insn in compute_mode
4955 3) try a div insn in compute_mode and multiply-subtract to get
4956 remainder
4957 4) try the same things with widening allowed. */
4958 remainder
4959 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4960 op0, op1, target,
4961 unsignedp,
4962 ((optab_handler (optab2, compute_mode)
4963 != CODE_FOR_nothing)
4964 ? OPTAB_DIRECT : OPTAB_WIDEN));
4965 if (remainder == 0)
4966 {
4967 /* No luck there. Can we do remainder and divide at once
4968 without a library call? */
4969 remainder = gen_reg_rtx (compute_mode);
4970 if (! expand_twoval_binop ((unsignedp
4971 ? udivmod_optab
4972 : sdivmod_optab),
4973 op0, op1,
4974 NULL_RTX, remainder, unsignedp))
4975 remainder = 0;
4976 }
4977
4978 if (remainder)
4979 return gen_lowpart (mode, remainder);
4980 }
4981
4982 /* Produce the quotient. Try a quotient insn, but not a library call.
4983 If we have a divmod in this mode, use it in preference to widening
4984 the div (for this test we assume it will not fail). Note that optab2
4985 is set to the one of the two optabs that the call below will use. */
4986 quotient
4987 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4988 op0, op1, rem_flag ? NULL_RTX : target,
4989 unsignedp,
4990 ((optab_handler (optab2, compute_mode)
4991 != CODE_FOR_nothing)
4992 ? OPTAB_DIRECT : OPTAB_WIDEN));
4993
4994 if (quotient == 0)
4995 {
4996 /* No luck there. Try a quotient-and-remainder insn,
4997 keeping the quotient alone. */
4998 quotient = gen_reg_rtx (compute_mode);
4999 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5000 op0, op1,
5001 quotient, NULL_RTX, unsignedp))
5002 {
5003 quotient = 0;
5004 if (! rem_flag)
5005 /* Still no luck. If we are not computing the remainder,
5006 use a library call for the quotient. */
5007 quotient = sign_expand_binop (compute_mode,
5008 udiv_optab, sdiv_optab,
5009 op0, op1, target,
5010 unsignedp, OPTAB_LIB_WIDEN);
5011 }
5012 }
5013 }
5014
5015 if (rem_flag)
5016 {
5017 if (target && GET_MODE (target) != compute_mode)
5018 target = 0;
5019
5020 if (quotient == 0)
5021 {
5022 /* No divide instruction either. Use library for remainder. */
5023 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5024 op0, op1, target,
5025 unsignedp, OPTAB_LIB_WIDEN);
5026 /* No remainder function. Try a quotient-and-remainder
5027 function, keeping the remainder. */
5028 if (!remainder)
5029 {
5030 remainder = gen_reg_rtx (compute_mode);
5031 if (!expand_twoval_binop_libfunc
5032 (unsignedp ? udivmod_optab : sdivmod_optab,
5033 op0, op1,
5034 NULL_RTX, remainder,
5035 unsignedp ? UMOD : MOD))
5036 remainder = NULL_RTX;
5037 }
5038 }
5039 else
5040 {
5041 /* We divided. Now finish doing X - Y * (X / Y). */
5042 remainder = expand_mult (compute_mode, quotient, op1,
5043 NULL_RTX, unsignedp);
5044 remainder = expand_binop (compute_mode, sub_optab, op0,
5045 remainder, target, unsignedp,
5046 OPTAB_LIB_WIDEN);
5047 }
5048 }
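  /* A small worked example of the X - Y * (X / Y) step above (values
     chosen purely for illustration): with X == 7 and Y == 3 the
     division yields 2, the multiplication yields 6, and the
     subtraction leaves the remainder 7 - 6 == 1.  */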
5049
5050 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5051 }
5052 \f
5053 /* Return a tree node with data type TYPE, describing the value of X.
5054 Usually this is a VAR_DECL, if there is no obvious better choice.
5055 X may be an expression; however, we only support those expressions
5056 generated by loop.c. */
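/* For instance (purely illustrative), passing an rtx of the form
   (plus:SI (reg:SI n) (const_int 4)) together with an integer TYPE
   yields a PLUS_EXPR whose first operand is a synthesized VAR_DECL
   standing for the register and whose second operand is the integer
   constant 4.  */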
5057
5058 tree
5059 make_tree (tree type, rtx x)
5060 {
5061 tree t;
5062
5063 switch (GET_CODE (x))
5064 {
5065 case CONST_INT:
5066 {
5067 HOST_WIDE_INT hi = 0;
5068
5069 if (INTVAL (x) < 0
5070 && !(TYPE_UNSIGNED (type)
5071 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5072 < HOST_BITS_PER_WIDE_INT)))
5073 hi = -1;
5074
5075 t = build_int_cst_wide (type, INTVAL (x), hi);
5076
5077 return t;
5078 }
5079
5080 case CONST_DOUBLE:
5081 if (GET_MODE (x) == VOIDmode)
5082 t = build_int_cst_wide (type,
5083 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5084 else
5085 {
5086 REAL_VALUE_TYPE d;
5087
5088 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5089 t = build_real (type, d);
5090 }
5091
5092 return t;
5093
5094 case CONST_VECTOR:
5095 {
5096 int units = CONST_VECTOR_NUNITS (x);
5097 tree itype = TREE_TYPE (type);
5098 tree *elts;
5099 int i;
5100
5101 /* Build a tree with vector elements. */
5102 elts = XALLOCAVEC (tree, units);
5103 for (i = units - 1; i >= 0; --i)
5104 {
5105 rtx elt = CONST_VECTOR_ELT (x, i);
5106 elts[i] = make_tree (itype, elt);
5107 }
5108
5109 return build_vector (type, elts);
5110 }
5111
5112 case PLUS:
5113 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5114 make_tree (type, XEXP (x, 1)));
5115
5116 case MINUS:
5117 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5118 make_tree (type, XEXP (x, 1)));
5119
5120 case NEG:
5121 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5122
5123 case MULT:
5124 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5125 make_tree (type, XEXP (x, 1)));
5126
5127 case ASHIFT:
5128 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5129 make_tree (type, XEXP (x, 1)));
5130
5131 case LSHIFTRT:
5132 t = unsigned_type_for (type);
5133 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5134 make_tree (t, XEXP (x, 0)),
5135 make_tree (type, XEXP (x, 1))));
5136
5137 case ASHIFTRT:
5138 t = signed_type_for (type);
5139 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5140 make_tree (t, XEXP (x, 0)),
5141 make_tree (type, XEXP (x, 1))));
5142
5143 case DIV:
5144 if (TREE_CODE (type) != REAL_TYPE)
5145 t = signed_type_for (type);
5146 else
5147 t = type;
5148
5149 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5150 make_tree (t, XEXP (x, 0)),
5151 make_tree (t, XEXP (x, 1))));
5152 case UDIV:
5153 t = unsigned_type_for (type);
5154 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5155 make_tree (t, XEXP (x, 0)),
5156 make_tree (t, XEXP (x, 1))));
5157
5158 case SIGN_EXTEND:
5159 case ZERO_EXTEND:
5160 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5161 GET_CODE (x) == ZERO_EXTEND);
5162 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5163
5164 case CONST:
5165 return make_tree (type, XEXP (x, 0));
5166
5167 case SYMBOL_REF:
5168 t = SYMBOL_REF_DECL (x);
5169 if (t)
5170 return fold_convert (type, build_fold_addr_expr (t));
5171 /* else fall through. */
5172
5173 default:
5174 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5175
5176 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5177 address mode to pointer mode. */
5178 if (POINTER_TYPE_P (type))
5179 x = convert_memory_address_addr_space
5180 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5181
5182 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5183 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5184 t->decl_with_rtl.rtl = x;
5185
5186 return t;
5187 }
5188 }
5189 \f
5190 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5191 and returning TARGET.
5192
5193 If TARGET is 0, a pseudo-register or constant is returned. */
5194
5195 rtx
5196 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5197 {
5198 rtx tem = 0;
5199
5200 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5201 tem = simplify_binary_operation (AND, mode, op0, op1);
5202 if (tem == 0)
5203 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5204
5205 if (target == 0)
5206 target = tem;
5207 else if (tem != target)
5208 emit_move_insn (target, tem);
5209 return target;
5210 }
5211
5212 /* Helper function for emit_store_flag. */
5213 static rtx
5214 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5215 enum machine_mode mode, enum machine_mode compare_mode,
5216 int unsignedp, rtx x, rtx y, int normalizep,
5217 enum machine_mode target_mode)
5218 {
5219 struct expand_operand ops[4];
5220 rtx op0, last, comparison, subtarget;
5221 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5222
5223 last = get_last_insn ();
5224 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5225 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5226 if (!x || !y)
5227 {
5228 delete_insns_since (last);
5229 return NULL_RTX;
5230 }
5231
5232 if (target_mode == VOIDmode)
5233 target_mode = result_mode;
5234 if (!target)
5235 target = gen_reg_rtx (target_mode);
5236
5237 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5238
5239 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5240 create_fixed_operand (&ops[1], comparison);
5241 create_fixed_operand (&ops[2], x);
5242 create_fixed_operand (&ops[3], y);
5243 if (!maybe_expand_insn (icode, 4, ops))
5244 {
5245 delete_insns_since (last);
5246 return NULL_RTX;
5247 }
5248 subtarget = ops[0].value;
5249
5250 /* If we are converting to a wider mode, first convert to
5251 TARGET_MODE, then normalize. This produces better combining
5252 opportunities on machines that have a SIGN_EXTRACT when we are
5253 testing a single bit. This mostly benefits the 68k.
5254
5255 If STORE_FLAG_VALUE does not have the sign bit set when
5256 interpreted in MODE, we can do this conversion as unsigned, which
5257 is usually more efficient. */
5258 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5259 {
5260 convert_move (target, subtarget,
5261 val_signbit_known_clear_p (result_mode,
5262 STORE_FLAG_VALUE));
5263 op0 = target;
5264 result_mode = target_mode;
5265 }
5266 else
5267 op0 = subtarget;
5268
5269 /* If we want to keep subexpressions around, don't reuse our last
5270 target. */
5271 if (optimize)
5272 subtarget = 0;
5273
5274 /* Now normalize to the proper value in MODE. Sometimes we don't
5275 have to do anything. */
5276 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5277 ;
5278 /* STORE_FLAG_VALUE might be the most negative number, so write
5279 the comparison this way to avoid a compile-time warning.  */
5280 else if (- normalizep == STORE_FLAG_VALUE)
5281 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5282
5283 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5284 it hard to use a value of just the sign bit due to ANSI integer
5285 constant typing rules. */
5286 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5287 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5288 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5289 normalizep == 1);
5290 else
5291 {
5292 gcc_assert (STORE_FLAG_VALUE & 1);
5293
5294 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5295 if (normalizep == -1)
5296 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5297 }
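  /* As an example of the normalization above: if the pattern produces a
     value with only the sign bit set (say STORE_FLAG_VALUE == 0x80000000
     with a 32-bit RESULT_MODE), a right shift by 31 does the job: a
     logical shift for NORMALIZEP == 1 yields 0 or 1, an arithmetic shift
     for NORMALIZEP == -1 yields 0 or -1.  */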
5298
5299 /* If we were converting to a smaller mode, do the conversion now. */
5300 if (target_mode != result_mode)
5301 {
5302 convert_move (target, op0, 0);
5303 return target;
5304 }
5305 else
5306 return op0;
5307 }
5308
5309
5310 /* A subroutine of emit_store_flag only including "tricks" that do not
5311 need a recursive call. These are kept separate to avoid infinite
5312 loops. */
5313
5314 static rtx
5315 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5316 enum machine_mode mode, int unsignedp, int normalizep,
5317 enum machine_mode target_mode)
5318 {
5319 rtx subtarget;
5320 enum insn_code icode;
5321 enum machine_mode compare_mode;
5322 enum mode_class mclass;
5323 enum rtx_code scode;
5324 rtx tem;
5325
5326 if (unsignedp)
5327 code = unsigned_condition (code);
5328 scode = swap_condition (code);
5329
5330 /* If one operand is constant, make it the second one. Only do this
5331 if the other operand is not constant as well. */
5332
5333 if (swap_commutative_operands_p (op0, op1))
5334 {
5335 tem = op0;
5336 op0 = op1;
5337 op1 = tem;
5338 code = swap_condition (code);
5339 }
5340
5341 if (mode == VOIDmode)
5342 mode = GET_MODE (op0);
5343
5344 /* For some comparisons with 1 and -1, we can convert this to
5345 comparisons with zero. This will often produce more opportunities for
5346 store-flag insns. */
5347
5348 switch (code)
5349 {
5350 case LT:
5351 if (op1 == const1_rtx)
5352 op1 = const0_rtx, code = LE;
5353 break;
5354 case LE:
5355 if (op1 == constm1_rtx)
5356 op1 = const0_rtx, code = LT;
5357 break;
5358 case GE:
5359 if (op1 == const1_rtx)
5360 op1 = const0_rtx, code = GT;
5361 break;
5362 case GT:
5363 if (op1 == constm1_rtx)
5364 op1 = const0_rtx, code = GE;
5365 break;
5366 case GEU:
5367 if (op1 == const1_rtx)
5368 op1 = const0_rtx, code = NE;
5369 break;
5370 case LTU:
5371 if (op1 == const1_rtx)
5372 op1 = const0_rtx, code = EQ;
5373 break;
5374 default:
5375 break;
5376 }
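  /* For instance, a signed "x < 1" becomes "x <= 0" and an unsigned
     "x >= 1" (GEU) becomes "x != 0"; zero is the operand that
     store-flag patterns most often handle directly.  */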
5377
5378 /* If we are comparing a double-word integer with zero or -1, we can
5379 convert the comparison into one involving a single word. */
5380 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5381 && GET_MODE_CLASS (mode) == MODE_INT
5382 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5383 {
5384 if ((code == EQ || code == NE)
5385 && (op1 == const0_rtx || op1 == constm1_rtx))
5386 {
5387 rtx op00, op01;
5388
5389 /* Do a logical OR or AND of the two words and compare the
5390 result. */
5391 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5392 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5393 tem = expand_binop (word_mode,
5394 op1 == const0_rtx ? ior_optab : and_optab,
5395 op00, op01, NULL_RTX, unsignedp,
5396 OPTAB_DIRECT);
5397
5398 if (tem != 0)
5399 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5400 unsignedp, normalizep);
5401 }
5402 else if ((code == LT || code == GE) && op1 == const0_rtx)
5403 {
5404 rtx op0h;
5405
5406 /* If testing the sign bit, can just test on high word. */
5407 op0h = simplify_gen_subreg (word_mode, op0, mode,
5408 subreg_highpart_offset (word_mode,
5409 mode));
5410 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5411 unsignedp, normalizep);
5412 }
5413 else
5414 tem = NULL_RTX;
5415
5416 if (tem)
5417 {
5418 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5419 return tem;
5420 if (!target)
5421 target = gen_reg_rtx (target_mode);
5422
5423 convert_move (target, tem,
5424 !val_signbit_known_set_p (word_mode,
5425 (normalizep ? normalizep
5426 : STORE_FLAG_VALUE)));
5427 return target;
5428 }
5429 }
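  /* Purely as an illustration, with 32-bit words and a DImode X:
     X == 0 exactly when (Xlow | Xhigh) == 0, X == -1 exactly when
     (Xlow & Xhigh) == -1, and X < 0 depends only on the sign of Xhigh,
     so a single word_mode store-flag suffices in each case.  */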
5430
5431 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5432 complement of A (for GE) and shifting the sign bit to the low bit. */
5433 if (op1 == const0_rtx && (code == LT || code == GE)
5434 && GET_MODE_CLASS (mode) == MODE_INT
5435 && (normalizep || STORE_FLAG_VALUE == 1
5436 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5437 {
5438 subtarget = target;
5439
5440 if (!target)
5441 target_mode = mode;
5442
5443 /* If the result is to be wider than OP0, it is best to convert it
5444 first. If it is to be narrower, it is *incorrect* to convert it
5445 first. */
5446 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5447 {
5448 op0 = convert_modes (target_mode, mode, op0, 0);
5449 mode = target_mode;
5450 }
5451
5452 if (target_mode != mode)
5453 subtarget = 0;
5454
5455 if (code == GE)
5456 op0 = expand_unop (mode, one_cmpl_optab, op0,
5457 ((STORE_FLAG_VALUE == 1 || normalizep)
5458 ? 0 : subtarget), 0);
5459
5460 if (STORE_FLAG_VALUE == 1 || normalizep)
5461 /* If we are supposed to produce a 0/1 value, we want to do
5462 a logical shift from the sign bit to the low-order bit; for
5463 a -1/0 value, we do an arithmetic shift. */
5464 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5465 GET_MODE_BITSIZE (mode) - 1,
5466 subtarget, normalizep != -1);
5467
5468 if (mode != target_mode)
5469 op0 = convert_modes (target_mode, mode, op0, 0);
5470
5471 return op0;
5472 }
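  /* Illustration of the sign-bit trick above on a 32-bit A: "A < 0"
     becomes a logical right shift "(unsigned) A >> 31" when a 0/1
     result is wanted, or an arithmetic "A >> 31" for a 0/-1 result;
     "A >= 0" first takes ~A and then applies the same shift.  */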
5473
5474 mclass = GET_MODE_CLASS (mode);
5475 for (compare_mode = mode; compare_mode != VOIDmode;
5476 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5477 {
5478 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5479 icode = optab_handler (cstore_optab, optab_mode);
5480 if (icode != CODE_FOR_nothing)
5481 {
5482 do_pending_stack_adjust ();
5483 tem = emit_cstore (target, icode, code, mode, compare_mode,
5484 unsignedp, op0, op1, normalizep, target_mode);
5485 if (tem)
5486 return tem;
5487
5488 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5489 {
5490 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5491 unsignedp, op1, op0, normalizep, target_mode);
5492 if (tem)
5493 return tem;
5494 }
5495 break;
5496 }
5497 }
5498
5499 return 0;
5500 }
5501
5502 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5503 and storing in TARGET. Normally return TARGET.
5504 Return 0 if that cannot be done.
5505
5506 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5507 it is VOIDmode, they cannot both be CONST_INT.
5508
5509 UNSIGNEDP is for the case where we have to widen the operands
5510 to perform the operation. It says to use zero-extension.
5511
5512 NORMALIZEP is 1 if we should convert the result to be either zero
5513 or one. NORMALIZEP is -1 if we should convert the result to be
5514 either zero or -1. If NORMALIZEP is zero, the result will be left
5515 "raw" out of the scc insn. */
5516
5517 rtx
5518 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5519 enum machine_mode mode, int unsignedp, int normalizep)
5520 {
5521 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5522 enum rtx_code rcode;
5523 rtx subtarget;
5524 rtx tem, last, trueval;
5525
5526 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5527 target_mode);
5528 if (tem)
5529 return tem;
5530
5531 /* If we reached here, we can't do this with a scc insn, however there
5532 are some comparisons that can be done in other ways. Don't do any
5533 of these cases if branches are very cheap. */
5534 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5535 return 0;
5536
5537 /* See what we need to return. We can only return a 1, -1, or the
5538 sign bit. */
5539
5540 if (normalizep == 0)
5541 {
5542 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5543 normalizep = STORE_FLAG_VALUE;
5544
5545 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5546 ;
5547 else
5548 return 0;
5549 }
5550
5551 last = get_last_insn ();
5552
5553 /* If optimizing, use different pseudo registers for each insn, instead
5554 of reusing the same pseudo. This leads to better CSE, but slows
5555 down the compiler, since there are more pseudos.  */
5556 subtarget = (!optimize
5557 && (target_mode == mode)) ? target : NULL_RTX;
5558 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5559
5560 /* For floating-point comparisons, try the reverse comparison or try
5561 changing the "orderedness" of the comparison. */
5562 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5563 {
5564 enum rtx_code first_code;
5565 bool and_them;
5566
5567 rcode = reverse_condition_maybe_unordered (code);
5568 if (can_compare_p (rcode, mode, ccp_store_flag)
5569 && (code == ORDERED || code == UNORDERED
5570 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5571 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5572 {
5573 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575
5576 /* For the reverse comparison, use either an addition or a XOR. */
5577 if (want_add
5578 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579 optimize_insn_for_speed_p ()) == 0)
5580 {
5581 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 STORE_FLAG_VALUE, target_mode);
5583 if (tem)
5584 return expand_binop (target_mode, add_optab, tem,
5585 GEN_INT (normalizep),
5586 target, 0, OPTAB_WIDEN);
5587 }
5588 else if (!want_add
5589 && rtx_cost (trueval, XOR, 1,
5590 optimize_insn_for_speed_p ()) == 0)
5591 {
5592 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593 normalizep, target_mode);
5594 if (tem)
5595 return expand_binop (target_mode, xor_optab, tem, trueval,
5596 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5597 }
5598 }
5599
5600 delete_insns_since (last);
5601
5602 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5603 if (code == ORDERED || code == UNORDERED)
5604 return 0;
5605
5606 and_them = split_comparison (code, mode, &first_code, &code);
5607
5608 /* If there are no NaNs, the first comparison should always fall through.
5609 Effectively change the comparison to the other one. */
5610 if (!HONOR_NANS (mode))
5611 {
5612 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5613 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5614 target_mode);
5615 }
5616
5617 #ifdef HAVE_conditional_move
5618 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5619 conditional move. */
5620 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5621 normalizep, target_mode);
5622 if (tem == 0)
5623 return 0;
5624
5625 if (and_them)
5626 tem = emit_conditional_move (target, code, op0, op1, mode,
5627 tem, const0_rtx, GET_MODE (tem), 0);
5628 else
5629 tem = emit_conditional_move (target, code, op0, op1, mode,
5630 trueval, tem, GET_MODE (tem), 0);
5631
5632 if (tem == 0)
5633 delete_insns_since (last);
5634 return tem;
5635 #else
5636 return 0;
5637 #endif
5638 }
5639
5640 /* The remaining tricks only apply to integer comparisons. */
5641
5642 if (GET_MODE_CLASS (mode) != MODE_INT)
5643 return 0;
5644
5645 /* If this is an equality comparison of integers, we can try to exclusive-or
5646 (or subtract) the two operands and use a recursive call to try the
5647 comparison with zero. Don't do any of these cases if branches are
5648 very cheap. */
5649
5650 if ((code == EQ || code == NE) && op1 != const0_rtx)
5651 {
5652 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5653 OPTAB_WIDEN);
5654
5655 if (tem == 0)
5656 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5657 OPTAB_WIDEN);
5658 if (tem != 0)
5659 tem = emit_store_flag (target, code, tem, const0_rtx,
5660 mode, unsignedp, normalizep);
5661 if (tem != 0)
5662 return tem;
5663
5664 delete_insns_since (last);
5665 }
5666
5667 /* For integer comparisons, try the reverse comparison. However, for
5668 small X and if we'd have to extend anyway, implementing "X != 0"
5669 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5670 rcode = reverse_condition (code);
5671 if (can_compare_p (rcode, mode, ccp_store_flag)
5672 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5673 && code == NE
5674 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5675 && op1 == const0_rtx))
5676 {
5677 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5678 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5679
5680 /* Again, for the reverse comparison, use either an addition or a XOR. */
5681 if (want_add
5682 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5683 optimize_insn_for_speed_p ()) == 0)
5684 {
5685 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5686 STORE_FLAG_VALUE, target_mode);
5687 if (tem != 0)
5688 tem = expand_binop (target_mode, add_optab, tem,
5689 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5690 }
5691 else if (!want_add
5692 && rtx_cost (trueval, XOR, 1,
5693 optimize_insn_for_speed_p ()) == 0)
5694 {
5695 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5696 normalizep, target_mode);
5697 if (tem != 0)
5698 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5699 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5700 }
5701
5702 if (tem != 0)
5703 return tem;
5704 delete_insns_since (last);
5705 }
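  /* A concrete case of the add/xor fixup above: with STORE_FLAG_VALUE == 1
     and NORMALIZEP == -1, the reverse comparison yields 1 when the original
     condition is false and 0 when it is true, so adding NORMALIZEP gives
     the desired -1/0 result; when an addition is not wanted, XORing with
     TRUEVAL simply swaps the two possible values instead.  */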
5706
5707 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5708 the constant zero. Reject all other comparisons at this point. Only
5709 do LE and GT if branches are expensive since they are expensive on
5710 2-operand machines. */
5711
5712 if (op1 != const0_rtx
5713 || (code != EQ && code != NE
5714 && (BRANCH_COST (optimize_insn_for_speed_p (),
5715 false) <= 1 || (code != LE && code != GT))))
5716 return 0;
5717
5718 /* Try to put the result of the comparison in the sign bit. Assume we can't
5719 do the necessary operation below. */
5720
5721 tem = 0;
5722
5723 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5724 the sign bit set. */
5725
5726 if (code == LE)
5727 {
5728 /* This is destructive, so SUBTARGET can't be OP0. */
5729 if (rtx_equal_p (subtarget, op0))
5730 subtarget = 0;
5731
5732 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5733 OPTAB_WIDEN);
5734 if (tem)
5735 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5736 OPTAB_WIDEN);
5737 }
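  /* A quick check of the identity used for LE: for A == 0, A | (A - 1)
     is 0 | -1 == -1, so the sign bit is set; for A == 5 it is 5 | 4 == 5,
     sign bit clear; and for negative A the sign bit of A itself already
     makes the OR negative.  Hence the sign bit of A | (A - 1) is exactly
     "A <= 0".  */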
5738
5739 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5740 number of bits in the mode of OP0, minus one. */
5741
5742 if (code == GT)
5743 {
5744 if (rtx_equal_p (subtarget, op0))
5745 subtarget = 0;
5746
5747 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, 0);
5750 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5751 OPTAB_WIDEN);
5752 }
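  /* Checking the GT computation on 32-bit values: for A == 5,
     (A >> 31) - A == 0 - 5 == -5, sign bit set; for A == 0 the result
     is 0; for A == -3 it is -1 - (-3) == 2, sign bit clear.  So the
     sign bit of the result is exactly "A > 0".  */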
5753
5754 if (code == EQ || code == NE)
5755 {
5756 /* For EQ or NE, one way to do the comparison is to apply an operation
5757 that converts the operand into a positive number if it is nonzero
5758 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5759 for NE we negate. This puts the result in the sign bit. Then we
5760 normalize with a shift, if needed.
5761
5762 Two operations that can do the above actions are ABS and FFS, so try
5763 them. If that doesn't work, and MODE is smaller than a full word,
5764 we can use zero-extension to the wider mode (an unsigned conversion)
5765 as the operation. */
5766
5767 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5768 that is compensated by the subsequent overflow when subtracting
5769 one / negating. */
5770
5771 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5772 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5773 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5774 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5775 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5776 {
5777 tem = convert_modes (word_mode, mode, op0, 1);
5778 mode = word_mode;
5779 }
5780
5781 if (tem != 0)
5782 {
5783 if (code == EQ)
5784 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5785 0, OPTAB_WIDEN);
5786 else
5787 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5788 }
5789
5790 /* If we couldn't do it that way, for NE we can "or" the two's complement
5791 of the value with itself. For EQ, we take the one's complement of
5792 that "or", which is an extra insn, so we only handle EQ if branches
5793 are expensive. */
5794
5795 if (tem == 0
5796 && (code == NE
5797 || BRANCH_COST (optimize_insn_for_speed_p (),
5798 false) > 1))
5799 {
5800 if (rtx_equal_p (subtarget, op0))
5801 subtarget = 0;
5802
5803 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5804 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5805 OPTAB_WIDEN);
5806
5807 if (tem && code == EQ)
5808 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5809 }
5810 }
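  /* To see the EQ/NE tricks above at work on 32-bit values:
     abs (5) - 1 == 4 (sign clear) while abs (0) - 1 == -1 (sign set),
     which puts "A == 0" in the sign bit; likewise -abs (5) == -5 (sign
     set) versus -abs (0) == 0, which puts "A != 0" there.  The fallback
     (-A) | A is negative exactly when A != 0, and its one's complement
     is negative exactly when A == 0.  */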
5811
5812 if (tem && normalizep)
5813 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5814 GET_MODE_BITSIZE (mode) - 1,
5815 subtarget, normalizep == 1);
5816
5817 if (tem)
5818 {
5819 if (!target)
5820 ;
5821 else if (GET_MODE (tem) != target_mode)
5822 {
5823 convert_move (target, tem, 0);
5824 tem = target;
5825 }
5826 else if (!subtarget)
5827 {
5828 emit_move_insn (target, tem);
5829 tem = target;
5830 }
5831 }
5832 else
5833 delete_insns_since (last);
5834
5835 return tem;
5836 }
5837
5838 /* Like emit_store_flag, but always succeeds. */
5839
5840 rtx
5841 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5842 enum machine_mode mode, int unsignedp, int normalizep)
5843 {
5844 rtx tem, label;
5845 rtx trueval, falseval;
5846
5847 /* First see if emit_store_flag can do the job. */
5848 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5849 if (tem != 0)
5850 return tem;
5851
5852 if (!target)
5853 target = gen_reg_rtx (word_mode);
5854
5855 /* If this failed, we have to do this with set/compare/jump/set code.
5856 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5857 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5858 if (code == NE
5859 && GET_MODE_CLASS (mode) == MODE_INT
5860 && REG_P (target)
5861 && op0 == target
5862 && op1 == const0_rtx)
5863 {
5864 label = gen_label_rtx ();
5865 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5866 mode, NULL_RTX, NULL_RTX, label, -1);
5867 emit_move_insn (target, trueval);
5868 emit_label (label);
5869 return target;
5870 }
5871
5872 if (!REG_P (target)
5873 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5874 target = gen_reg_rtx (GET_MODE (target));
5875
5876 /* Jump in the right direction if the target cannot implement CODE
5877 but can jump on its reverse condition. */
5878 falseval = const0_rtx;
5879 if (! can_compare_p (code, mode, ccp_jump)
5880 && (! FLOAT_MODE_P (mode)
5881 || code == ORDERED || code == UNORDERED
5882 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5883 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5884 {
5885 enum rtx_code rcode;
5886 if (FLOAT_MODE_P (mode))
5887 rcode = reverse_condition_maybe_unordered (code);
5888 else
5889 rcode = reverse_condition (code);
5890
5891 /* Canonicalize to UNORDERED for the libcall. */
5892 if (can_compare_p (rcode, mode, ccp_jump)
5893 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5894 {
5895 falseval = trueval;
5896 trueval = const0_rtx;
5897 code = rcode;
5898 }
5899 }
5900
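  /* In outline, the code below emits:
         target = TRUEVAL;
         if (OP0 <code> OP1) goto done;
         target = FALSEVAL;
       done:
     with TRUEVAL and FALSEVAL swapped above when only the reverse
     condition can be branched on.  */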
5901 emit_move_insn (target, trueval);
5902 label = gen_label_rtx ();
5903 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5904 NULL_RTX, label, -1);
5905
5906 emit_move_insn (target, falseval);
5907 emit_label (label);
5908
5909 return target;
5910 }
5911 \f
5912 /* Perform possibly multi-word comparison and conditional jump to LABEL
5913 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5914 now a thin wrapper around do_compare_rtx_and_jump. */
5915
5916 static void
5917 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5918 rtx label)
5919 {
5920 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5921 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5922 NULL_RTX, NULL_RTX, label, -1);
5923 }