9743fc05ee788246b61892fd72b8a1dd1382e933
[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
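/* An illustrative, standalone sketch (not part of this file) of why the
   x & (x - 1) test above identifies zero or an exact power of two: the
   AND clears the lowest set bit, so the result is zero exactly when at
   most one bit is set.  The helper name is_zero_or_pow2 is hypothetical. */
#if 0
#include <assert.h>

static int
is_zero_or_pow2 (unsigned long x)
{
  return (x & (x - 1)) == 0;
}

int
main (void)
{
  assert (is_zero_or_pow2 (0));    /* zero                 */
  assert (is_zero_or_pow2 (1));    /* 2^0                  */
  assert (is_zero_or_pow2 (64));   /* 2^6                  */
  assert (!is_zero_or_pow2 (12));  /* 0b1100: two bits set */
  assert (!is_zero_or_pow2 (7));   /* 0b0111: three bits   */
  return 0;
}
#endif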
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 struct init_expmed_rtl
97 {
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def zext;
105 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
106 struct rtx_def smod_32; rtunion smod_32_fld1;
107 struct rtx_def wide_mult; rtunion wide_mult_fld1;
108 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
109 struct rtx_def wide_trunc;
110 struct rtx_def shift; rtunion shift_fld1;
111 struct rtx_def shift_mult; rtunion shift_mult_fld1;
112 struct rtx_def shift_add; rtunion shift_add_fld1;
113 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
114 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
115 struct rtx_def convert;
116
117 rtx pow2[MAX_BITS_PER_WORD];
118 rtx cint[MAX_BITS_PER_WORD];
119 };
120
121 static void
122 init_expmed_one_mode (struct init_expmed_rtl *all,
123 enum machine_mode mode, int speed)
124 {
125 int m, n, mode_bitsize;
126 enum machine_mode mode_from;
127
128 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
129
130 PUT_MODE (&all->reg, mode);
131 PUT_MODE (&all->plus, mode);
132 PUT_MODE (&all->neg, mode);
133 PUT_MODE (&all->mult, mode);
134 PUT_MODE (&all->sdiv, mode);
135 PUT_MODE (&all->udiv, mode);
136 PUT_MODE (&all->sdiv_32, mode);
137 PUT_MODE (&all->smod_32, mode);
138 PUT_MODE (&all->wide_trunc, mode);
139 PUT_MODE (&all->shift, mode);
140 PUT_MODE (&all->shift_mult, mode);
141 PUT_MODE (&all->shift_add, mode);
142 PUT_MODE (&all->shift_sub0, mode);
143 PUT_MODE (&all->shift_sub1, mode);
144 PUT_MODE (&all->convert, mode);
145
146 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
147 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
148 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
149 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
150 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
151
152 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
153 <= 2 * add_cost (speed, mode)));
154 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
155 <= 4 * add_cost (speed, mode)));
156
157 set_shift_cost (speed, mode, 0, 0);
158 {
159 int cost = add_cost (speed, mode);
160 set_shiftadd_cost (speed, mode, 0, cost);
161 set_shiftsub0_cost (speed, mode, 0, cost);
162 set_shiftsub1_cost (speed, mode, 0, cost);
163 }
164
165 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
166 for (m = 1; m < n; m++)
167 {
168 XEXP (&all->shift, 1) = all->cint[m];
169 XEXP (&all->shift_mult, 1) = all->pow2[m];
170
171 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
172 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
173 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
174 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
175 }
176
177 if (SCALAR_INT_MODE_P (mode))
178 {
179 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
180
181 if (wider_mode != VOIDmode)
182 {
183 PUT_MODE (&all->zext, wider_mode);
184 PUT_MODE (&all->wide_mult, wider_mode);
185 PUT_MODE (&all->wide_lshr, wider_mode);
186 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
187
188 set_mul_widen_cost (speed, wider_mode, set_src_cost (&all->wide_mult, speed));
189 set_mul_highpart_cost (speed, mode, set_src_cost (&all->wide_trunc, speed));
190 }
191
192 for (mode_from = GET_CLASS_NARROWEST_MODE (MODE_INT);
193 mode_from != VOIDmode;
194 mode_from = GET_MODE_WIDER_MODE (mode_from))
195 if (mode != mode_from)
196 {
197 unsigned short size_to = GET_MODE_SIZE (mode);
198 unsigned short size_from = GET_MODE_SIZE (mode_from);
199 if (size_to < size_from)
200 {
201 PUT_CODE (&all->convert, TRUNCATE);
202 PUT_MODE (&all->reg, mode_from);
203 set_convert_cost (mode, mode_from, speed,
204 set_src_cost (&all->convert, speed));
205 }
206 else if (size_from < size_to)
207 {
208 /* Assume cost of zero-extend and sign-extend is the same. */
209 PUT_CODE (&all->convert, ZERO_EXTEND);
210 PUT_MODE (&all->reg, mode_from);
211 set_convert_cost (mode, mode_from, speed,
212 set_src_cost (&all->convert, speed));
213 }
214 }
215 }
216 }
217
218 void
219 init_expmed (void)
220 {
221 struct init_expmed_rtl all;
222 enum machine_mode mode;
223 int m, speed;
224
225 memset (&all, 0, sizeof all);
226 for (m = 1; m < MAX_BITS_PER_WORD; m++)
227 {
228 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
229 all.cint[m] = GEN_INT (m);
230 }
231
232 PUT_CODE (&all.reg, REG);
233 /* Avoid using hard regs in ways which may be unsupported. */
234 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
235
236 PUT_CODE (&all.plus, PLUS);
237 XEXP (&all.plus, 0) = &all.reg;
238 XEXP (&all.plus, 1) = &all.reg;
239
240 PUT_CODE (&all.neg, NEG);
241 XEXP (&all.neg, 0) = &all.reg;
242
243 PUT_CODE (&all.mult, MULT);
244 XEXP (&all.mult, 0) = &all.reg;
245 XEXP (&all.mult, 1) = &all.reg;
246
247 PUT_CODE (&all.sdiv, DIV);
248 XEXP (&all.sdiv, 0) = &all.reg;
249 XEXP (&all.sdiv, 1) = &all.reg;
250
251 PUT_CODE (&all.udiv, UDIV);
252 XEXP (&all.udiv, 0) = &all.reg;
253 XEXP (&all.udiv, 1) = &all.reg;
254
255 PUT_CODE (&all.sdiv_32, DIV);
256 XEXP (&all.sdiv_32, 0) = &all.reg;
257 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
258
259 PUT_CODE (&all.smod_32, MOD);
260 XEXP (&all.smod_32, 0) = &all.reg;
261 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
262
263 PUT_CODE (&all.zext, ZERO_EXTEND);
264 XEXP (&all.zext, 0) = &all.reg;
265
266 PUT_CODE (&all.wide_mult, MULT);
267 XEXP (&all.wide_mult, 0) = &all.zext;
268 XEXP (&all.wide_mult, 1) = &all.zext;
269
270 PUT_CODE (&all.wide_lshr, LSHIFTRT);
271 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
272
273 PUT_CODE (&all.wide_trunc, TRUNCATE);
274 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
275
276 PUT_CODE (&all.shift, ASHIFT);
277 XEXP (&all.shift, 0) = &all.reg;
278
279 PUT_CODE (&all.shift_mult, MULT);
280 XEXP (&all.shift_mult, 0) = &all.reg;
281
282 PUT_CODE (&all.shift_add, PLUS);
283 XEXP (&all.shift_add, 0) = &all.shift_mult;
284 XEXP (&all.shift_add, 1) = &all.reg;
285
286 PUT_CODE (&all.shift_sub0, MINUS);
287 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
288 XEXP (&all.shift_sub0, 1) = &all.reg;
289
290 PUT_CODE (&all.shift_sub1, MINUS);
291 XEXP (&all.shift_sub1, 0) = &all.reg;
292 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
293
294 PUT_CODE (&all.convert, TRUNCATE);
295 XEXP (&all.convert, 0) = &all.reg;
296
297 for (speed = 0; speed < 2; speed++)
298 {
299 crtl->maybe_hot_insn_p = speed;
300 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
301
302 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
303 mode != VOIDmode;
304 mode = GET_MODE_WIDER_MODE (mode))
305 init_expmed_one_mode (&all, mode, speed);
306
307 for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
308 mode != VOIDmode;
309 mode = GET_MODE_WIDER_MODE (mode))
310 init_expmed_one_mode (&all, mode, speed);
311 }
312
313 if (alg_hash_used_p ())
314 {
315 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
316 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
317 }
318 else
319 set_alg_hash_used_p (true);
320 default_rtl_profile ();
321 }
322
323 /* Return an rtx representing minus the value of X.
324 MODE is the intended mode of the result,
325 useful if X is a CONST_INT. */
326
327 rtx
328 negate_rtx (enum machine_mode mode, rtx x)
329 {
330 rtx result = simplify_unary_operation (NEG, mode, x, mode);
331
332 if (result == 0)
333 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
334
335 return result;
336 }
337
338 /* Report on the availability of insv/extv/extzv and the desired mode
339 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
340 is false; else the mode of the specified operand. If OPNO is -1,
341 all the caller cares about is whether the insn is available. */
342 enum machine_mode
343 mode_for_extraction (enum extraction_pattern pattern, int opno)
344 {
345 const struct insn_data_d *data;
346
347 switch (pattern)
348 {
349 case EP_insv:
350 if (HAVE_insv)
351 {
352 data = &insn_data[CODE_FOR_insv];
353 break;
354 }
355 return MAX_MACHINE_MODE;
356
357 case EP_extv:
358 if (HAVE_extv)
359 {
360 data = &insn_data[CODE_FOR_extv];
361 break;
362 }
363 return MAX_MACHINE_MODE;
364
365 case EP_extzv:
366 if (HAVE_extzv)
367 {
368 data = &insn_data[CODE_FOR_extzv];
369 break;
370 }
371 return MAX_MACHINE_MODE;
372
373 default:
374 gcc_unreachable ();
375 }
376
377 if (opno == -1)
378 return VOIDmode;
379
380 /* Everyone who uses this function used to follow it with
381 if (result == VOIDmode) result = word_mode; */
382 if (data->operand[opno].mode == VOIDmode)
383 return word_mode;
384 return data->operand[opno].mode;
385 }
386 \f
387 /* A subroutine of store_bit_field, with the same arguments. Return true
388 if the operation could be implemented.
389
390 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
391 no other way of implementing the operation. If FALLBACK_P is false,
392 return false instead. */
393
394 static bool
395 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
396 unsigned HOST_WIDE_INT bitnum,
397 unsigned HOST_WIDE_INT bitregion_start,
398 unsigned HOST_WIDE_INT bitregion_end,
399 enum machine_mode fieldmode,
400 rtx value, bool fallback_p)
401 {
402 unsigned int unit
403 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
404 unsigned HOST_WIDE_INT offset, bitpos;
405 rtx op0 = str_rtx;
406 int byte_offset;
407 rtx orig_value;
408
409 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
410
411 while (GET_CODE (op0) == SUBREG)
412 {
413 /* The following line once was done only if WORDS_BIG_ENDIAN,
414 but I think that is a mistake. WORDS_BIG_ENDIAN is
415 meaningful at a much higher level; when structures are copied
416 between memory and regs, the higher-numbered regs
417 always get higher addresses. */
418 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
419 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
420
421 byte_offset = 0;
422
423 /* Paradoxical subregs need special handling on big endian machines. */
424 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
425 {
426 int difference = inner_mode_size - outer_mode_size;
427
428 if (WORDS_BIG_ENDIAN)
429 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
430 if (BYTES_BIG_ENDIAN)
431 byte_offset += difference % UNITS_PER_WORD;
432 }
433 else
434 byte_offset = SUBREG_BYTE (op0);
435
436 bitnum += byte_offset * BITS_PER_UNIT;
437 op0 = SUBREG_REG (op0);
438 }
439
440 /* No action is needed if the target is a register and if the field
441 lies completely outside that register. This can occur if the source
442 code contains an out-of-bounds access to a small array. */
443 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
444 return true;
445
446 /* Use vec_set patterns for inserting parts of vectors whenever
447 available. */
448 if (VECTOR_MODE_P (GET_MODE (op0))
449 && !MEM_P (op0)
450 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
451 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
452 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
453 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
454 {
455 struct expand_operand ops[3];
456 enum machine_mode outermode = GET_MODE (op0);
457 enum machine_mode innermode = GET_MODE_INNER (outermode);
458 enum insn_code icode = optab_handler (vec_set_optab, outermode);
459 int pos = bitnum / GET_MODE_BITSIZE (innermode);
460
461 create_fixed_operand (&ops[0], op0);
462 create_input_operand (&ops[1], value, innermode);
463 create_integer_operand (&ops[2], pos);
464 if (maybe_expand_insn (icode, 3, ops))
465 return true;
466 }
467
468 /* If the target is a register, overwriting the entire object, or storing
469 a full-word or multi-word field can be done with just a SUBREG.
470
471 If the target is memory, storing any naturally aligned field can be
472 done with a simple store. For targets that support fast unaligned
473 memory, any naturally sized, unit aligned field can be done directly. */
474
475 offset = bitnum / unit;
476 bitpos = bitnum % unit;
477 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
478 + (offset * UNITS_PER_WORD);
479
480 if (bitpos == 0
481 && bitsize == GET_MODE_BITSIZE (fieldmode)
482 && (!MEM_P (op0)
483 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
484 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
485 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
486 || validate_subreg (fieldmode, GET_MODE (op0), op0,
487 byte_offset)))
488 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
489 || (offset * BITS_PER_UNIT % bitsize == 0
490 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
491 {
492 if (MEM_P (op0))
493 op0 = adjust_address (op0, fieldmode, offset);
494 else if (GET_MODE (op0) != fieldmode)
495 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
496 byte_offset);
497 emit_move_insn (op0, value);
498 return true;
499 }
500
501 /* Make sure we are playing with integral modes. Pun with subregs
502 if we aren't. This must come after the entire register case above,
503 since that case is valid for any mode. The following cases are only
504 valid for integral modes. */
505 {
506 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
507 if (imode != GET_MODE (op0))
508 {
509 if (MEM_P (op0))
510 op0 = adjust_address (op0, imode, 0);
511 else
512 {
513 gcc_assert (imode != BLKmode);
514 op0 = gen_lowpart (imode, op0);
515 }
516 }
517 }
518
519 /* We may be accessing data outside the field, which means
520 we can alias adjacent data. */
521 /* ?? not always for C++0x memory model ?? */
522 if (MEM_P (op0))
523 {
524 op0 = shallow_copy_rtx (op0);
525 set_mem_alias_set (op0, 0);
526 set_mem_expr (op0, 0);
527 }
528
529 /* If OP0 is a register, BITPOS must count within a word.
530 But as we have it, it counts within whatever size OP0 now has.
531 On a bigendian machine, these are not the same, so convert. */
532 if (BYTES_BIG_ENDIAN
533 && !MEM_P (op0)
534 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
535 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
536
537 /* Storing an lsb-aligned field in a register
538 can be done with a movestrict instruction. */
539
540 if (!MEM_P (op0)
541 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
542 && bitsize == GET_MODE_BITSIZE (fieldmode)
543 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
544 {
545 struct expand_operand ops[2];
546 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
547 rtx arg0 = op0;
548 unsigned HOST_WIDE_INT subreg_off;
549
550 if (GET_CODE (arg0) == SUBREG)
551 {
552 /* Else we've got some float mode source being extracted into
553 a different float mode destination -- this combination of
554 subregs results in Severe Tire Damage. */
555 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
556 || GET_MODE_CLASS (fieldmode) == MODE_INT
557 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
558 arg0 = SUBREG_REG (arg0);
559 }
560
561 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
562 + (offset * UNITS_PER_WORD);
563 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
564 {
565 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
566
567 create_fixed_operand (&ops[0], arg0);
568 /* Shrink the source operand to FIELDMODE. */
569 create_convert_operand_to (&ops[1], value, fieldmode, false);
570 if (maybe_expand_insn (icode, 2, ops))
571 return true;
572 }
573 }
574
575 /* Handle fields bigger than a word. */
576
577 if (bitsize > BITS_PER_WORD)
578 {
579 /* Here we transfer the words of the field
580 in the order least significant first.
581 This is because the most significant word is the one which may
582 be less than full.
583 However, only do that if the value is not BLKmode. */
584
585 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
586 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
587 unsigned int i;
588 rtx last;
589
590 /* This is the mode we must force value to, so that there will be enough
591 subwords to extract. Note that fieldmode will often (always?) be
592 VOIDmode, because that is what store_field uses to indicate that this
593 is a bit field, but passing VOIDmode to operand_subword_force
594 is not allowed. */
595 fieldmode = GET_MODE (value);
596 if (fieldmode == VOIDmode)
597 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
598
599 last = get_last_insn ();
600 for (i = 0; i < nwords; i++)
601 {
602 /* If I is 0, use the low-order word in both field and target;
603 if I is 1, use the next to lowest word; and so on. */
604 unsigned int wordnum = (backwards
605 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
606 - i - 1
607 : i);
608 unsigned int bit_offset = (backwards
609 ? MAX ((int) bitsize - ((int) i + 1)
610 * BITS_PER_WORD,
611 0)
612 : (int) i * BITS_PER_WORD);
613 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
614 unsigned HOST_WIDE_INT new_bitsize =
615 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
616
617 /* If the remaining chunk doesn't have full wordsize we have
618 to make sure that for big endian machines the higher order
619 bits are used. */
620 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
621 value_word = simplify_expand_binop (word_mode, lshr_optab,
622 value_word,
623 GEN_INT (BITS_PER_WORD
624 - new_bitsize),
625 NULL_RTX, true,
626 OPTAB_LIB_WIDEN);
627
628 if (!store_bit_field_1 (op0, new_bitsize,
629 bitnum + bit_offset,
630 bitregion_start, bitregion_end,
631 word_mode,
632 value_word, fallback_p))
633 {
634 delete_insns_since (last);
635 return false;
636 }
637 }
638 return true;
639 }
640
641 /* From here on we can assume that the field to be stored in is no
642 wider than a word (whatever mode that is), since wider fields were handled above. */
643
644 /* OFFSET is the number of words or bytes (UNIT says which)
645 from STR_RTX to the first word or byte containing part of the field. */
646
647 if (!MEM_P (op0))
648 {
649 if (offset != 0
650 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
651 {
652 if (!REG_P (op0))
653 {
654 /* Since this is a destination (lvalue), we can't copy
655 it to a pseudo. We can remove a SUBREG that does not
656 change the size of the operand. Such a SUBREG may
657 have been added above. */
658 gcc_assert (GET_CODE (op0) == SUBREG
659 && (GET_MODE_SIZE (GET_MODE (op0))
660 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
661 op0 = SUBREG_REG (op0);
662 }
663 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
664 op0, (offset * UNITS_PER_WORD));
665 }
666 offset = 0;
667 }
668
669 /* If VALUE has a floating-point or complex mode, access it as an
670 integer of the corresponding size. This can occur on a machine
671 with 64 bit registers that uses SFmode for float. It can also
672 occur for unaligned float or complex fields. */
673 orig_value = value;
674 if (GET_MODE (value) != VOIDmode
675 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
676 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
677 {
678 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
679 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
680 }
681
682 /* Now OFFSET is nonzero only if OP0 is memory
683 and is therefore always measured in bytes. */
684
685 if (HAVE_insv
686 && GET_MODE (value) != BLKmode
687 && bitsize > 0
688 && GET_MODE_BITSIZE (op_mode) >= bitsize
689 /* Do not use insv for volatile bitfields when
690 -fstrict-volatile-bitfields is in effect. */
691 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
692 && flag_strict_volatile_bitfields > 0)
693 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
694 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
695 /* Do not use insv if the bit region is restricted and
696 op_mode integer at offset doesn't fit into the
697 restricted region. */
698 && !(MEM_P (op0) && bitregion_end
699 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
700 > bitregion_end + 1))
701 {
702 struct expand_operand ops[4];
703 int xbitpos = bitpos;
704 rtx value1;
705 rtx xop0 = op0;
706 rtx last = get_last_insn ();
707 bool copy_back = false;
708
709 /* Add OFFSET into OP0's address. */
710 if (MEM_P (xop0))
711 xop0 = adjust_address (xop0, byte_mode, offset);
712
713 /* If xop0 is a register, we need it in OP_MODE
714 to make it acceptable to the format of insv. */
715 if (GET_CODE (xop0) == SUBREG)
716 /* We can't just change the mode, because this might clobber op0,
717 and we will need the original value of op0 if insv fails. */
718 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
719 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
720 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
721
722 /* If the destination is a paradoxical subreg such that we need a
723 truncate to the inner mode, perform the insertion on a temporary and
724 truncate the result to the original destination. Note that we can't
725 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
726 X) 0)) is (reg:N X). */
727 if (GET_CODE (xop0) == SUBREG
728 && REG_P (SUBREG_REG (xop0))
729 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
730 op_mode)))
731 {
732 rtx tem = gen_reg_rtx (op_mode);
733 emit_move_insn (tem, xop0);
734 xop0 = tem;
735 copy_back = true;
736 }
737
738 /* We have been counting XBITPOS within UNIT.
739 Count instead within the size of the register. */
740 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
741 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
742
743 unit = GET_MODE_BITSIZE (op_mode);
744
745 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
746 "backwards" from the size of the unit we are inserting into.
747 Otherwise, we count bits from the most significant on a
748 BYTES/BITS_BIG_ENDIAN machine. */
749
750 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
751 xbitpos = unit - bitsize - xbitpos;
752
753 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
754 value1 = value;
755 if (GET_MODE (value) != op_mode)
756 {
757 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
758 {
759 /* Optimization: Don't bother really extending VALUE
760 if it has all the bits we will actually use. However,
761 if we must narrow it, be sure we do it correctly. */
762
763 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
764 {
765 rtx tmp;
766
767 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
768 if (! tmp)
769 tmp = simplify_gen_subreg (op_mode,
770 force_reg (GET_MODE (value),
771 value1),
772 GET_MODE (value), 0);
773 value1 = tmp;
774 }
775 else
776 value1 = gen_lowpart (op_mode, value1);
777 }
778 else if (CONST_INT_P (value))
779 value1 = gen_int_mode (INTVAL (value), op_mode);
780 else
781 /* Parse phase is supposed to make VALUE's data type
782 match that of the component reference, which is a type
783 at least as wide as the field; so VALUE should have
784 a mode that corresponds to that type. */
785 gcc_assert (CONSTANT_P (value));
786 }
787
788 create_fixed_operand (&ops[0], xop0);
789 create_integer_operand (&ops[1], bitsize);
790 create_integer_operand (&ops[2], xbitpos);
791 create_input_operand (&ops[3], value1, op_mode);
792 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
793 {
794 if (copy_back)
795 convert_move (op0, xop0, true);
796 return true;
797 }
798 delete_insns_since (last);
799 }
800
801 /* If OP0 is a memory, try copying it to a register and seeing if a
802 cheap register alternative is available. */
803 if (HAVE_insv && MEM_P (op0))
804 {
805 enum machine_mode bestmode;
806 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
807
808 if (bitregion_end)
809 maxbits = bitregion_end - bitregion_start + 1;
810
811 /* Get the mode to use for inserting into this field. If OP0 is
812 BLKmode, get the smallest mode consistent with the alignment. If
813 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
814 mode. Otherwise, use the smallest mode containing the field. */
815
816 if (GET_MODE (op0) == BLKmode
817 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
818 || (op_mode != MAX_MACHINE_MODE
819 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
820 bestmode = get_best_mode (bitsize, bitnum,
821 bitregion_start, bitregion_end,
822 MEM_ALIGN (op0),
823 (op_mode == MAX_MACHINE_MODE
824 ? VOIDmode : op_mode),
825 MEM_VOLATILE_P (op0));
826 else
827 bestmode = GET_MODE (op0);
828
829 if (bestmode != VOIDmode
830 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
831 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
832 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
833 {
834 rtx last, tempreg, xop0;
835 unsigned HOST_WIDE_INT xoffset, xbitpos;
836
837 last = get_last_insn ();
838
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 xbitpos = bitnum % unit;
845 xop0 = adjust_address (op0, bestmode, xoffset);
846
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
853 {
854 emit_move_insn (xop0, tempreg);
855 return true;
856 }
857 delete_insns_since (last);
858 }
859 }
860
861 if (!fallback_p)
862 return false;
863
864 store_fixed_bit_field (op0, offset, bitsize, bitpos,
865 bitregion_start, bitregion_end, value);
866 return true;
867 }
868
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
872
873 BITREGION_START is the bit position of the first bitfield in this region.
874 BITREGION_END is the bit position of the ending bitfield in this region.
875 These two fields are 0 if the C++ memory model does not apply,
876 or if we are not interested in keeping track of bitfield regions.
877
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
879
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
887 {
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
892 {
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
896
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
900
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
902
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
913 }
914
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
919 }
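/* An illustrative, standalone sketch (not part of this file) of what the
   bit region passed to store_bit_field protects.  Under the C11/C++11
   memory model, fields separated by a zero-width bit-field are distinct
   memory locations, so a store to one of them must not read-modify-write
   bytes belonging to the other; BITREGION_START/BITREGION_END delimit the
   bytes the store may touch.  The struct below is hypothetical. */
#if 0
struct bit_region_example
{
  unsigned int a : 4;   /* a and b share one memory location ...            */
  unsigned int b : 4;
  unsigned int   : 0;   /* zero-width bit-field closes the bit region, so   */
  unsigned int c : 4;   /* c is a separate location: a store to a or b must */
};                      /* stay within the bytes that hold a and b.         */
#endif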
920 \f
921 /* Use shifts and boolean operations to store VALUE
922 into a bit field of width BITSIZE
923 in the memory location specified by OP0, offset by OFFSET bytes.
924 (OFFSET must be 0 if OP0 is a register.)
925 The field starts at position BITPOS within the byte.
926 (If OP0 is a register, it may be a full word or a narrower mode,
927 but BITPOS still counts within a full word,
928 which is significant on bigendian machines.) */
929
930 static void
931 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
932 unsigned HOST_WIDE_INT bitsize,
933 unsigned HOST_WIDE_INT bitpos,
934 unsigned HOST_WIDE_INT bitregion_start,
935 unsigned HOST_WIDE_INT bitregion_end,
936 rtx value)
937 {
938 enum machine_mode mode;
939 unsigned int total_bits = BITS_PER_WORD;
940 rtx temp;
941 int all_zero = 0;
942 int all_one = 0;
943
944 /* There is a case not handled here:
945 a structure with a known alignment of just a halfword
946 and a field split across two aligned halfwords within the structure.
947 Or likewise a structure with a known alignment of just a byte
948 and a field split across two bytes.
949 Such cases are not supposed to be able to occur. */
950
951 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
952 {
953 gcc_assert (!offset);
954 /* Special treatment for a bit field split across two registers. */
955 if (bitsize + bitpos > BITS_PER_WORD)
956 {
957 store_split_bit_field (op0, bitsize, bitpos,
958 bitregion_start, bitregion_end,
959 value);
960 return;
961 }
962 }
963 else
964 {
965 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
966
967 if (bitregion_end)
968 maxbits = bitregion_end - bitregion_start + 1;
969
970 /* Get the proper mode to use for this field. We want a mode that
971 includes the entire field. If such a mode would be larger than
972 a word, we won't be doing the extraction the normal way.
973 We don't want a mode bigger than the destination. */
974
975 mode = GET_MODE (op0);
976 if (GET_MODE_BITSIZE (mode) == 0
977 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
978 mode = word_mode;
979
980 if (MEM_VOLATILE_P (op0)
981 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
982 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
983 && flag_strict_volatile_bitfields > 0)
984 mode = GET_MODE (op0);
985 else
986 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
987 bitregion_start, bitregion_end,
988 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
989
990 if (mode == VOIDmode)
991 {
992 /* The only way this should occur is if the field spans word
993 boundaries. */
994 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
995 bitregion_start, bitregion_end, value);
996 return;
997 }
998
999 total_bits = GET_MODE_BITSIZE (mode);
1000
1001 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1002 be in the range 0 to total_bits-1, and put any excess bytes in
1003 OFFSET. */
1004 if (bitpos >= total_bits)
1005 {
1006 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1007 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1008 * BITS_PER_UNIT);
1009 }
1010
1011 /* Get ref to an aligned byte, halfword, or word containing the field.
1012 Adjust BITPOS to be position within a word,
1013 and OFFSET to be the offset of that word.
1014 Then alter OP0 to refer to that word. */
1015 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1016 offset -= (offset % (total_bits / BITS_PER_UNIT));
1017 op0 = adjust_address (op0, mode, offset);
1018 }
1019
1020 mode = GET_MODE (op0);
1021
1022 /* Now MODE is either some integral mode for a MEM as OP0,
1023 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
1024 The bit field is contained entirely within OP0.
1025 BITPOS is the starting bit number within OP0.
1026 (OP0's mode may actually be narrower than MODE.) */
1027
1028 if (BYTES_BIG_ENDIAN)
1029 /* BITPOS is the distance between our msb
1030 and that of the containing datum.
1031 Convert it to the distance from the lsb. */
1032 bitpos = total_bits - bitsize - bitpos;
1033
1034 /* Now BITPOS is always the distance between our lsb
1035 and that of OP0. */
1036
1037 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1038 we must first convert its mode to MODE. */
1039
1040 if (CONST_INT_P (value))
1041 {
1042 HOST_WIDE_INT v = INTVAL (value);
1043
1044 if (bitsize < HOST_BITS_PER_WIDE_INT)
1045 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1046
1047 if (v == 0)
1048 all_zero = 1;
1049 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1050 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1051 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1052 all_one = 1;
1053
1054 value = lshift_value (mode, value, bitpos, bitsize);
1055 }
1056 else
1057 {
1058 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1059 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1060
1061 if (GET_MODE (value) != mode)
1062 value = convert_to_mode (mode, value, 1);
1063
1064 if (must_and)
1065 value = expand_binop (mode, and_optab, value,
1066 mask_rtx (mode, 0, bitsize, 0),
1067 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1068 if (bitpos > 0)
1069 value = expand_shift (LSHIFT_EXPR, mode, value,
1070 bitpos, NULL_RTX, 1);
1071 }
1072
1073 /* Now clear the chosen bits in OP0,
1074 except that if VALUE is -1 we need not bother. */
1075 /* We keep the intermediates in registers to allow CSE to combine
1076 consecutive bitfield assignments. */
1077
1078 temp = force_reg (mode, op0);
1079
1080 if (! all_one)
1081 {
1082 temp = expand_binop (mode, and_optab, temp,
1083 mask_rtx (mode, bitpos, bitsize, 1),
1084 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1085 temp = force_reg (mode, temp);
1086 }
1087
1088 /* Now logical-or VALUE into OP0, unless it is zero. */
1089
1090 if (! all_zero)
1091 {
1092 temp = expand_binop (mode, ior_optab, temp, value,
1093 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1094 temp = force_reg (mode, temp);
1095 }
1096
1097 if (op0 != temp)
1098 {
1099 op0 = copy_rtx (op0);
1100 emit_move_insn (op0, temp);
1101 }
1102 }
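/* An illustrative, standalone sketch (not part of this file) of the
   shift-and-mask sequence store_fixed_bit_field emits for a register-sized
   word: AND with the inverted mask to clear the field, then OR in the
   shifted value.  Little-endian bit numbering and bitsize < 32 are assumed;
   the helper name store_field_in_word is hypothetical. */
#if 0
#include <assert.h>

static unsigned int
store_field_in_word (unsigned int word, unsigned int value,
                     unsigned int bitpos, unsigned int bitsize)
{
  unsigned int mask = ((1u << bitsize) - 1) << bitpos;
  return (word & ~mask) | ((value << bitpos) & mask);
}

int
main (void)
{
  /* Store the 4-bit value 0x5 at bit position 4 of an all-ones word.  */
  assert (store_field_in_word (0xffffffffu, 0x5, 4, 4) == 0xffffff5fu);
  return 0;
}
#endif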
1103 \f
1104 /* Store a bit field that is split across multiple accessible memory objects.
1105
1106 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1107 BITSIZE is the field width; BITPOS the position of its first bit
1108 (within the word).
1109 VALUE is the value to store.
1110
1111 This does not yet handle fields wider than BITS_PER_WORD. */
1112
1113 static void
1114 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1115 unsigned HOST_WIDE_INT bitpos,
1116 unsigned HOST_WIDE_INT bitregion_start,
1117 unsigned HOST_WIDE_INT bitregion_end,
1118 rtx value)
1119 {
1120 unsigned int unit;
1121 unsigned int bitsdone = 0;
1122
1123 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1124 much at a time. */
1125 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1126 unit = BITS_PER_WORD;
1127 else
1128 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1129
1130 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1131 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1132 that VALUE might be a floating-point constant. */
1133 if (CONSTANT_P (value) && !CONST_INT_P (value))
1134 {
1135 rtx word = gen_lowpart_common (word_mode, value);
1136
1137 if (word && (value != word))
1138 value = word;
1139 else
1140 value = gen_lowpart_common (word_mode,
1141 force_reg (GET_MODE (value) != VOIDmode
1142 ? GET_MODE (value)
1143 : word_mode, value));
1144 }
1145
1146 while (bitsdone < bitsize)
1147 {
1148 unsigned HOST_WIDE_INT thissize;
1149 rtx part, word;
1150 unsigned HOST_WIDE_INT thispos;
1151 unsigned HOST_WIDE_INT offset;
1152
1153 offset = (bitpos + bitsdone) / unit;
1154 thispos = (bitpos + bitsdone) % unit;
1155
1156 /* When the region of bytes we can touch is restricted, decrease
1157 UNIT close to the end of the region as needed. */
1158 if (bitregion_end
1159 && unit > BITS_PER_UNIT
1160 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1161 {
1162 unit = unit / 2;
1163 continue;
1164 }
1165
1166 /* THISSIZE must not overrun a word boundary. Otherwise,
1167 store_fixed_bit_field will call us again, and we will mutually
1168 recurse forever. */
1169 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1170 thissize = MIN (thissize, unit - thispos);
1171
1172 if (BYTES_BIG_ENDIAN)
1173 {
1174 int total_bits;
1175
1176 /* We must do an endian conversion exactly the same way as it is
1177 done in extract_bit_field, so that the two calls to
1178 extract_fixed_bit_field will have comparable arguments. */
1179 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1180 total_bits = BITS_PER_WORD;
1181 else
1182 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1183
1184 /* Fetch successively less significant portions. */
1185 if (CONST_INT_P (value))
1186 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1187 >> (bitsize - bitsdone - thissize))
1188 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1189 else
1190 /* The args are chosen so that the last part includes the
1191 lsb. Give extract_bit_field the value it needs (with
1192 endianness compensation) to fetch the piece we want. */
1193 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1194 total_bits - bitsize + bitsdone,
1195 NULL_RTX, 1, false);
1196 }
1197 else
1198 {
1199 /* Fetch successively more significant portions. */
1200 if (CONST_INT_P (value))
1201 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1202 >> bitsdone)
1203 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1204 else
1205 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1206 bitsdone, NULL_RTX, 1, false);
1207 }
1208
1209 /* If OP0 is a register, then handle OFFSET here.
1210
1211 When handling multiword bitfields, extract_bit_field may pass
1212 down a word_mode SUBREG of a larger REG for a bitfield that actually
1213 crosses a word boundary. Thus, for a SUBREG, we must find
1214 the current word starting from the base register. */
1215 if (GET_CODE (op0) == SUBREG)
1216 {
1217 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1218 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1219 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1220 word = word_offset ? const0_rtx : op0;
1221 else
1222 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1223 GET_MODE (SUBREG_REG (op0)));
1224 offset = 0;
1225 }
1226 else if (REG_P (op0))
1227 {
1228 enum machine_mode op0_mode = GET_MODE (op0);
1229 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1230 word = offset ? const0_rtx : op0;
1231 else
1232 word = operand_subword_force (op0, offset, GET_MODE (op0));
1233 offset = 0;
1234 }
1235 else
1236 word = op0;
1237
1238 /* OFFSET is in UNITs, and UNIT is in bits.
1239 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1240 it is just an out-of-bounds access. Ignore it. */
1241 if (word != const0_rtx)
1242 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1243 thispos, bitregion_start, bitregion_end, part);
1244 bitsdone += thissize;
1245 }
1246 }
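/* An illustrative, standalone sketch (not part of this file) of how a
   field crossing a word boundary is carved into per-word pieces, mirroring
   the OFFSET/THISPOS/THISSIZE computation in the loop above for the
   little-endian case with a fixed UNIT.  The constants are hypothetical. */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int unit = 32;      /* e.g. BITS_PER_WORD                  */
  unsigned int bitpos = 28;    /* field starts 28 bits into word 0    */
  unsigned int bitsize = 12;   /* ... and extends 8 bits into word 1  */
  unsigned int bitsdone = 0;

  while (bitsdone < bitsize)
    {
      unsigned int offset = (bitpos + bitsdone) / unit;    /* word index  */
      unsigned int thispos = (bitpos + bitsdone) % unit;   /* bit in word */
      unsigned int thissize = bitsize - bitsdone;
      if (thissize > unit - thispos)
        thissize = unit - thispos;                 /* stop at word end */
      printf ("word %u: %u bits at bit %u\n", offset, thissize, thispos);
      bitsdone += thissize;
    }
  /* Prints:
       word 0: 4 bits at bit 28
       word 1: 8 bits at bit 0   */
  return 0;
}
#endif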
1247 \f
1248 /* A subroutine of extract_bit_field_1 that converts return value X
1249 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1250 to extract_bit_field. */
1251
1252 static rtx
1253 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1254 enum machine_mode tmode, bool unsignedp)
1255 {
1256 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1257 return x;
1258
1259 /* If the x mode is not a scalar integral, first convert to the
1260 integer mode of that size and then access it as a floating-point
1261 value via a SUBREG. */
1262 if (!SCALAR_INT_MODE_P (tmode))
1263 {
1264 enum machine_mode smode;
1265
1266 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1267 x = convert_to_mode (smode, x, unsignedp);
1268 x = force_reg (smode, x);
1269 return gen_lowpart (tmode, x);
1270 }
1271
1272 return convert_to_mode (tmode, x, unsignedp);
1273 }
1274
1275 /* A subroutine of extract_bit_field, with the same arguments.
1276 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1277 if we can find no other means of implementing the operation.
1278 If FALLBACK_P is false, return NULL instead. */
1279
1280 static rtx
1281 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1282 unsigned HOST_WIDE_INT bitnum,
1283 int unsignedp, bool packedp, rtx target,
1284 enum machine_mode mode, enum machine_mode tmode,
1285 bool fallback_p)
1286 {
1287 unsigned int unit
1288 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1289 unsigned HOST_WIDE_INT offset, bitpos;
1290 rtx op0 = str_rtx;
1291 enum machine_mode int_mode;
1292 enum machine_mode ext_mode;
1293 enum machine_mode mode1;
1294 int byte_offset;
1295
1296 if (tmode == VOIDmode)
1297 tmode = mode;
1298
1299 while (GET_CODE (op0) == SUBREG)
1300 {
1301 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1302 op0 = SUBREG_REG (op0);
1303 }
1304
1305 /* If we have an out-of-bounds access to a register, just return an
1306 uninitialized register of the required mode. This can occur if the
1307 source code contains an out-of-bounds access to a small array. */
1308 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1309 return gen_reg_rtx (tmode);
1310
1311 if (REG_P (op0)
1312 && mode == GET_MODE (op0)
1313 && bitnum == 0
1314 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1315 {
1316 /* We're trying to extract a full register from itself. */
1317 return op0;
1318 }
1319
1320 /* See if we can get a better vector mode before extracting. */
1321 if (VECTOR_MODE_P (GET_MODE (op0))
1322 && !MEM_P (op0)
1323 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1324 {
1325 enum machine_mode new_mode;
1326
1327 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1328 new_mode = MIN_MODE_VECTOR_FLOAT;
1329 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1330 new_mode = MIN_MODE_VECTOR_FRACT;
1331 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1332 new_mode = MIN_MODE_VECTOR_UFRACT;
1333 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1334 new_mode = MIN_MODE_VECTOR_ACCUM;
1335 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1336 new_mode = MIN_MODE_VECTOR_UACCUM;
1337 else
1338 new_mode = MIN_MODE_VECTOR_INT;
1339
1340 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1341 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1342 && targetm.vector_mode_supported_p (new_mode))
1343 break;
1344 if (new_mode != VOIDmode)
1345 op0 = gen_lowpart (new_mode, op0);
1346 }
1347
1348 /* Use vec_extract patterns for extracting parts of vectors whenever
1349 available. */
1350 if (VECTOR_MODE_P (GET_MODE (op0))
1351 && !MEM_P (op0)
1352 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1353 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1354 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1355 {
1356 struct expand_operand ops[3];
1357 enum machine_mode outermode = GET_MODE (op0);
1358 enum machine_mode innermode = GET_MODE_INNER (outermode);
1359 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1360 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1361
1362 create_output_operand (&ops[0], target, innermode);
1363 create_input_operand (&ops[1], op0, outermode);
1364 create_integer_operand (&ops[2], pos);
1365 if (maybe_expand_insn (icode, 3, ops))
1366 {
1367 target = ops[0].value;
1368 if (GET_MODE (target) != mode)
1369 return gen_lowpart (tmode, target);
1370 return target;
1371 }
1372 }
1373
1374 /* Make sure we are playing with integral modes. Pun with subregs
1375 if we aren't. */
1376 {
1377 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1378 if (imode != GET_MODE (op0))
1379 {
1380 if (MEM_P (op0))
1381 op0 = adjust_address (op0, imode, 0);
1382 else if (imode != BLKmode)
1383 {
1384 op0 = gen_lowpart (imode, op0);
1385
1386 /* If we got a SUBREG, force it into a register since we
1387 aren't going to be able to do another SUBREG on it. */
1388 if (GET_CODE (op0) == SUBREG)
1389 op0 = force_reg (imode, op0);
1390 }
1391 else if (REG_P (op0))
1392 {
1393 rtx reg, subreg;
1394 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1395 MODE_INT);
1396 reg = gen_reg_rtx (imode);
1397 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1398 emit_move_insn (subreg, op0);
1399 op0 = reg;
1400 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1401 }
1402 else
1403 {
1404 rtx mem = assign_stack_temp (GET_MODE (op0),
1405 GET_MODE_SIZE (GET_MODE (op0)));
1406 emit_move_insn (mem, op0);
1407 op0 = adjust_address (mem, BLKmode, 0);
1408 }
1409 }
1410 }
1411
1412 /* We may be accessing data outside the field, which means
1413 we can alias adjacent data. */
1414 if (MEM_P (op0))
1415 {
1416 op0 = shallow_copy_rtx (op0);
1417 set_mem_alias_set (op0, 0);
1418 set_mem_expr (op0, 0);
1419 }
1420
1421 /* Extraction of a full-word or multi-word value from a structure
1422 in a register or aligned memory can be done with just a SUBREG.
1423 A subword value in the least significant part of a register
1424 can also be extracted with a SUBREG. For this, we need the
1425 byte offset of the value in op0. */
1426
1427 bitpos = bitnum % unit;
1428 offset = bitnum / unit;
1429 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1430
1431 /* If OP0 is a register, BITPOS must count within a word.
1432 But as we have it, it counts within whatever size OP0 now has.
1433 On a bigendian machine, these are not the same, so convert. */
1434 if (BYTES_BIG_ENDIAN
1435 && !MEM_P (op0)
1436 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1437 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1438
1439 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1440 If that's wrong, the solution is to test for it and set TARGET to 0
1441 if needed. */
1442
1443 /* Only scalar integer modes can be converted via subregs. There is an
1444 additional problem for FP modes here in that they can have a precision
1445 which is different from the size. mode_for_size uses precision, but
1446 we want a mode based on the size, so we must avoid calling it for FP
1447 modes. */
1448 mode1 = (SCALAR_INT_MODE_P (tmode)
1449 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1450 : mode);
1451
1452 /* If the bitfield is volatile, we need to make sure the access
1453 remains on a type-aligned boundary. */
1454 if (GET_CODE (op0) == MEM
1455 && MEM_VOLATILE_P (op0)
1456 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1457 && flag_strict_volatile_bitfields > 0)
1458 goto no_subreg_mode_swap;
1459
1460 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1461 && bitpos % BITS_PER_WORD == 0)
1462 || (mode1 != BLKmode
1463 /* ??? The big endian test here is wrong. This is correct
1464 if the value is in a register, and if mode_for_size is not
1465 the same mode as op0. This causes us to get unnecessarily
1466 inefficient code from the Thumb port when -mbig-endian. */
1467 && (BYTES_BIG_ENDIAN
1468 ? bitpos + bitsize == BITS_PER_WORD
1469 : bitpos == 0)))
1470 && ((!MEM_P (op0)
1471 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1472 && GET_MODE_SIZE (mode1) != 0
1473 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1474 || (MEM_P (op0)
1475 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1476 || (offset * BITS_PER_UNIT % bitsize == 0
1477 && MEM_ALIGN (op0) % bitsize == 0)))))
1478 {
1479 if (MEM_P (op0))
1480 op0 = adjust_address (op0, mode1, offset);
1481 else if (mode1 != GET_MODE (op0))
1482 {
1483 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1484 byte_offset);
1485 if (sub == NULL)
1486 goto no_subreg_mode_swap;
1487 op0 = sub;
1488 }
1489 if (mode1 != mode)
1490 return convert_to_mode (tmode, op0, unsignedp);
1491 return op0;
1492 }
1493 no_subreg_mode_swap:
1494
1495 /* Handle fields bigger than a word. */
1496
1497 if (bitsize > BITS_PER_WORD)
1498 {
1499 /* Here we transfer the words of the field
1500 in the order least significant first.
1501 This is because the most significant word is the one which may
1502 be less than full. */
1503
1504 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1505 unsigned int i;
1506
1507 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1508 target = gen_reg_rtx (mode);
1509
1510 /* Indicate for flow that the entire target reg is being set. */
1511 emit_clobber (target);
1512
1513 for (i = 0; i < nwords; i++)
1514 {
1515 /* If I is 0, use the low-order word in both field and target;
1516 if I is 1, use the next to lowest word; and so on. */
1517 /* Word number in TARGET to use. */
1518 unsigned int wordnum
1519 = (WORDS_BIG_ENDIAN
1520 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1521 : i);
1522 /* Offset from start of field in OP0. */
1523 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1524 ? MAX (0, ((int) bitsize - ((int) i + 1)
1525 * (int) BITS_PER_WORD))
1526 : (int) i * BITS_PER_WORD);
1527 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1528 rtx result_part
1529 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1530 bitsize - i * BITS_PER_WORD),
1531 bitnum + bit_offset, 1, false, target_part, mode,
1532 word_mode);
1533
1534 gcc_assert (target_part);
1535
1536 if (result_part != target_part)
1537 emit_move_insn (target_part, result_part);
1538 }
1539
1540 if (unsignedp)
1541 {
1542 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1543 need to be zero'd out. */
1544 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1545 {
1546 unsigned int i, total_words;
1547
1548 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1549 for (i = nwords; i < total_words; i++)
1550 emit_move_insn
1551 (operand_subword (target,
1552 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1553 1, VOIDmode),
1554 const0_rtx);
1555 }
1556 return target;
1557 }
1558
1559 /* Signed bit field: sign-extend with two arithmetic shifts. */
1560 target = expand_shift (LSHIFT_EXPR, mode, target,
1561 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1562 return expand_shift (RSHIFT_EXPR, mode, target,
1563 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1564 }
1565
1566 /* From here on we know the desired field is smaller than a word. */
1567
1568 /* Check if there is a correspondingly-sized integer field, so we can
1569 safely extract it as one size of integer, if necessary; then
1570 truncate or extend to the size that is wanted; then use SUBREGs or
1571 convert_to_mode to get one of the modes we really wanted. */
1572
1573 int_mode = int_mode_for_mode (tmode);
1574 if (int_mode == BLKmode)
1575 int_mode = int_mode_for_mode (mode);
1576 /* Should probably push op0 out to memory and then do a load. */
1577 gcc_assert (int_mode != BLKmode);
1578
1579 /* OFFSET is the number of words or bytes (UNIT says which)
1580 from STR_RTX to the first word or byte containing part of the field. */
1581 if (!MEM_P (op0))
1582 {
1583 if (offset != 0
1584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1585 {
1586 if (!REG_P (op0))
1587 op0 = copy_to_reg (op0);
1588 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1589 op0, (offset * UNITS_PER_WORD));
1590 }
1591 offset = 0;
1592 }
1593
1594 /* Now OFFSET is nonzero only for memory operands. */
1595 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1596 if (ext_mode != MAX_MACHINE_MODE
1597 && bitsize > 0
1598 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1599 /* Do not use extv/extzv for volatile bitfields when
1600 -fstrict-volatile-bitfields is in effect. */
1601 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1602 && flag_strict_volatile_bitfields > 0)
1603 /* If op0 is a register, we need it in EXT_MODE to make it
1604 acceptable to the format of ext(z)v. */
1605 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1606 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1607 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1608 {
1609 struct expand_operand ops[4];
1610 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1611 rtx xop0 = op0;
1612 rtx xtarget = target;
1613 rtx xspec_target = target;
1614 rtx xspec_target_subreg = 0;
1615
1616 /* If op0 is a register, we need it in EXT_MODE to make it
1617 acceptable to the format of ext(z)v. */
1618 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1619 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1620 if (MEM_P (xop0))
1621 /* Get ref to first byte containing part of the field. */
1622 xop0 = adjust_address (xop0, byte_mode, xoffset);
1623
1624 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1625 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1626 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1627
1628 unit = GET_MODE_BITSIZE (ext_mode);
1629
1630 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1631 "backwards" from the size of the unit we are extracting from.
1632 Otherwise, we count bits from the most significant on a
1633 BYTES/BITS_BIG_ENDIAN machine. */
1634
1635 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1636 xbitpos = unit - bitsize - xbitpos;
1637
1638 if (xtarget == 0)
1639 xtarget = xspec_target = gen_reg_rtx (tmode);
1640
1641 if (GET_MODE (xtarget) != ext_mode)
1642 {
1643 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1644 between the mode of the extraction (word_mode) and the target
1645 mode. Instead, create a temporary and use convert_move to set
1646 the target. */
1647 if (REG_P (xtarget)
1648 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1649 {
1650 xtarget = gen_lowpart (ext_mode, xtarget);
1651 if (GET_MODE_PRECISION (ext_mode)
1652 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1653 xspec_target_subreg = xtarget;
1654 }
1655 else
1656 xtarget = gen_reg_rtx (ext_mode);
1657 }
1658
1659 create_output_operand (&ops[0], xtarget, ext_mode);
1660 create_fixed_operand (&ops[1], xop0);
1661 create_integer_operand (&ops[2], bitsize);
1662 create_integer_operand (&ops[3], xbitpos);
1663 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1664 4, ops))
1665 {
1666 xtarget = ops[0].value;
1667 if (xtarget == xspec_target)
1668 return xtarget;
1669 if (xtarget == xspec_target_subreg)
1670 return xspec_target;
1671 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1672 }
1673 }
1674
1675 /* If OP0 is a memory, try copying it to a register and seeing if a
1676 cheap register alternative is available. */
1677 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1678 {
1679 enum machine_mode bestmode;
1680
1681 /* Get the mode to use for inserting into this field. If
1682 OP0 is BLKmode, get the smallest mode consistent with the
1683 alignment. If OP0 is a non-BLKmode object that is no
1684 wider than EXT_MODE, use its mode. Otherwise, use the
1685 smallest mode containing the field. */
1686
1687 if (GET_MODE (op0) == BLKmode
1688 || (ext_mode != MAX_MACHINE_MODE
1689 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1690 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1691 (ext_mode == MAX_MACHINE_MODE
1692 ? VOIDmode : ext_mode),
1693 MEM_VOLATILE_P (op0));
1694 else
1695 bestmode = GET_MODE (op0);
1696
1697 if (bestmode != VOIDmode
1698 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1699 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1700 {
1701 unsigned HOST_WIDE_INT xoffset, xbitpos;
1702
1703 /* Compute the offset as a multiple of this unit,
1704 counting in bytes. */
1705 unit = GET_MODE_BITSIZE (bestmode);
1706 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1707 xbitpos = bitnum % unit;
1708
1709 /* Make sure the register is big enough for the whole field. */
1710 if (xoffset * BITS_PER_UNIT + unit
1711 >= offset * BITS_PER_UNIT + bitsize)
1712 {
1713 rtx last, result, xop0;
1714
1715 last = get_last_insn ();
1716
1717 /* Fetch it to a register in that size. */
1718 xop0 = adjust_address (op0, bestmode, xoffset);
1719 xop0 = force_reg (bestmode, xop0);
1720 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1721 unsignedp, packedp, target,
1722 mode, tmode, false);
1723 if (result)
1724 return result;
1725
1726 delete_insns_since (last);
1727 }
1728 }
1729 }
1730
1731 if (!fallback_p)
1732 return NULL;
1733
1734 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1735 bitpos, target, unsignedp, packedp);
1736 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1737 }
1738
1739 /* Generate code to extract a byte-field from STR_RTX
1740 containing BITSIZE bits, starting at BITNUM,
1741 and put it in TARGET if possible (if TARGET is nonzero).
1742 Regardless of TARGET, we return the rtx for where the value is placed.
1743
1744 STR_RTX is the structure containing the byte (a REG or MEM).
1745 UNSIGNEDP is nonzero if this is an unsigned bit field.
1746 PACKEDP is nonzero if the field has the packed attribute.
1747 MODE is the natural mode of the field value once extracted.
1748 TMODE is the mode the caller would like the value to have;
1749 but the value may be returned with type MODE instead.
1750
1751 If a TARGET is specified and we can store in it at no extra cost,
1752 we do so, and return TARGET.
1753 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1754 if they are equally easy. */
1755
1756 rtx
1757 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1758 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1759 rtx target, enum machine_mode mode, enum machine_mode tmode)
1760 {
1761 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1762 target, mode, tmode, true);
1763 }
1764 \f
1765 /* Extract a bit field using shifts and boolean operations.
1766 Returns an rtx to represent the value.
1767 OP0 addresses a register (word) or memory (byte).
1768 BITPOS says which bit within the word or byte the bit field starts in.
1769 OFFSET says how many bytes farther the bit field starts;
1770 it is 0 if OP0 is a register.
1771 BITSIZE says how many bits long the bit field is.
1772 (If OP0 is a register, it may be narrower than a full word,
1773 but BITPOS still counts within a full word,
1774 which is significant on big-endian machines.)
1775
1776 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1777 PACKEDP is true if the field has the packed attribute.
1778
1779 If TARGET is nonzero, attempts to store the value there
1780 and return TARGET, but this is not guaranteed.
1781 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
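/* As an illustration (assuming little-endian bit numbering): an unsigned
   5-bit field starting at bit 3 of a word X is computed below as
   (X >> 3) & 0x1f; a signed field is additionally sign-extended by a
   pair of shifts.  */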
1782
1783 static rtx
1784 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1785 unsigned HOST_WIDE_INT offset,
1786 unsigned HOST_WIDE_INT bitsize,
1787 unsigned HOST_WIDE_INT bitpos, rtx target,
1788 int unsignedp, bool packedp)
1789 {
1790 unsigned int total_bits = BITS_PER_WORD;
1791 enum machine_mode mode;
1792
1793 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1794 {
1795 /* Special treatment for a bit field split across two registers. */
1796 if (bitsize + bitpos > BITS_PER_WORD)
1797 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1798 }
1799 else
1800 {
1801 /* Get the proper mode to use for this field. We want a mode that
1802 includes the entire field. If such a mode would be larger than
1803 a word, we won't be doing the extraction the normal way. */
1804
1805 if (MEM_VOLATILE_P (op0)
1806 && flag_strict_volatile_bitfields > 0)
1807 {
1808 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1809 mode = GET_MODE (op0);
1810 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1811 mode = GET_MODE (target);
1812 else
1813 mode = tmode;
1814 }
1815 else
1816 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1817 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1818
1819 if (mode == VOIDmode)
1820 /* The only way this should occur is if the field spans word
1821 boundaries. */
1822 return extract_split_bit_field (op0, bitsize,
1823 bitpos + offset * BITS_PER_UNIT,
1824 unsignedp);
1825
1826 total_bits = GET_MODE_BITSIZE (mode);
1827
1828 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1829 be in the range 0 to total_bits-1, and put any excess bytes in
1830 OFFSET. */
1831 if (bitpos >= total_bits)
1832 {
1833 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1834 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1835 * BITS_PER_UNIT);
1836 }
1837
1838 /* If we're accessing a volatile MEM, we can't do the next
1839 alignment step if it results in a multi-word access where we
1840 otherwise wouldn't have one. So, check for that case
1841 here. */
1842 if (MEM_P (op0)
1843 && MEM_VOLATILE_P (op0)
1844 && flag_strict_volatile_bitfields > 0
1845 && bitpos + bitsize <= total_bits
1846 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1847 {
1848 if (STRICT_ALIGNMENT)
1849 {
1850 static bool informed_about_misalignment = false;
1851 bool warned;
1852
1853 if (packedp)
1854 {
1855 if (bitsize == total_bits)
1856 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1857 "multiple accesses to volatile structure member"
1858 " because of packed attribute");
1859 else
1860 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1861 "multiple accesses to volatile structure bitfield"
1862 " because of packed attribute");
1863
1864 return extract_split_bit_field (op0, bitsize,
1865 bitpos + offset * BITS_PER_UNIT,
1866 unsignedp);
1867 }
1868
1869 if (bitsize == total_bits)
1870 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1871 "mis-aligned access used for structure member");
1872 else
1873 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1874 "mis-aligned access used for structure bitfield");
1875
1876 if (! informed_about_misalignment && warned)
1877 {
1878 informed_about_misalignment = true;
1879 inform (input_location,
1880 "when a volatile object spans multiple type-sized locations,"
1881 " the compiler must choose between using a single mis-aligned access to"
1882 " preserve the volatility, or using multiple aligned accesses to avoid"
1883 " runtime faults; this code may fail at runtime if the hardware does"
1884 " not allow this access");
1885 }
1886 }
1887 }
1888 else
1889 {
1890
1891 /* Get ref to an aligned byte, halfword, or word containing the field.
1892 Adjust BITPOS to be position within a word,
1893 and OFFSET to be the offset of that word.
1894 Then alter OP0 to refer to that word. */
1895 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1896 offset -= (offset % (total_bits / BITS_PER_UNIT));
1897 }
1898
1899 op0 = adjust_address (op0, mode, offset);
1900 }
1901
1902 mode = GET_MODE (op0);
1903
1904 if (BYTES_BIG_ENDIAN)
1905 /* BITPOS is the distance between our msb and that of OP0.
1906 Convert it to the distance from the lsb. */
1907 bitpos = total_bits - bitsize - bitpos;
1908
1909 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1910 We have reduced the big-endian case to the little-endian case. */
1911
1912 if (unsignedp)
1913 {
1914 if (bitpos)
1915 {
1916 /* If the field does not already start at the lsb,
1917 shift it so it does. */
1918 /* Maybe propagate the target for the shift. */
1919 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1920 if (tmode != mode)
1921 subtarget = 0;
1922 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1923 }
1924 /* Convert the value to the desired mode. */
1925 if (mode != tmode)
1926 op0 = convert_to_mode (tmode, op0, 1);
1927
1928 /* Unless the msb of the field used to be the msb when we shifted,
1929 mask out the upper bits. */
1930
1931 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1932 return expand_binop (GET_MODE (op0), and_optab, op0,
1933 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1934 target, 1, OPTAB_LIB_WIDEN);
1935 return op0;
1936 }
1937
1938 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1939 then arithmetic-shift its lsb to the lsb of the word. */
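/* For instance, with a 32-bit word and little-endian bit numbering, a
   signed 5-bit field at bit position 3 of X becomes (X << 24) >> 27,
   where the final right shift is arithmetic.  */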
1940 op0 = force_reg (mode, op0);
1941
1942 /* Find the narrowest integer mode that contains the field. */
1943
1944 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1945 mode = GET_MODE_WIDER_MODE (mode))
1946 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1947 {
1948 op0 = convert_to_mode (mode, op0, 0);
1949 break;
1950 }
1951
1952 if (mode != tmode)
1953 target = 0;
1954
1955 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1956 {
1957 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1958 /* Maybe propagate the target for the shift. */
1959 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1960 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1961 }
1962
1963 return expand_shift (RSHIFT_EXPR, mode, op0,
1964 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1965 }
1966 \f
1967 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1968 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1969 complement of that if COMPLEMENT. The mask is truncated if
1970 necessary to the width of mode MODE. The mask is zero-extended if
1971 BITSIZE+BITPOS is too small for MODE. */
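/* For example, mask_rtx (SImode, 3, 5, 0) yields the constant 0xf8
   (five ones shifted left by three); with COMPLEMENT nonzero the
   result is 0xffffff07 instead.  */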
1972
1973 static rtx
1974 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1975 {
1976 double_int mask;
1977
1978 mask = double_int_mask (bitsize);
1979 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1980
1981 if (complement)
1982 mask = double_int_not (mask);
1983
1984 return immed_double_int_const (mask, mode);
1985 }
1986
1987 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1988 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1989
1990 static rtx
1991 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1992 {
1993 double_int val;
1994
1995 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1996 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1997
1998 return immed_double_int_const (val, mode);
1999 }
2000 \f
2001 /* Extract a bit field that is split across two words
2002 and return an RTX for the result.
2003
2004 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2005 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2006 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
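/* As an illustration: on a 32-bit little-endian target, a 20-bit field
   starting at bit 20 is assembled below from the top 12 bits of the
   first word OR'd with the low 8 bits of the second word shifted left
   by 12.  */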
2007
2008 static rtx
2009 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2010 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2011 {
2012 unsigned int unit;
2013 unsigned int bitsdone = 0;
2014 rtx result = NULL_RTX;
2015 int first = 1;
2016
2017 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2018 much at a time. */
2019 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2020 unit = BITS_PER_WORD;
2021 else
2022 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2023
2024 while (bitsdone < bitsize)
2025 {
2026 unsigned HOST_WIDE_INT thissize;
2027 rtx part, word;
2028 unsigned HOST_WIDE_INT thispos;
2029 unsigned HOST_WIDE_INT offset;
2030
2031 offset = (bitpos + bitsdone) / unit;
2032 thispos = (bitpos + bitsdone) % unit;
2033
2034 /* THISSIZE must not overrun a word boundary. Otherwise,
2035 extract_fixed_bit_field will call us again, and we will mutually
2036 recurse forever. */
2037 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2038 thissize = MIN (thissize, unit - thispos);
2039
2040 /* If OP0 is a register, then handle OFFSET here.
2041
2042 When handling multiword bitfields, extract_bit_field may pass
2043 down a word_mode SUBREG of a larger REG for a bitfield that actually
2044 crosses a word boundary. Thus, for a SUBREG, we must find
2045 the current word starting from the base register. */
2046 if (GET_CODE (op0) == SUBREG)
2047 {
2048 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2049 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2050 GET_MODE (SUBREG_REG (op0)));
2051 offset = 0;
2052 }
2053 else if (REG_P (op0))
2054 {
2055 word = operand_subword_force (op0, offset, GET_MODE (op0));
2056 offset = 0;
2057 }
2058 else
2059 word = op0;
2060
2061 /* Extract the parts in bit-counting order,
2062 whose meaning is determined by BYTES_PER_UNIT.
2063 OFFSET is in UNITs, and UNIT is in bits.
2064 extract_fixed_bit_field wants offset in bytes. */
2065 part = extract_fixed_bit_field (word_mode, word,
2066 offset * unit / BITS_PER_UNIT,
2067 thissize, thispos, 0, 1, false);
2068 bitsdone += thissize;
2069
2070 /* Shift this part into place for the result. */
2071 if (BYTES_BIG_ENDIAN)
2072 {
2073 if (bitsize != bitsdone)
2074 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2075 bitsize - bitsdone, 0, 1);
2076 }
2077 else
2078 {
2079 if (bitsdone != thissize)
2080 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2081 bitsdone - thissize, 0, 1);
2082 }
2083
2084 if (first)
2085 result = part;
2086 else
2087 /* Combine the parts with bitwise or. This works
2088 because we extracted each part as an unsigned bit field. */
2089 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2090 OPTAB_LIB_WIDEN);
2091
2092 first = 0;
2093 }
2094
2095 /* Unsigned bit field: we are done. */
2096 if (unsignedp)
2097 return result;
2098 /* Signed bit field: sign-extend with two arithmetic shifts. */
2099 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2100 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2101 return expand_shift (RSHIFT_EXPR, word_mode, result,
2102 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2103 }
2104 \f
2105 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2106 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2107 MODE, fill the upper bits with zeros. Fail if the layout of either
2108 mode is unknown (as for CC modes) or if the extraction would involve
2109 unprofitable mode punning. Return the value on success, otherwise
2110 return null.
2111
2112 This is different from gen_lowpart* in these respects:
2113
2114 - the returned value must always be considered an rvalue
2115
2116 - when MODE is wider than SRC_MODE, the extraction involves
2117 a zero extension
2118
2119 - when MODE is smaller than SRC_MODE, the extraction involves
2120 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2121
2122 In other words, this routine performs a computation, whereas the
2123 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2124 operations. */
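/* For example, extract_low_bits (SImode, DImode, X) reads the low 32
   bits of X, while extract_low_bits (DImode, SImode, Y) zero-extends Y
   to 64 bits; either call may return null if the modes cannot be tied
   cheaply.  */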
2125
2126 rtx
2127 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2128 {
2129 enum machine_mode int_mode, src_int_mode;
2130
2131 if (mode == src_mode)
2132 return src;
2133
2134 if (CONSTANT_P (src))
2135 {
2136 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2137 fails, it will happily create (subreg (symbol_ref)) or similar
2138 invalid SUBREGs. */
2139 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2140 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2141 if (ret)
2142 return ret;
2143
2144 if (GET_MODE (src) == VOIDmode
2145 || !validate_subreg (mode, src_mode, src, byte))
2146 return NULL_RTX;
2147
2148 src = force_reg (GET_MODE (src), src);
2149 return gen_rtx_SUBREG (mode, src, byte);
2150 }
2151
2152 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2153 return NULL_RTX;
2154
2155 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2156 && MODES_TIEABLE_P (mode, src_mode))
2157 {
2158 rtx x = gen_lowpart_common (mode, src);
2159 if (x)
2160 return x;
2161 }
2162
2163 src_int_mode = int_mode_for_mode (src_mode);
2164 int_mode = int_mode_for_mode (mode);
2165 if (src_int_mode == BLKmode || int_mode == BLKmode)
2166 return NULL_RTX;
2167
2168 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2169 return NULL_RTX;
2170 if (!MODES_TIEABLE_P (int_mode, mode))
2171 return NULL_RTX;
2172
2173 src = gen_lowpart (src_int_mode, src);
2174 src = convert_modes (int_mode, src_int_mode, src, true);
2175 src = gen_lowpart (mode, src);
2176 return src;
2177 }
2178 \f
2179 /* Add INC into TARGET. */
2180
2181 void
2182 expand_inc (rtx target, rtx inc)
2183 {
2184 rtx value = expand_binop (GET_MODE (target), add_optab,
2185 target, inc,
2186 target, 0, OPTAB_LIB_WIDEN);
2187 if (value != target)
2188 emit_move_insn (target, value);
2189 }
2190
2191 /* Subtract DEC from TARGET. */
2192
2193 void
2194 expand_dec (rtx target, rtx dec)
2195 {
2196 rtx value = expand_binop (GET_MODE (target), sub_optab,
2197 target, dec,
2198 target, 0, OPTAB_LIB_WIDEN);
2199 if (value != target)
2200 emit_move_insn (target, value);
2201 }
2202 \f
2203 /* Output a shift instruction for expression code CODE,
2204 with SHIFTED being the rtx for the value to shift,
2205 and AMOUNT the rtx for the amount to shift by.
2206 Store the result in the rtx TARGET, if that is convenient.
2207 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2208 Return the rtx for where the value is. */
2209
2210 static rtx
2211 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2212 rtx amount, rtx target, int unsignedp)
2213 {
2214 rtx op1, temp = 0;
2215 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2216 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2217 optab lshift_optab = ashl_optab;
2218 optab rshift_arith_optab = ashr_optab;
2219 optab rshift_uns_optab = lshr_optab;
2220 optab lrotate_optab = rotl_optab;
2221 optab rrotate_optab = rotr_optab;
2222 enum machine_mode op1_mode;
2223 int attempt;
2224 bool speed = optimize_insn_for_speed_p ();
2225
2226 op1 = amount;
2227 op1_mode = GET_MODE (op1);
2228
2229 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2230 shift amount is a vector, use the vector/vector shift patterns. */
2231 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2232 {
2233 lshift_optab = vashl_optab;
2234 rshift_arith_optab = vashr_optab;
2235 rshift_uns_optab = vlshr_optab;
2236 lrotate_optab = vrotl_optab;
2237 rrotate_optab = vrotr_optab;
2238 }
2239
2240 /* Previously we detected shift counts computed by NEGATE_EXPR
2241 and shifted in the other direction, but that does not work
2242 on all machines. */
2243
2244 if (SHIFT_COUNT_TRUNCATED)
2245 {
2246 if (CONST_INT_P (op1)
2247 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2248 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2249 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2250 % GET_MODE_BITSIZE (mode));
2251 else if (GET_CODE (op1) == SUBREG
2252 && subreg_lowpart_p (op1)
2253 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2254 op1 = SUBREG_REG (op1);
2255 }
2256
2257 if (op1 == const0_rtx)
2258 return shifted;
2259
2260 /* Check whether it's cheaper to implement a left shift by a constant
2261 bit count as a sequence of additions. */
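/* E.g. a left shift by 3 can be done as three successive doublings,
   t = x + x; t = t + t; t = t + t; which is what the loop below emits
   when the additions are cheaper than the shift.  */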
2262 if (code == LSHIFT_EXPR
2263 && CONST_INT_P (op1)
2264 && INTVAL (op1) > 0
2265 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2266 && INTVAL (op1) < MAX_BITS_PER_WORD
2267 && (shift_cost (speed, mode, INTVAL (op1))
2268 > INTVAL (op1) * add_cost (speed, mode))
2269 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2270 {
2271 int i;
2272 for (i = 0; i < INTVAL (op1); i++)
2273 {
2274 temp = force_reg (mode, shifted);
2275 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2276 unsignedp, OPTAB_LIB_WIDEN);
2277 }
2278 return shifted;
2279 }
2280
2281 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2282 {
2283 enum optab_methods methods;
2284
2285 if (attempt == 0)
2286 methods = OPTAB_DIRECT;
2287 else if (attempt == 1)
2288 methods = OPTAB_WIDEN;
2289 else
2290 methods = OPTAB_LIB_WIDEN;
2291
2292 if (rotate)
2293 {
2294 /* Widening does not work for rotation. */
2295 if (methods == OPTAB_WIDEN)
2296 continue;
2297 else if (methods == OPTAB_LIB_WIDEN)
2298 {
2299 /* If we have been unable to open-code this by a rotation,
2300 do it as the IOR of two shifts. I.e., to rotate A
2301 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2302 where C is the bitsize of A.
2303
2304 It is theoretically possible that the target machine might
2305 not be able to perform either shift and hence we would
2306 be making two libcalls rather than just the one for the
2307 shift (similarly if IOR could not be done). We will allow
2308 this extremely unlikely lossage to avoid complicating the
2309 code below. */
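/* For instance, rotating a 32-bit value A left by 10 is emitted as
   (A << 10) | ((unsigned) A >> 22).  */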
2310
2311 rtx subtarget = target == shifted ? 0 : target;
2312 rtx new_amount, other_amount;
2313 rtx temp1;
2314
2315 new_amount = op1;
2316 if (CONST_INT_P (op1))
2317 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2318 - INTVAL (op1));
2319 else
2320 other_amount
2321 = simplify_gen_binary (MINUS, GET_MODE (op1),
2322 GEN_INT (GET_MODE_PRECISION (mode)),
2323 op1);
2324
2325 shifted = force_reg (mode, shifted);
2326
2327 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2328 mode, shifted, new_amount, 0, 1);
2329 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2330 mode, shifted, other_amount,
2331 subtarget, 1);
2332 return expand_binop (mode, ior_optab, temp, temp1, target,
2333 unsignedp, methods);
2334 }
2335
2336 temp = expand_binop (mode,
2337 left ? lrotate_optab : rrotate_optab,
2338 shifted, op1, target, unsignedp, methods);
2339 }
2340 else if (unsignedp)
2341 temp = expand_binop (mode,
2342 left ? lshift_optab : rshift_uns_optab,
2343 shifted, op1, target, unsignedp, methods);
2344
2345 /* Do arithmetic shifts.
2346 Also, if we are going to widen the operand, we can just as well
2347 use an arithmetic right-shift instead of a logical one. */
2348 if (temp == 0 && ! rotate
2349 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2350 {
2351 enum optab_methods methods1 = methods;
2352
2353 /* If trying to widen a log shift to an arithmetic shift,
2354 don't accept an arithmetic shift of the same size. */
2355 if (unsignedp)
2356 methods1 = OPTAB_MUST_WIDEN;
2357
2358 /* Arithmetic shift */
2359
2360 temp = expand_binop (mode,
2361 left ? lshift_optab : rshift_arith_optab,
2362 shifted, op1, target, unsignedp, methods1);
2363 }
2364
2365 /* We used to try extzv here for logical right shifts, but that was
2366 only useful for one machine, the VAX, and caused poor code
2367 generation there for lshrdi3, so the code was deleted and a
2368 define_expand for lshrsi3 was added to vax.md. */
2369 }
2370
2371 gcc_assert (temp);
2372 return temp;
2373 }
2374
2375 /* Output a shift instruction for expression code CODE,
2376 with SHIFTED being the rtx for the value to shift,
2377 and AMOUNT the amount to shift by.
2378 Store the result in the rtx TARGET, if that is convenient.
2379 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2380 Return the rtx for where the value is. */
2381
2382 rtx
2383 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2384 int amount, rtx target, int unsignedp)
2385 {
2386 return expand_shift_1 (code, mode,
2387 shifted, GEN_INT (amount), target, unsignedp);
2388 }
2389
2390 /* Output a shift instruction for expression code CODE,
2391 with SHIFTED being the rtx for the value to shift,
2392 and AMOUNT the tree for the amount to shift by.
2393 Store the result in the rtx TARGET, if that is convenient.
2394 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2395 Return the rtx for where the value is. */
2396
2397 rtx
2398 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2399 tree amount, rtx target, int unsignedp)
2400 {
2401 return expand_shift_1 (code, mode,
2402 shifted, expand_normal (amount), target, unsignedp);
2403 }
2404
2405 \f
2406 /* Indicates the type of fixup needed after a constant multiplication.
2407 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2408 the result should be negated, and ADD_VARIANT means that the
2409 multiplicand should be added to the result. */
2410 enum mult_variant {basic_variant, negate_variant, add_variant};
2411
2412 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2413 const struct mult_cost *, enum machine_mode mode);
2414 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2415 struct algorithm *, enum mult_variant *, int);
2416 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2417 const struct algorithm *, enum mult_variant);
2418 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2419 static rtx extract_high_half (enum machine_mode, rtx);
2420 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2421 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2422 int, int);
2423 /* Compute and return the best algorithm for multiplying by T.
2424 The algorithm must cost less than COST_LIMIT.
2425 If retval.cost >= COST_LIMIT, no algorithm was found and all
2426 other fields of the returned struct are undefined.
2427 MODE is the machine mode of the multiplication. */
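/* As a worked example, T = 10 is typically synthesized as
   ((x << 2) + x) << 1, i.e. a shift-add for the odd factor 5 followed
   by a final shift for the low zero bit; which sequence wins depends
   on the target's add and shift costs.  */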
2428
2429 static void
2430 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2431 const struct mult_cost *cost_limit, enum machine_mode mode)
2432 {
2433 int m;
2434 struct algorithm *alg_in, *best_alg;
2435 struct mult_cost best_cost;
2436 struct mult_cost new_limit;
2437 int op_cost, op_latency;
2438 unsigned HOST_WIDE_INT orig_t = t;
2439 unsigned HOST_WIDE_INT q;
2440 int maxm, hash_index;
2441 bool cache_hit = false;
2442 enum alg_code cache_alg = alg_zero;
2443 bool speed = optimize_insn_for_speed_p ();
2444 enum machine_mode imode;
2445 struct alg_hash_entry *entry_ptr;
2446
2447 /* Indicate that no algorithm is yet found. If no algorithm
2448 is found, this value will be returned and indicate failure. */
2449 alg_out->cost.cost = cost_limit->cost + 1;
2450 alg_out->cost.latency = cost_limit->latency + 1;
2451
2452 if (cost_limit->cost < 0
2453 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2454 return;
2455
2456 /* Be prepared for vector modes. */
2457 imode = GET_MODE_INNER (mode);
2458 if (imode == VOIDmode)
2459 imode = mode;
2460
2461 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2462
2463 /* Restrict the bits of "t" to the multiplication's mode. */
2464 t &= GET_MODE_MASK (imode);
2465
2466 /* t == 1 can be done in zero cost. */
2467 if (t == 1)
2468 {
2469 alg_out->ops = 1;
2470 alg_out->cost.cost = 0;
2471 alg_out->cost.latency = 0;
2472 alg_out->op[0] = alg_m;
2473 return;
2474 }
2475
2476 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2477 fail now. */
2478 if (t == 0)
2479 {
2480 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2481 return;
2482 else
2483 {
2484 alg_out->ops = 1;
2485 alg_out->cost.cost = zero_cost (speed);
2486 alg_out->cost.latency = zero_cost (speed);
2487 alg_out->op[0] = alg_zero;
2488 return;
2489 }
2490 }
2491
2492 /* We'll be needing a couple extra algorithm structures now. */
2493
2494 alg_in = XALLOCA (struct algorithm);
2495 best_alg = XALLOCA (struct algorithm);
2496 best_cost = *cost_limit;
2497
2498 /* Compute the hash index. */
2499 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2500
2501 /* See if we already know what to do for T. */
2502 entry_ptr = alg_hash_entry_ptr (hash_index);
2503 if (entry_ptr->t == t
2504 && entry_ptr->mode == mode
2506 && entry_ptr->speed == speed
2507 && entry_ptr->alg != alg_unknown)
2508 {
2509 cache_alg = entry_ptr->alg;
2510
2511 if (cache_alg == alg_impossible)
2512 {
2513 /* The cache tells us that it's impossible to synthesize
2514 multiplication by T within entry_ptr->cost. */
2515 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2516 /* COST_LIMIT is at least as restrictive as the one
2517 recorded in the hash table, in which case we have no
2518 hope of synthesizing a multiplication. Just
2519 return. */
2520 return;
2521
2522 /* If we get here, COST_LIMIT is less restrictive than the
2523 one recorded in the hash table, so we may be able to
2524 synthesize a multiplication. Proceed as if we didn't
2525 have the cache entry. */
2526 }
2527 else
2528 {
2529 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2530 /* The cached algorithm shows that this multiplication
2531 requires more cost than COST_LIMIT. Just return. This
2532 way, we don't clobber this cache entry with
2533 alg_impossible but retain useful information. */
2534 return;
2535
2536 cache_hit = true;
2537
2538 switch (cache_alg)
2539 {
2540 case alg_shift:
2541 goto do_alg_shift;
2542
2543 case alg_add_t_m2:
2544 case alg_sub_t_m2:
2545 goto do_alg_addsub_t_m2;
2546
2547 case alg_add_factor:
2548 case alg_sub_factor:
2549 goto do_alg_addsub_factor;
2550
2551 case alg_add_t2_m:
2552 goto do_alg_add_t2_m;
2553
2554 case alg_sub_t2_m:
2555 goto do_alg_sub_t2_m;
2556
2557 default:
2558 gcc_unreachable ();
2559 }
2560 }
2561 }
2562
2563 /* If we have a group of zero bits at the low-order part of T, try
2564 multiplying by the remaining bits and then doing a shift. */
2565
2566 if ((t & 1) == 0)
2567 {
2568 do_alg_shift:
2569 m = floor_log2 (t & -t); /* m = number of low zero bits */
2570 if (m < maxm)
2571 {
2572 q = t >> m;
2573 /* The function expand_shift will choose between a shift and
2574 a sequence of additions, so the observed cost is given as
2575 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2576 op_cost = m * add_cost (speed, mode);
2577 if (shift_cost (speed, mode, m) < op_cost)
2578 op_cost = shift_cost (speed, mode, m);
2579 new_limit.cost = best_cost.cost - op_cost;
2580 new_limit.latency = best_cost.latency - op_cost;
2581 synth_mult (alg_in, q, &new_limit, mode);
2582
2583 alg_in->cost.cost += op_cost;
2584 alg_in->cost.latency += op_cost;
2585 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2586 {
2587 struct algorithm *x;
2588 best_cost = alg_in->cost;
2589 x = alg_in, alg_in = best_alg, best_alg = x;
2590 best_alg->log[best_alg->ops] = m;
2591 best_alg->op[best_alg->ops] = alg_shift;
2592 }
2593
2594 /* See if treating ORIG_T as a signed number yields a better
2595 sequence. Try this sequence only for a negative ORIG_T
2596 as it would be useless for a non-negative ORIG_T. */
2597 if ((HOST_WIDE_INT) orig_t < 0)
2598 {
2599 /* Shift ORIG_T as follows because a right shift of a
2600 negative-valued signed type is implementation
2601 defined. */
2602 q = ~(~orig_t >> m);
2603 /* The function expand_shift will choose between a shift
2604 and a sequence of additions, so the observed cost is
2605 given as MIN (m * add_cost(speed, mode),
2606 shift_cost(speed, mode, m)). */
2607 op_cost = m * add_cost (speed, mode);
2608 if (shift_cost (speed, mode, m) < op_cost)
2609 op_cost = shift_cost (speed, mode, m);
2610 new_limit.cost = best_cost.cost - op_cost;
2611 new_limit.latency = best_cost.latency - op_cost;
2612 synth_mult (alg_in, q, &new_limit, mode);
2613
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_cost;
2616 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2617 {
2618 struct algorithm *x;
2619 best_cost = alg_in->cost;
2620 x = alg_in, alg_in = best_alg, best_alg = x;
2621 best_alg->log[best_alg->ops] = m;
2622 best_alg->op[best_alg->ops] = alg_shift;
2623 }
2624 }
2625 }
2626 if (cache_hit)
2627 goto done;
2628 }
2629
2630 /* If we have an odd number, add or subtract one. */
2631 if ((t & 1) != 0)
2632 {
2633 unsigned HOST_WIDE_INT w;
2634
2635 do_alg_addsub_t_m2:
2636 for (w = 1; (w & t) != 0; w <<= 1)
2637 ;
2638 /* If T was -1, then W will be zero after the loop. This is another
2639 case where T ends with ...111. Handling it by multiplying by (T + 1)
2640 and subtracting 1 produces slightly better code and makes algorithm
2641 selection much faster than treating it like the ...0111 case
2642 below. */
2643 if (w == 0
2644 || (w > 2
2645 /* Reject the case where t is 3.
2646 Thus we prefer addition in that case. */
2647 && t != 3))
2648 {
2649 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2650
2651 op_cost = add_cost (speed, mode);
2652 new_limit.cost = best_cost.cost - op_cost;
2653 new_limit.latency = best_cost.latency - op_cost;
2654 synth_mult (alg_in, t + 1, &new_limit, mode);
2655
2656 alg_in->cost.cost += op_cost;
2657 alg_in->cost.latency += op_cost;
2658 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2659 {
2660 struct algorithm *x;
2661 best_cost = alg_in->cost;
2662 x = alg_in, alg_in = best_alg, best_alg = x;
2663 best_alg->log[best_alg->ops] = 0;
2664 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2665 }
2666 }
2667 else
2668 {
2669 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2670
2671 op_cost = add_cost (speed, mode);
2672 new_limit.cost = best_cost.cost - op_cost;
2673 new_limit.latency = best_cost.latency - op_cost;
2674 synth_mult (alg_in, t - 1, &new_limit, mode);
2675
2676 alg_in->cost.cost += op_cost;
2677 alg_in->cost.latency += op_cost;
2678 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 {
2680 struct algorithm *x;
2681 best_cost = alg_in->cost;
2682 x = alg_in, alg_in = best_alg, best_alg = x;
2683 best_alg->log[best_alg->ops] = 0;
2684 best_alg->op[best_alg->ops] = alg_add_t_m2;
2685 }
2686 }
2687
2688 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2689 quickly with a - a * n for some appropriate constant n. */
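/* E.g. a * -7 can be computed as a - (a << 3).  */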
2690 m = exact_log2 (-orig_t + 1);
2691 if (m >= 0 && m < maxm)
2692 {
2693 op_cost = shiftsub1_cost (speed, mode, m);
2694 new_limit.cost = best_cost.cost - op_cost;
2695 new_limit.latency = best_cost.latency - op_cost;
2696 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2697 &new_limit, mode);
2698
2699 alg_in->cost.cost += op_cost;
2700 alg_in->cost.latency += op_cost;
2701 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702 {
2703 struct algorithm *x;
2704 best_cost = alg_in->cost;
2705 x = alg_in, alg_in = best_alg, best_alg = x;
2706 best_alg->log[best_alg->ops] = m;
2707 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2708 }
2709 }
2710
2711 if (cache_hit)
2712 goto done;
2713 }
2714
2715 /* Look for factors of t of the form
2716 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2717 If we find such a factor, we can multiply by t using an algorithm that
2718 multiplies by q, shift the result by m and add/subtract it to itself.
2719
2720 We search for large factors first and loop down, even if large factors
2721 are less probable than small; if we find a large factor we will find a
2722 good sequence quickly, and therefore be able to prune (by decreasing
2723 COST_LIMIT) the search. */
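/* For example, T = 15 factors as 3 * (2^2 + 1), giving the candidate
   sequence: synthesize x * 3, then compute (result << 2) + result.  */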
2724
2725 do_alg_addsub_factor:
2726 for (m = floor_log2 (t - 1); m >= 2; m--)
2727 {
2728 unsigned HOST_WIDE_INT d;
2729
2730 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2731 if (t % d == 0 && t > d && m < maxm
2732 && (!cache_hit || cache_alg == alg_add_factor))
2733 {
2734 /* If the target has a cheap shift-and-add instruction use
2735 that in preference to a shift insn followed by an add insn.
2736 Assume that the shift-and-add is "atomic" with a latency
2737 equal to its cost, otherwise assume that on superscalar
2738 hardware the shift may be executed concurrently with the
2739 earlier steps in the algorithm. */
2740 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2741 if (shiftadd_cost (speed, mode, m) < op_cost)
2742 {
2743 op_cost = shiftadd_cost (speed, mode, m);
2744 op_latency = op_cost;
2745 }
2746 else
2747 op_latency = add_cost (speed, mode);
2748
2749 new_limit.cost = best_cost.cost - op_cost;
2750 new_limit.latency = best_cost.latency - op_latency;
2751 synth_mult (alg_in, t / d, &new_limit, mode);
2752
2753 alg_in->cost.cost += op_cost;
2754 alg_in->cost.latency += op_latency;
2755 if (alg_in->cost.latency < op_cost)
2756 alg_in->cost.latency = op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758 {
2759 struct algorithm *x;
2760 best_cost = alg_in->cost;
2761 x = alg_in, alg_in = best_alg, best_alg = x;
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_add_factor;
2764 }
2765 /* Other factors will have been taken care of in the recursion. */
2766 break;
2767 }
2768
2769 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2770 if (t % d == 0 && t > d && m < maxm
2771 && (!cache_hit || cache_alg == alg_sub_factor))
2772 {
2773 /* If the target has a cheap shift-and-subtract insn use
2774 that in preference to a shift insn followed by a sub insn.
2775 Assume that the shift-and-sub is "atomic" with a latency
2776 equal to its cost, otherwise assume that on superscalar
2777 hardware the shift may be executed concurrently with the
2778 earlier steps in the algorithm. */
2779 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2780 if (shiftsub0_cost (speed, mode, m) < op_cost)
2781 {
2782 op_cost = shiftsub0_cost (speed, mode, m);
2783 op_latency = op_cost;
2784 }
2785 else
2786 op_latency = add_cost (speed, mode);
2787
2788 new_limit.cost = best_cost.cost - op_cost;
2789 new_limit.latency = best_cost.latency - op_latency;
2790 synth_mult (alg_in, t / d, &new_limit, mode);
2791
2792 alg_in->cost.cost += op_cost;
2793 alg_in->cost.latency += op_latency;
2794 if (alg_in->cost.latency < op_cost)
2795 alg_in->cost.latency = op_cost;
2796 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2797 {
2798 struct algorithm *x;
2799 best_cost = alg_in->cost;
2800 x = alg_in, alg_in = best_alg, best_alg = x;
2801 best_alg->log[best_alg->ops] = m;
2802 best_alg->op[best_alg->ops] = alg_sub_factor;
2803 }
2804 break;
2805 }
2806 }
2807 if (cache_hit)
2808 goto done;
2809
2810 /* Try shift-and-add (load effective address) instructions,
2811 i.e. do a*3, a*5, a*9. */
2812 if ((t & 1) != 0)
2813 {
2814 do_alg_add_t2_m:
2815 q = t - 1;
2816 q = q & -q;
2817 m = exact_log2 (q);
2818 if (m >= 0 && m < maxm)
2819 {
2820 op_cost = shiftadd_cost (speed, mode, m);
2821 new_limit.cost = best_cost.cost - op_cost;
2822 new_limit.latency = best_cost.latency - op_cost;
2823 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2824
2825 alg_in->cost.cost += op_cost;
2826 alg_in->cost.latency += op_cost;
2827 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2828 {
2829 struct algorithm *x;
2830 best_cost = alg_in->cost;
2831 x = alg_in, alg_in = best_alg, best_alg = x;
2832 best_alg->log[best_alg->ops] = m;
2833 best_alg->op[best_alg->ops] = alg_add_t2_m;
2834 }
2835 }
2836 if (cache_hit)
2837 goto done;
2838
2839 do_alg_sub_t2_m:
2840 q = t + 1;
2841 q = q & -q;
2842 m = exact_log2 (q);
2843 if (m >= 0 && m < maxm)
2844 {
2845 op_cost = shiftsub0_cost (speed, mode, m);
2846 new_limit.cost = best_cost.cost - op_cost;
2847 new_limit.latency = best_cost.latency - op_cost;
2848 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2849
2850 alg_in->cost.cost += op_cost;
2851 alg_in->cost.latency += op_cost;
2852 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2853 {
2854 struct algorithm *x;
2855 best_cost = alg_in->cost;
2856 x = alg_in, alg_in = best_alg, best_alg = x;
2857 best_alg->log[best_alg->ops] = m;
2858 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2859 }
2860 }
2861 if (cache_hit)
2862 goto done;
2863 }
2864
2865 done:
2866 /* If best_cost has not decreased, we have not found any algorithm. */
2867 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2868 {
2869 /* We failed to find an algorithm. Record alg_impossible for
2870 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2871 we are asked to find an algorithm for T within the same or
2872 lower COST_LIMIT, we can immediately return to the
2873 caller. */
2874 entry_ptr->t = t;
2875 entry_ptr->mode = mode;
2876 entry_ptr->speed = speed;
2877 entry_ptr->alg = alg_impossible;
2878 entry_ptr->cost = *cost_limit;
2879 return;
2880 }
2881
2882 /* Cache the result. */
2883 if (!cache_hit)
2884 {
2885 entry_ptr->t = t;
2886 entry_ptr->mode = mode;
2887 entry_ptr->speed = speed;
2888 entry_ptr->alg = best_alg->op[best_alg->ops];
2889 entry_ptr->cost.cost = best_cost.cost;
2890 entry_ptr->cost.latency = best_cost.latency;
2891 }
2892
2893 /* If we are getting too long a sequence for `struct algorithm'
2894 to record, make this search fail. */
2895 if (best_alg->ops == MAX_BITS_PER_WORD)
2896 return;
2897
2898 /* Copy the algorithm from temporary space to the space at alg_out.
2899 We avoid using structure assignment because the majority of
2900 best_alg is normally undefined, and this is a critical function. */
2901 alg_out->ops = best_alg->ops + 1;
2902 alg_out->cost = best_cost;
2903 memcpy (alg_out->op, best_alg->op,
2904 alg_out->ops * sizeof *alg_out->op);
2905 memcpy (alg_out->log, best_alg->log,
2906 alg_out->ops * sizeof *alg_out->log);
2907 }
2908 \f
2909 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2910 Try three variations:
2911
2912 - a shift/add sequence based on VAL itself
2913 - a shift/add sequence based on -VAL, followed by a negation
2914 - a shift/add sequence based on VAL - 1, followed by an addition.
2915
2916 Return true if the cheapest of these cost less than MULT_COST,
2917 describing the algorithm in *ALG and final fixup in *VARIANT. */
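/* For VAL = 7, for instance, the candidates are a sequence for 7
   itself (such as (x << 3) - x), a sequence for -7 followed by a
   negation, and a sequence for 6 followed by an addition of X; the
   cheapest of the three is chosen.  */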
2918
2919 static bool
2920 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2921 struct algorithm *alg, enum mult_variant *variant,
2922 int mult_cost)
2923 {
2924 struct algorithm alg2;
2925 struct mult_cost limit;
2926 int op_cost;
2927 bool speed = optimize_insn_for_speed_p ();
2928
2929 /* Fail quickly for impossible bounds. */
2930 if (mult_cost < 0)
2931 return false;
2932
2933 /* Ensure that mult_cost provides a reasonable upper bound.
2934 Any constant multiplication can be performed with less
2935 than 2 * bits additions. */
2936 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2937 if (mult_cost > op_cost)
2938 mult_cost = op_cost;
2939
2940 *variant = basic_variant;
2941 limit.cost = mult_cost;
2942 limit.latency = mult_cost;
2943 synth_mult (alg, val, &limit, mode);
2944
2945 /* This works only if the inverted value actually fits in an
2946 `unsigned int'. */
2947 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2948 {
2949 op_cost = neg_cost(speed, mode);
2950 if (MULT_COST_LESS (&alg->cost, mult_cost))
2951 {
2952 limit.cost = alg->cost.cost - op_cost;
2953 limit.latency = alg->cost.latency - op_cost;
2954 }
2955 else
2956 {
2957 limit.cost = mult_cost - op_cost;
2958 limit.latency = mult_cost - op_cost;
2959 }
2960
2961 synth_mult (&alg2, -val, &limit, mode);
2962 alg2.cost.cost += op_cost;
2963 alg2.cost.latency += op_cost;
2964 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2965 *alg = alg2, *variant = negate_variant;
2966 }
2967
2968 /* This proves very useful for division-by-constant. */
2969 op_cost = add_cost (speed, mode);
2970 if (MULT_COST_LESS (&alg->cost, mult_cost))
2971 {
2972 limit.cost = alg->cost.cost - op_cost;
2973 limit.latency = alg->cost.latency - op_cost;
2974 }
2975 else
2976 {
2977 limit.cost = mult_cost - op_cost;
2978 limit.latency = mult_cost - op_cost;
2979 }
2980
2981 synth_mult (&alg2, val - 1, &limit, mode);
2982 alg2.cost.cost += op_cost;
2983 alg2.cost.latency += op_cost;
2984 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2985 *alg = alg2, *variant = add_variant;
2986
2987 return MULT_COST_LESS (&alg->cost, mult_cost);
2988 }
2989
2990 /* A subroutine of expand_mult, used for constant multiplications.
2991 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2992 convenient. Use the shift/add sequence described by ALG and apply
2993 the final fixup specified by VARIANT. */
2994
2995 static rtx
2996 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2997 rtx target, const struct algorithm *alg,
2998 enum mult_variant variant)
2999 {
3000 HOST_WIDE_INT val_so_far;
3001 rtx insn, accum, tem;
3002 int opno;
3003 enum machine_mode nmode;
3004
3005 /* Avoid referencing memory over and over and invalid sharing
3006 on SUBREGs. */
3007 op0 = force_reg (mode, op0);
3008
3009 /* ACCUM starts out either as OP0 or as a zero, depending on
3010 the first operation. */
3011
3012 if (alg->op[0] == alg_zero)
3013 {
3014 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3015 val_so_far = 0;
3016 }
3017 else if (alg->op[0] == alg_m)
3018 {
3019 accum = copy_to_mode_reg (mode, op0);
3020 val_so_far = 1;
3021 }
3022 else
3023 gcc_unreachable ();
3024
3025 for (opno = 1; opno < alg->ops; opno++)
3026 {
3027 int log = alg->log[opno];
3028 rtx shift_subtarget = optimize ? 0 : accum;
3029 rtx add_target
3030 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3031 && !optimize)
3032 ? target : 0;
3033 rtx accum_target = optimize ? 0 : accum;
3034 rtx accum_inner;
3035
3036 switch (alg->op[opno])
3037 {
3038 case alg_shift:
3039 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3040 /* REG_EQUAL note will be attached to the following insn. */
3041 emit_move_insn (accum, tem);
3042 val_so_far <<= log;
3043 break;
3044
3045 case alg_add_t_m2:
3046 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3047 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3048 add_target ? add_target : accum_target);
3049 val_so_far += (HOST_WIDE_INT) 1 << log;
3050 break;
3051
3052 case alg_sub_t_m2:
3053 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3054 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3055 add_target ? add_target : accum_target);
3056 val_so_far -= (HOST_WIDE_INT) 1 << log;
3057 break;
3058
3059 case alg_add_t2_m:
3060 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3061 log, shift_subtarget, 0);
3062 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3063 add_target ? add_target : accum_target);
3064 val_so_far = (val_so_far << log) + 1;
3065 break;
3066
3067 case alg_sub_t2_m:
3068 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3069 log, shift_subtarget, 0);
3070 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3071 add_target ? add_target : accum_target);
3072 val_so_far = (val_so_far << log) - 1;
3073 break;
3074
3075 case alg_add_factor:
3076 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3077 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3078 add_target ? add_target : accum_target);
3079 val_so_far += val_so_far << log;
3080 break;
3081
3082 case alg_sub_factor:
3083 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3084 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3085 (add_target
3086 ? add_target : (optimize ? 0 : tem)));
3087 val_so_far = (val_so_far << log) - val_so_far;
3088 break;
3089
3090 default:
3091 gcc_unreachable ();
3092 }
3093
3094 if (SCALAR_INT_MODE_P (mode))
3095 {
3096 /* Write a REG_EQUAL note on the last insn so that we can cse
3097 multiplication sequences. Note that if ACCUM is a SUBREG,
3098 we've set the inner register and must properly indicate that. */
3099 tem = op0, nmode = mode;
3100 accum_inner = accum;
3101 if (GET_CODE (accum) == SUBREG)
3102 {
3103 accum_inner = SUBREG_REG (accum);
3104 nmode = GET_MODE (accum_inner);
3105 tem = gen_lowpart (nmode, op0);
3106 }
3107
3108 insn = get_last_insn ();
3109 set_dst_reg_note (insn, REG_EQUAL,
3110 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3111 accum_inner);
3112 }
3113 }
3114
3115 if (variant == negate_variant)
3116 {
3117 val_so_far = -val_so_far;
3118 accum = expand_unop (mode, neg_optab, accum, target, 0);
3119 }
3120 else if (variant == add_variant)
3121 {
3122 val_so_far = val_so_far + 1;
3123 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3124 }
3125
3126 /* Compare only the bits of val and val_so_far that are significant
3127 in the result mode, to avoid sign-/zero-extension confusion. */
3128 nmode = GET_MODE_INNER (mode);
3129 if (nmode == VOIDmode)
3130 nmode = mode;
3131 val &= GET_MODE_MASK (nmode);
3132 val_so_far &= GET_MODE_MASK (nmode);
3133 gcc_assert (val == val_so_far);
3134
3135 return accum;
3136 }
3137
3138 /* Perform a multiplication and return an rtx for the result.
3139 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3140 TARGET is a suggestion for where to store the result (an rtx).
3141
3142 We check specially for a constant integer as OP1.
3143 If you want this check for OP0 as well, then before calling
3144 you should swap the two operands if OP0 would be constant. */
3145
3146 rtx
3147 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3148 int unsignedp)
3149 {
3150 enum mult_variant variant;
3151 struct algorithm algorithm;
3152 rtx scalar_op1;
3153 int max_cost;
3154 bool speed = optimize_insn_for_speed_p ();
3155 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3156
3157 if (CONSTANT_P (op0))
3158 {
3159 rtx temp = op0;
3160 op0 = op1;
3161 op1 = temp;
3162 }
3163
3164 /* For vectors, there are several simplifications that can be made if
3165 all elements of the vector constant are identical. */
3166 scalar_op1 = op1;
3167 if (GET_CODE (op1) == CONST_VECTOR)
3168 {
3169 int i, n = CONST_VECTOR_NUNITS (op1);
3170 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3171 for (i = 1; i < n; ++i)
3172 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3173 goto skip_scalar;
3174 }
3175
3176 if (INTEGRAL_MODE_P (mode))
3177 {
3178 rtx fake_reg;
3179 HOST_WIDE_INT coeff = 0;
3180 bool is_neg = false;
3181 int mode_bitsize;
3182
3183 if (op1 == CONST0_RTX (mode))
3184 return op1;
3185 if (op1 == CONST1_RTX (mode))
3186 return op0;
3187 if (op1 == CONSTM1_RTX (mode))
3188 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3189 op0, target, 0);
3190
3191 if (do_trapv)
3192 goto skip_synth;
3193
3194 /* These are the operations that are potentially turned into
3195 a sequence of shifts and additions. */
3196 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3197
3198 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3199 less than or equal in size to `unsigned int' this doesn't matter.
3200 If the mode is larger than `unsigned int', then synth_mult works
3201 only if the constant value exactly fits in an `unsigned int' without
3202 any truncation. This means that multiplying by negative values does
3203 not work; results are off by 2^32 on a 32 bit machine. */
3204
3205 if (CONST_INT_P (scalar_op1))
3206 {
3207 coeff = INTVAL (scalar_op1);
3208 is_neg = coeff < 0;
3209 }
3210 else if (CONST_DOUBLE_P (scalar_op1))
3211 {
3212 /* If we are multiplying in DImode, it may still be a win
3213 to try to work with shifts and adds. */
3214 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3215 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3216 {
3217 coeff = CONST_DOUBLE_LOW (scalar_op1);
3218 is_neg = false;
3219 }
3220 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3221 {
3222 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3223 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3224 {
3225 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3226 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3227 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3228 return expand_shift (LSHIFT_EXPR, mode, op0,
3229 shift, target, unsignedp);
3230 }
3231 goto skip_synth;
3232 }
3233 }
3234 else
3235 goto skip_synth;
3236
3237 /* We used to test optimize here, on the grounds that it's better to
3238 produce a smaller program when -O is not used. But this causes
3239 such a terrible slowdown sometimes that it seems better to always
3240 use synth_mult. */
3241
3242 /* Special case powers of two. */
3243 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3244 return expand_shift (LSHIFT_EXPR, mode, op0,
3245 floor_log2 (coeff), target, unsignedp);
3246
3247 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3248
3249 /* Attempt to handle multiplication of DImode values by negative
3250 coefficients, by performing the multiplication by a positive
3251 multiplier and then inverting the result. */
3252 /* ??? How is this not slightly redundant with the neg variant? */
3253 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3254 {
3255 /* It's safe to use -coeff even for INT_MIN, as the
3256 result is interpreted as an unsigned coefficient.
3257 Exclude cost of op0 from max_cost to match the cost
3258 calculation of the synth_mult. */
3259 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3260 - neg_cost(speed, mode));
3261 if (max_cost > 0
3262 && choose_mult_variant (mode, -coeff, &algorithm,
3263 &variant, max_cost))
3264 {
3265 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3266 &algorithm, variant);
3267 return expand_unop (mode, neg_optab, temp, target, 0);
3268 }
3269 }
3270
3271 /* Exclude cost of op0 from max_cost to match the cost
3272 calculation of the synth_mult. */
3273 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3274 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3275 return expand_mult_const (mode, op0, coeff, target,
3276 &algorithm, variant);
3277 }
3278 skip_synth:
3279
3280 /* Expand x*2.0 as x+x. */
3281 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
3282 {
3283 REAL_VALUE_TYPE d;
3284 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3285
3286 if (REAL_VALUES_EQUAL (d, dconst2))
3287 {
3288 op0 = force_reg (GET_MODE (op0), op0);
3289 return expand_binop (mode, add_optab, op0, op0,
3290 target, unsignedp, OPTAB_LIB_WIDEN);
3291 }
3292 }
3293 skip_scalar:
3294
3295 /* This used to use umul_optab if unsigned, but for non-widening multiply
3296 there is no difference between signed and unsigned. */
3297 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3298 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3299 gcc_assert (op0);
3300 return op0;
3301 }
3302
3303 /* Return a cost estimate for multiplying a register by the given
3304 COEFFicient in the given MODE and SPEED. */
3305
3306 int
3307 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3308 {
3309 int max_cost;
3310 struct algorithm algorithm;
3311 enum mult_variant variant;
3312
3313 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3314 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3315 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3316 return algorithm.cost.cost;
3317 else
3318 return max_cost;
3319 }
3320
3321 /* Perform a widening multiplication and return an rtx for the result.
3322 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3323 TARGET is a suggestion for where to store the result (an rtx).
3324 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3325 or smul_widen_optab.
3326
3327 We check specially for a constant integer as OP1, comparing the
3328 cost of a widening multiply against the cost of a sequence of shifts
3329 and adds. */
3330
3331 rtx
3332 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3333 int unsignedp, optab this_optab)
3334 {
3335 bool speed = optimize_insn_for_speed_p ();
3336 rtx cop1;
3337
3338 if (CONST_INT_P (op1)
3339 && GET_MODE (op0) != VOIDmode
3340 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3341 this_optab == umul_widen_optab))
3342 && CONST_INT_P (cop1)
3343 && (INTVAL (cop1) >= 0
3344 || HWI_COMPUTABLE_MODE_P (mode)))
3345 {
3346 HOST_WIDE_INT coeff = INTVAL (cop1);
3347 int max_cost;
3348 enum mult_variant variant;
3349 struct algorithm algorithm;
3350
3351 /* Special case powers of two. */
3352 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3353 {
3354 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3355 return expand_shift (LSHIFT_EXPR, mode, op0,
3356 floor_log2 (coeff), target, unsignedp);
3357 }
3358
3359 /* Exclude cost of op0 from max_cost to match the cost
3360 calculation of the synth_mult. */
3361 max_cost = mul_widen_cost (speed, mode);
3362 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3363 max_cost))
3364 {
3365 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3366 return expand_mult_const (mode, op0, coeff, target,
3367 &algorithm, variant);
3368 }
3369 }
3370 return expand_binop (mode, this_optab, op0, op1, target,
3371 unsignedp, OPTAB_LIB_WIDEN);
3372 }
3373 \f
3374 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3375 replace division by D, and put the least significant N bits of the result
3376 in *MULTIPLIER_PTR and return the most significant bit.
3377
3378 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3379 needed precision is in PRECISION (should be <= N).
3380
3381 PRECISION should be as small as possible so this function can choose
3382 multiplier more freely.
3383
3384 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3385 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3386
3387 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3388 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
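/* An illustrative example, not taken from the original sources: with
   D = 7 and N = PRECISION = 32 we get lgup = 3, so
   mlow  = floor (2^35 / 7)         = 0x124924924 and
   mhigh = floor ((2^35 + 2^3) / 7) = 0x124924925.
   Both still have bit 32 set, so the function returns 1 (the extra
   leading bit), stores the low 32 bits 0x24924925 in *MULTIPLIER_PTR
   and sets *POST_SHIFT_PTR to 3; callers such as expand_divmod then
   use the add-and-shift fixup sequence for the extra bit.  */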
3389
3390 unsigned HOST_WIDE_INT
3391 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3392 unsigned HOST_WIDE_INT *multiplier_ptr,
3393 int *post_shift_ptr, int *lgup_ptr)
3394 {
3395 HOST_WIDE_INT mhigh_hi, mlow_hi;
3396 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3397 int lgup, post_shift;
3398 int pow, pow2;
3399 unsigned HOST_WIDE_INT nl, dummy1;
3400 HOST_WIDE_INT nh, dummy2;
3401
3402 /* lgup = ceil(log2(divisor)); */
3403 lgup = ceil_log2 (d);
3404
3405 gcc_assert (lgup <= n);
3406
3407 pow = n + lgup;
3408 pow2 = n + lgup - precision;
3409
3410 /* We could handle this with some effort, but this case is much
3411 better handled directly with a scc insn, so rely on caller using
3412 that. */
3413 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3414
3415 /* mlow = 2^(N + lgup)/d */
3416 if (pow >= HOST_BITS_PER_WIDE_INT)
3417 {
3418 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3419 nl = 0;
3420 }
3421 else
3422 {
3423 nh = 0;
3424 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3425 }
3426 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3427 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3428
3429 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3430 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3431 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3432 else
3433 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3434 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3435 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3436
3437 gcc_assert (!mhigh_hi || nh - d < d);
3438 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3439 /* Assert that mlow < mhigh. */
3440 gcc_assert (mlow_hi < mhigh_hi
3441 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3442
3443 /* If precision == N, then mlow, mhigh exceed 2^N
3444 (but they do not exceed 2^(N+1)). */
3445
3446 /* Reduce to lowest terms. */
3447 for (post_shift = lgup; post_shift > 0; post_shift--)
3448 {
3449 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3450 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3451 if (ml_lo >= mh_lo)
3452 break;
3453
3454 mlow_hi = 0;
3455 mlow_lo = ml_lo;
3456 mhigh_hi = 0;
3457 mhigh_lo = mh_lo;
3458 }
3459
3460 *post_shift_ptr = post_shift;
3461 *lgup_ptr = lgup;
3462 if (n < HOST_BITS_PER_WIDE_INT)
3463 {
3464 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3465 *multiplier_ptr = mhigh_lo & mask;
3466 return mhigh_lo >= mask;
3467 }
3468 else
3469 {
3470 *multiplier_ptr = mhigh_lo;
3471 return mhigh_hi;
3472 }
3473 }
3474
3475 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3476 congruent to 1 (mod 2**N). */
3477
3478 static unsigned HOST_WIDE_INT
3479 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3480 {
3481 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3482
3483 /* The algorithm notes that the choice y = x satisfies
3484 x*y == 1 mod 2^3, since x is assumed odd.
3485 Each iteration doubles the number of bits of significance in y. */
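  /* An illustrative example, not from the original sources: with x = 7
     and n = 8, y starts at 7 (7*7 = 49 == 1 mod 8).  The first pass
     gives y = 7 * (2 - 49) & 0xff = 183, and indeed 7 * 183 = 1281
     == 1 (mod 256); the second pass leaves y unchanged, and the loop
     stops once nbit >= n.  */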
3486
3487 unsigned HOST_WIDE_INT mask;
3488 unsigned HOST_WIDE_INT y = x;
3489 int nbit = 3;
3490
3491 mask = (n == HOST_BITS_PER_WIDE_INT
3492 ? ~(unsigned HOST_WIDE_INT) 0
3493 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3494
3495 while (nbit < n)
3496 {
3497 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3498 nbit *= 2;
3499 }
3500 return y;
3501 }
3502
3503 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3504 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3505 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3506 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3507 become signed.
3508
3509 The result is put in TARGET if that is convenient.
3510
3511 MODE is the mode of operation. */
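/* Why this works (illustrative derivation, not from the original
   sources): for N-bit operands, the unsigned reading of a value
   differs from the signed one by exactly 2^N when the sign bit is
   set, so modulo 2^N

     high_u (op0 * op1) = high_s (op0 * op1)
                          + (op0 < 0 ? op1 : 0) + (op1 < 0 ? op0 : 0),

   and the two shift/and steps below add (or, going the other way,
   subtract) exactly those correction terms.  */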
3512
3513 rtx
3514 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3515 rtx op1, rtx target, int unsignedp)
3516 {
3517 rtx tem;
3518 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3519
3520 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3521 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3522 tem = expand_and (mode, tem, op1, NULL_RTX);
3523 adj_operand
3524 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3525 adj_operand);
3526
3527 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3528 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3529 tem = expand_and (mode, tem, op0, NULL_RTX);
3530 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3531 target);
3532
3533 return target;
3534 }
3535
3536 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3537
3538 static rtx
3539 extract_high_half (enum machine_mode mode, rtx op)
3540 {
3541 enum machine_mode wider_mode;
3542
3543 if (mode == word_mode)
3544 return gen_highpart (mode, op);
3545
3546 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3547
3548 wider_mode = GET_MODE_WIDER_MODE (mode);
3549 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3550 GET_MODE_BITSIZE (mode), 0, 1);
3551 return convert_modes (mode, wider_mode, op, 0);
3552 }
3553
3554 /* Like expmed_mult_highpart, but only consider using a multiplication
3555 optab. OP1 is an rtx for the constant operand. */
3556
3557 static rtx
3558 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3559 rtx target, int unsignedp, int max_cost)
3560 {
3561 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3562 enum machine_mode wider_mode;
3563 optab moptab;
3564 rtx tem;
3565 int size;
3566 bool speed = optimize_insn_for_speed_p ();
3567
3568 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3569
3570 wider_mode = GET_MODE_WIDER_MODE (mode);
3571 size = GET_MODE_BITSIZE (mode);
3572
3573 /* Firstly, try using a multiplication insn that only generates the needed
3574 high part of the product, and in the sign flavor of unsignedp. */
3575 if (mul_highpart_cost (speed, mode) < max_cost)
3576 {
3577 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3578 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3579 unsignedp, OPTAB_DIRECT);
3580 if (tem)
3581 return tem;
3582 }
3583
3584 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3585 Need to adjust the result after the multiplication. */
3586 if (size - 1 < BITS_PER_WORD
3587 && (mul_highpart_cost (speed, mode)
3588 + 2 * shift_cost (speed, mode, size-1)
3589 + 4 * add_cost (speed, mode) < max_cost))
3590 {
3591 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3592 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3593 unsignedp, OPTAB_DIRECT);
3594 if (tem)
3595 /* We used the wrong signedness. Adjust the result. */
3596 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3597 tem, unsignedp);
3598 }
3599
3600 /* Try widening multiplication. */
3601 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3602 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3603 && mul_widen_cost (speed, wider_mode) < max_cost)
3604 {
3605 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3606 unsignedp, OPTAB_WIDEN);
3607 if (tem)
3608 return extract_high_half (mode, tem);
3609 }
3610
3611 /* Try widening the mode and performing a non-widening multiplication. */
3612 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3613 && size - 1 < BITS_PER_WORD
3614 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3615 < max_cost))
3616 {
3617 rtx insns, wop0, wop1;
3618
3619 /* We need to widen the operands, for example to ensure the
3620 constant multiplier is correctly sign or zero extended.
3621 Use a sequence to clean up any instructions emitted by
3622 the conversions if things don't work out. */
3623 start_sequence ();
3624 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3625 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3626 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3627 unsignedp, OPTAB_WIDEN);
3628 insns = get_insns ();
3629 end_sequence ();
3630
3631 if (tem)
3632 {
3633 emit_insn (insns);
3634 return extract_high_half (mode, tem);
3635 }
3636 }
3637
3638 /* Try widening multiplication of opposite signedness, and adjust. */
3639 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3640 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3641 && size - 1 < BITS_PER_WORD
3642 && (mul_widen_cost (speed, wider_mode)
3643 + 2 * shift_cost (speed, mode, size-1)
3644 + 4 * add_cost (speed, mode) < max_cost))
3645 {
3646 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3647 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3648 if (tem != 0)
3649 {
3650 tem = extract_high_half (mode, tem);
3651 /* We used the wrong signedness. Adjust the result. */
3652 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3653 target, unsignedp);
3654 }
3655 }
3656
3657 return 0;
3658 }
3659
3660 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3661 putting the high half of the result in TARGET if that is convenient,
3662 and return where the result is. If the operation cannot be performed,
3663 0 is returned.
3664
3665 MODE is the mode of operation and result.
3666
3667 UNSIGNEDP nonzero means unsigned multiply.
3668
3669 MAX_COST is the total allowed cost for the expanded RTL. */
3670
3671 static rtx
3672 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3673 rtx target, int unsignedp, int max_cost)
3674 {
3675 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3676 unsigned HOST_WIDE_INT cnst1;
3677 int extra_cost;
3678 bool sign_adjust = false;
3679 enum mult_variant variant;
3680 struct algorithm alg;
3681 rtx tem;
3682 bool speed = optimize_insn_for_speed_p ();
3683
3684 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3685 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3686 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3687
3688 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3689
3690 /* We can't optimize modes wider than BITS_PER_WORD.
3691 ??? We might be able to perform double-word arithmetic if
3692 mode == word_mode, however all the cost calculations in
3693 synth_mult etc. assume single-word operations. */
3694 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3695 return expmed_mult_highpart_optab (mode, op0, op1, target,
3696 unsignedp, max_cost);
3697
3698 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3699
3700 /* Check whether we try to multiply by a negative constant. */
3701 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3702 {
3703 sign_adjust = true;
3704 extra_cost += add_cost (speed, mode);
3705 }
3706
3707 /* See whether shift/add multiplication is cheap enough. */
3708 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3709 max_cost - extra_cost))
3710 {
3711 /* See whether the specialized multiplication optabs are
3712 cheaper than the shift/add version. */
3713 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3714 alg.cost.cost + extra_cost);
3715 if (tem)
3716 return tem;
3717
3718 tem = convert_to_mode (wider_mode, op0, unsignedp);
3719 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3720 tem = extract_high_half (mode, tem);
3721
3722 /* Adjust result for signedness. */
3723 if (sign_adjust)
3724 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3725
3726 return tem;
3727 }
3728 return expmed_mult_highpart_optab (mode, op0, op1, target,
3729 unsignedp, max_cost);
3730 }
3731
3732
3733 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
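/* An illustrative example, not from the original sources: in a 32-bit
   mode with D = 4, the branch-free path below computes for OP0 = -7
       signmask = -1,
       ((-7 ^ -1) - -1) & 3 = 3,
       (3 ^ -1) - -1 = -3,
   i.e. negate, mask with D - 1, negate back, which matches the
   truncating remainder -7 % 4 = -3 without any branch.  */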
3734
3735 static rtx
3736 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3737 {
3738 unsigned HOST_WIDE_INT masklow, maskhigh;
3739 rtx result, temp, shift, label;
3740 int logd;
3741
3742 logd = floor_log2 (d);
3743 result = gen_reg_rtx (mode);
3744
3745 /* Avoid conditional branches when they're expensive. */
3746 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3747 && optimize_insn_for_speed_p ())
3748 {
3749 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3750 mode, 0, -1);
3751 if (signmask)
3752 {
3753 signmask = force_reg (mode, signmask);
3754 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3755 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3756
3757 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3758 which instruction sequence to use. If logical right shifts
3759 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3760 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3761
3762 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3763 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3764 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3765 > COSTS_N_INSNS (2)))
3766 {
3767 temp = expand_binop (mode, xor_optab, op0, signmask,
3768 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3769 temp = expand_binop (mode, sub_optab, temp, signmask,
3770 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3771 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3772 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3773 temp = expand_binop (mode, xor_optab, temp, signmask,
3774 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3775 temp = expand_binop (mode, sub_optab, temp, signmask,
3776 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3777 }
3778 else
3779 {
3780 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3781 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3782 signmask = force_reg (mode, signmask);
3783
3784 temp = expand_binop (mode, add_optab, op0, signmask,
3785 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3786 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3787 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3788 temp = expand_binop (mode, sub_optab, temp, signmask,
3789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3790 }
3791 return temp;
3792 }
3793 }
3794
3795 /* Mask contains the mode's signbit and the significant bits of the
3796 modulus. By including the signbit in the operation, many targets
3797 can avoid an explicit compare operation in the following comparison
3798 against zero. */
3799
3800 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3801 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3802 {
3803 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3804 maskhigh = -1;
3805 }
3806 else
3807 maskhigh = (HOST_WIDE_INT) -1
3808 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3809
3810 temp = expand_binop (mode, and_optab, op0,
3811 immed_double_const (masklow, maskhigh, mode),
3812 result, 1, OPTAB_LIB_WIDEN);
3813 if (temp != result)
3814 emit_move_insn (result, temp);
3815
3816 label = gen_label_rtx ();
3817 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3818
3819 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3820 0, OPTAB_LIB_WIDEN);
3821 masklow = (HOST_WIDE_INT) -1 << logd;
3822 maskhigh = -1;
3823 temp = expand_binop (mode, ior_optab, temp,
3824 immed_double_const (masklow, maskhigh, mode),
3825 result, 1, OPTAB_LIB_WIDEN);
3826 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3827 0, OPTAB_LIB_WIDEN);
3828 if (temp != result)
3829 emit_move_insn (result, temp);
3830 emit_label (label);
3831 return result;
3832 }
3833
3834 /* Expand signed division of OP0 by a power of two D in mode MODE.
3835 This routine is only called for positive values of D. */
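/* An illustrative example, not from the original sources: in a 32-bit
   mode with D = 4, the branch-free path below amounts to
       quotient = (op0 + ((op0 >> 31) & 3)) >> 2
   with arithmetic shifts, i.e. add D - 1 before shifting only when
   OP0 is negative, so that e.g. -7 / 4 truncates to -1 rather than
   flooring to -2.  */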
3836
3837 static rtx
3838 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3839 {
3840 rtx temp, label;
3841 int logd;
3842
3843 logd = floor_log2 (d);
3844
3845 if (d == 2
3846 && BRANCH_COST (optimize_insn_for_speed_p (),
3847 false) >= 1)
3848 {
3849 temp = gen_reg_rtx (mode);
3850 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3851 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3852 0, OPTAB_LIB_WIDEN);
3853 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3854 }
3855
3856 #ifdef HAVE_conditional_move
3857 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3858 >= 2)
3859 {
3860 rtx temp2;
3861
3862 /* ??? emit_conditional_move forces a stack adjustment via
3863 compare_from_rtx so, if the sequence is discarded, it will
3864 be lost. Do it now instead. */
3865 do_pending_stack_adjust ();
3866
3867 start_sequence ();
3868 temp2 = copy_to_mode_reg (mode, op0);
3869 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3870 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3871 temp = force_reg (mode, temp);
3872
3873 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3874 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3875 mode, temp, temp2, mode, 0);
3876 if (temp2)
3877 {
3878 rtx seq = get_insns ();
3879 end_sequence ();
3880 emit_insn (seq);
3881 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3882 }
3883 end_sequence ();
3884 }
3885 #endif
3886
3887 if (BRANCH_COST (optimize_insn_for_speed_p (),
3888 false) >= 2)
3889 {
3890 int ushift = GET_MODE_BITSIZE (mode) - logd;
3891
3892 temp = gen_reg_rtx (mode);
3893 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3894 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3895 > COSTS_N_INSNS (1))
3896 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3897 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3898 else
3899 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3900 ushift, NULL_RTX, 1);
3901 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3902 0, OPTAB_LIB_WIDEN);
3903 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3904 }
3905
3906 label = gen_label_rtx ();
3907 temp = copy_to_mode_reg (mode, op0);
3908 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3909 expand_inc (temp, GEN_INT (d - 1));
3910 emit_label (label);
3911 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3912 }
3913 \f
3914 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3915 if that is convenient, and returning where the result is.
3916 You may request either the quotient or the remainder as the result;
3917 specify REM_FLAG nonzero to get the remainder.
3918
3919 CODE is the expression code for which kind of division this is;
3920 it controls how rounding is done. MODE is the machine mode to use.
3921 UNSIGNEDP nonzero means do unsigned division. */
3922
3923 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3924 and then correct it by or'ing in missing high bits
3925 if result of ANDI is nonzero.
3926 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3927 This could optimize to a bfexts instruction.
3928 But C doesn't use these operations, so their optimizations are
3929 left for later. */
3930 /* ??? For modulo, we don't actually need the highpart of the first product,
3931 the low part will do nicely. And for small divisors, the second multiply
3932 can also be a low-part only multiply or even be completely left out.
3933 E.g. to calculate the remainder of a division by 3 with a 32 bit
3934 multiply, multiply with 0x55555556 and extract the upper two bits;
3935 the result is exact for inputs up to 0x1fffffff.
3936 The input range can be reduced by using cross-sum rules.
3937 For odd divisors >= 3, the following table gives right shift counts
3938 so that if a number is shifted by an integer multiple of the given
3939 amount, the remainder stays the same:
3940 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3941 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3942 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3943 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3944 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3945
3946 Cross-sum rules for even numbers can be derived by leaving as many bits
3947 to the right alone as the divisor has zeros to the right.
3948 E.g. if x is an unsigned 32 bit number:
3949 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3950 */
3951
3952 rtx
3953 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3954 rtx op0, rtx op1, rtx target, int unsignedp)
3955 {
3956 enum machine_mode compute_mode;
3957 rtx tquotient;
3958 rtx quotient = 0, remainder = 0;
3959 rtx last;
3960 int size;
3961 rtx insn;
3962 optab optab1, optab2;
3963 int op1_is_constant, op1_is_pow2 = 0;
3964 int max_cost, extra_cost;
3965 static HOST_WIDE_INT last_div_const = 0;
3966 static HOST_WIDE_INT ext_op1;
3967 bool speed = optimize_insn_for_speed_p ();
3968
3969 op1_is_constant = CONST_INT_P (op1);
3970 if (op1_is_constant)
3971 {
3972 ext_op1 = INTVAL (op1);
3973 if (unsignedp)
3974 ext_op1 &= GET_MODE_MASK (mode);
3975 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3976 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3977 }
3978
3979 /*
3980 This is the structure of expand_divmod:
3981
3982 First comes code to fix up the operands so we can perform the operations
3983 correctly and efficiently.
3984
3985 Second comes a switch statement with code specific for each rounding mode.
3986 For some special operands this code emits all RTL for the desired
3987 operation, for other cases, it generates only a quotient and stores it in
3988 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3989 to indicate that it has not done anything.
3990
3991 Last comes code that finishes the operation. If QUOTIENT is set and
3992 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3993 QUOTIENT is not set, it is computed using trunc rounding.
3994
3995 We try to generate special code for division and remainder when OP1 is a
3996 constant. If |OP1| = 2**n we can use shifts and some other fast
3997 operations. For other values of OP1, we compute a carefully selected
3998 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3999 by m.
4000
4001 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4002 half of the product. Different strategies for generating the product are
4003 implemented in expmed_mult_highpart.
4004
4005 If what we actually want is the remainder, we generate that by another
4006 by-constant multiplication and a subtraction. */
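/* An illustrative example, not from the original sources: for unsigned
   division by 7 in a 32-bit mode, choose_multiplier yields a 33-bit
   multiplier (low bits 0x24924925) with a post-shift of 3, and the
   code below expands x / 7 into roughly
     t1 = high 32 bits of (x * 0x24924925);
     q  = (t1 + ((x - t1) >> 1)) >> 2;
   i.e. the add-back sequence used when the multiplier does not fit
   in 32 bits.  */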
4007
4008 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4009 code below will malfunction if we are, so check here and handle
4010 the special case if so. */
4011 if (op1 == const1_rtx)
4012 return rem_flag ? const0_rtx : op0;
4013
4014 /* When dividing by -1, we could get an overflow.
4015 negv_optab can handle overflows. */
4016 if (! unsignedp && op1 == constm1_rtx)
4017 {
4018 if (rem_flag)
4019 return const0_rtx;
4020 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
4021 ? negv_optab : neg_optab, op0, target, 0);
4022 }
4023
4024 if (target
4025 /* Don't use the function value register as a target
4026 since we have to read it as well as write it,
4027 and function-inlining gets confused by this. */
4028 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4029 /* Don't clobber an operand while doing a multi-step calculation. */
4030 || ((rem_flag || op1_is_constant)
4031 && (reg_mentioned_p (target, op0)
4032 || (MEM_P (op0) && MEM_P (target))))
4033 || reg_mentioned_p (target, op1)
4034 || (MEM_P (op1) && MEM_P (target))))
4035 target = 0;
4036
4037 /* Get the mode in which to perform this computation. Normally it will
4038 be MODE, but sometimes we can't do the desired operation in MODE.
4039 If so, pick a wider mode in which we can do the operation. Convert
4040 to that mode at the start to avoid repeated conversions.
4041
4042 First see what operations we need. These depend on the expression
4043 we are evaluating. (We assume that divxx3 insns exist under the
4044 same conditions as modxx3 insns do, and that these insns don't normally
4045 fail. If these assumptions are not correct, we may generate less
4046 efficient code in some cases.)
4047
4048 Then see if we find a mode in which we can open-code that operation
4049 (either a division, modulus, or shift). Finally, check for the smallest
4050 mode for which we can do the operation with a library call. */
4051
4052 /* We might want to refine this now that we have division-by-constant
4053 optimization. Since expmed_mult_highpart tries so many variants, it is
4054 not straightforward to generalize this. Maybe we should make an array
4055 of possible modes in init_expmed? Save this for GCC 2.7. */
4056
4057 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4058 ? (unsignedp ? lshr_optab : ashr_optab)
4059 : (unsignedp ? udiv_optab : sdiv_optab));
4060 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4061 ? optab1
4062 : (unsignedp ? udivmod_optab : sdivmod_optab));
4063
4064 for (compute_mode = mode; compute_mode != VOIDmode;
4065 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4066 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4067 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4068 break;
4069
4070 if (compute_mode == VOIDmode)
4071 for (compute_mode = mode; compute_mode != VOIDmode;
4072 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4073 if (optab_libfunc (optab1, compute_mode)
4074 || optab_libfunc (optab2, compute_mode))
4075 break;
4076
4077 /* If we still couldn't find a mode, use MODE, but expand_binop will
4078 probably die. */
4079 if (compute_mode == VOIDmode)
4080 compute_mode = mode;
4081
4082 if (target && GET_MODE (target) == compute_mode)
4083 tquotient = target;
4084 else
4085 tquotient = gen_reg_rtx (compute_mode);
4086
4087 size = GET_MODE_BITSIZE (compute_mode);
4088 #if 0
4089 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4090 (mode), and thereby get better code when OP1 is a constant. Do that
4091 later. It will require going over all usages of SIZE below. */
4092 size = GET_MODE_BITSIZE (mode);
4093 #endif
4094
4095 /* Only deduct something for a REM if the last divide done was
4096 for a different constant. Then set the constant of the last
4097 divide. */
4098 max_cost = (unsignedp
4099 ? udiv_cost (speed, compute_mode)
4100 : sdiv_cost (speed, compute_mode));
4101 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4102 && INTVAL (op1) == last_div_const))
4103 max_cost -= (mul_cost (speed, compute_mode)
4104 + add_cost (speed, compute_mode));
4105
4106 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4107
4108 /* Now convert to the best mode to use. */
4109 if (compute_mode != mode)
4110 {
4111 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4112 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4113
4114 /* convert_modes may have placed op1 into a register, so we
4115 must recompute the following. */
4116 op1_is_constant = CONST_INT_P (op1);
4117 op1_is_pow2 = (op1_is_constant
4118 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4119 || (! unsignedp
4120 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4121 }
4122
4123 /* If one of the operands is a volatile MEM, copy it into a register. */
4124
4125 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4126 op0 = force_reg (compute_mode, op0);
4127 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4128 op1 = force_reg (compute_mode, op1);
4129
4130 /* If we need the remainder or if OP1 is constant, we need to
4131 put OP0 in a register in case it has any queued subexpressions. */
4132 if (rem_flag || op1_is_constant)
4133 op0 = force_reg (compute_mode, op0);
4134
4135 last = get_last_insn ();
4136
4137 /* Promote floor rounding to trunc rounding for unsigned operations. */
4138 if (unsignedp)
4139 {
4140 if (code == FLOOR_DIV_EXPR)
4141 code = TRUNC_DIV_EXPR;
4142 if (code == FLOOR_MOD_EXPR)
4143 code = TRUNC_MOD_EXPR;
4144 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4145 code = TRUNC_DIV_EXPR;
4146 }
4147
4148 if (op1 != const0_rtx)
4149 switch (code)
4150 {
4151 case TRUNC_MOD_EXPR:
4152 case TRUNC_DIV_EXPR:
4153 if (op1_is_constant)
4154 {
4155 if (unsignedp)
4156 {
4157 unsigned HOST_WIDE_INT mh, ml;
4158 int pre_shift, post_shift;
4159 int dummy;
4160 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4161 & GET_MODE_MASK (compute_mode));
4162
4163 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4164 {
4165 pre_shift = floor_log2 (d);
4166 if (rem_flag)
4167 {
4168 remainder
4169 = expand_binop (compute_mode, and_optab, op0,
4170 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4171 remainder, 1,
4172 OPTAB_LIB_WIDEN);
4173 if (remainder)
4174 return gen_lowpart (mode, remainder);
4175 }
4176 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4177 pre_shift, tquotient, 1);
4178 }
4179 else if (size <= HOST_BITS_PER_WIDE_INT)
4180 {
4181 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4182 {
4183 /* Most significant bit of divisor is set; emit an scc
4184 insn. */
4185 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4186 compute_mode, 1, 1);
4187 }
4188 else
4189 {
4190 /* Find a suitable multiplier and right shift count
4191 instead of multiplying with D. */
4192
4193 mh = choose_multiplier (d, size, size,
4194 &ml, &post_shift, &dummy);
4195
4196 /* If the suggested multiplier is more than SIZE bits,
4197 we can do better for even divisors, using an
4198 initial right shift. */
4199 if (mh != 0 && (d & 1) == 0)
4200 {
4201 pre_shift = floor_log2 (d & -d);
4202 mh = choose_multiplier (d >> pre_shift, size,
4203 size - pre_shift,
4204 &ml, &post_shift, &dummy);
4205 gcc_assert (!mh);
4206 }
4207 else
4208 pre_shift = 0;
4209
4210 if (mh != 0)
4211 {
4212 rtx t1, t2, t3, t4;
4213
4214 if (post_shift - 1 >= BITS_PER_WORD)
4215 goto fail1;
4216
4217 extra_cost
4218 = (shift_cost (speed, compute_mode, post_shift - 1)
4219 + shift_cost (speed, compute_mode, 1)
4220 + 2 * add_cost (speed, compute_mode));
4221 t1 = expmed_mult_highpart (compute_mode, op0,
4222 GEN_INT (ml),
4223 NULL_RTX, 1,
4224 max_cost - extra_cost);
4225 if (t1 == 0)
4226 goto fail1;
4227 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4228 op0, t1),
4229 NULL_RTX);
4230 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4231 t2, 1, NULL_RTX, 1);
4232 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4233 t1, t3),
4234 NULL_RTX);
4235 quotient = expand_shift
4236 (RSHIFT_EXPR, compute_mode, t4,
4237 post_shift - 1, tquotient, 1);
4238 }
4239 else
4240 {
4241 rtx t1, t2;
4242
4243 if (pre_shift >= BITS_PER_WORD
4244 || post_shift >= BITS_PER_WORD)
4245 goto fail1;
4246
4247 t1 = expand_shift
4248 (RSHIFT_EXPR, compute_mode, op0,
4249 pre_shift, NULL_RTX, 1);
4250 extra_cost
4251 = (shift_cost (speed, compute_mode, pre_shift)
4252 + shift_cost (speed, compute_mode, post_shift));
4253 t2 = expmed_mult_highpart (compute_mode, t1,
4254 GEN_INT (ml),
4255 NULL_RTX, 1,
4256 max_cost - extra_cost);
4257 if (t2 == 0)
4258 goto fail1;
4259 quotient = expand_shift
4260 (RSHIFT_EXPR, compute_mode, t2,
4261 post_shift, tquotient, 1);
4262 }
4263 }
4264 }
4265 else /* Too wide mode to use tricky code */
4266 break;
4267
4268 insn = get_last_insn ();
4269 if (insn != last)
4270 set_dst_reg_note (insn, REG_EQUAL,
4271 gen_rtx_UDIV (compute_mode, op0, op1),
4272 quotient);
4273 }
4274 else /* TRUNC_DIV, signed */
4275 {
4276 unsigned HOST_WIDE_INT ml;
4277 int lgup, post_shift;
4278 rtx mlr;
4279 HOST_WIDE_INT d = INTVAL (op1);
4280 unsigned HOST_WIDE_INT abs_d;
4281
4282 /* Since d might be INT_MIN, we have to cast to
4283 unsigned HOST_WIDE_INT before negating to avoid
4284 undefined signed overflow. */
4285 abs_d = (d >= 0
4286 ? (unsigned HOST_WIDE_INT) d
4287 : - (unsigned HOST_WIDE_INT) d);
4288
4289 /* n rem d = n rem -d */
4290 if (rem_flag && d < 0)
4291 {
4292 d = abs_d;
4293 op1 = gen_int_mode (abs_d, compute_mode);
4294 }
4295
4296 if (d == 1)
4297 quotient = op0;
4298 else if (d == -1)
4299 quotient = expand_unop (compute_mode, neg_optab, op0,
4300 tquotient, 0);
4301 else if (HOST_BITS_PER_WIDE_INT >= size
4302 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4303 {
4304 /* This case is not handled correctly below. */
4305 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4306 compute_mode, 1, 1);
4307 if (quotient == 0)
4308 goto fail1;
4309 }
4310 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4311 && (rem_flag
4312 ? smod_pow2_cheap (speed, compute_mode)
4313 : sdiv_pow2_cheap (speed, compute_mode))
4314 /* We assume that the cheap metric is true if the
4315 optab has an expander for this mode. */
4316 && ((optab_handler ((rem_flag ? smod_optab
4317 : sdiv_optab),
4318 compute_mode)
4319 != CODE_FOR_nothing)
4320 || (optab_handler (sdivmod_optab,
4321 compute_mode)
4322 != CODE_FOR_nothing)))
4323 ;
4324 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4325 {
4326 if (rem_flag)
4327 {
4328 remainder = expand_smod_pow2 (compute_mode, op0, d);
4329 if (remainder)
4330 return gen_lowpart (mode, remainder);
4331 }
4332
4333 if (sdiv_pow2_cheap (speed, compute_mode)
4334 && ((optab_handler (sdiv_optab, compute_mode)
4335 != CODE_FOR_nothing)
4336 || (optab_handler (sdivmod_optab, compute_mode)
4337 != CODE_FOR_nothing)))
4338 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4339 compute_mode, op0,
4340 gen_int_mode (abs_d,
4341 compute_mode),
4342 NULL_RTX, 0);
4343 else
4344 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4345
4346 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4347 negate the quotient. */
4348 if (d < 0)
4349 {
4350 insn = get_last_insn ();
4351 if (insn != last
4352 && abs_d < ((unsigned HOST_WIDE_INT) 1
4353 << (HOST_BITS_PER_WIDE_INT - 1)))
4354 set_dst_reg_note (insn, REG_EQUAL,
4355 gen_rtx_DIV (compute_mode, op0,
4356 gen_int_mode
4357 (abs_d,
4358 compute_mode)),
4359 quotient);
4360
4361 quotient = expand_unop (compute_mode, neg_optab,
4362 quotient, quotient, 0);
4363 }
4364 }
4365 else if (size <= HOST_BITS_PER_WIDE_INT)
4366 {
4367 choose_multiplier (abs_d, size, size - 1,
4368 &ml, &post_shift, &lgup);
4369 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4370 {
4371 rtx t1, t2, t3;
4372
4373 if (post_shift >= BITS_PER_WORD
4374 || size - 1 >= BITS_PER_WORD)
4375 goto fail1;
4376
4377 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4378 + shift_cost (speed, compute_mode, size - 1)
4379 + add_cost (speed, compute_mode));
4380 t1 = expmed_mult_highpart (compute_mode, op0,
4381 GEN_INT (ml), NULL_RTX, 0,
4382 max_cost - extra_cost);
4383 if (t1 == 0)
4384 goto fail1;
4385 t2 = expand_shift
4386 (RSHIFT_EXPR, compute_mode, t1,
4387 post_shift, NULL_RTX, 0);
4388 t3 = expand_shift
4389 (RSHIFT_EXPR, compute_mode, op0,
4390 size - 1, NULL_RTX, 0);
4391 if (d < 0)
4392 quotient
4393 = force_operand (gen_rtx_MINUS (compute_mode,
4394 t3, t2),
4395 tquotient);
4396 else
4397 quotient
4398 = force_operand (gen_rtx_MINUS (compute_mode,
4399 t2, t3),
4400 tquotient);
4401 }
4402 else
4403 {
4404 rtx t1, t2, t3, t4;
4405
4406 if (post_shift >= BITS_PER_WORD
4407 || size - 1 >= BITS_PER_WORD)
4408 goto fail1;
4409
4410 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4411 mlr = gen_int_mode (ml, compute_mode);
4412 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4413 + shift_cost (speed, compute_mode, size - 1)
4414 + 2 * add_cost (speed, compute_mode));
4415 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4416 NULL_RTX, 0,
4417 max_cost - extra_cost);
4418 if (t1 == 0)
4419 goto fail1;
4420 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4421 t1, op0),
4422 NULL_RTX);
4423 t3 = expand_shift
4424 (RSHIFT_EXPR, compute_mode, t2,
4425 post_shift, NULL_RTX, 0);
4426 t4 = expand_shift
4427 (RSHIFT_EXPR, compute_mode, op0,
4428 size - 1, NULL_RTX, 0);
4429 if (d < 0)
4430 quotient
4431 = force_operand (gen_rtx_MINUS (compute_mode,
4432 t4, t3),
4433 tquotient);
4434 else
4435 quotient
4436 = force_operand (gen_rtx_MINUS (compute_mode,
4437 t3, t4),
4438 tquotient);
4439 }
4440 }
4441 else /* Too wide mode to use tricky code */
4442 break;
4443
4444 insn = get_last_insn ();
4445 if (insn != last)
4446 set_dst_reg_note (insn, REG_EQUAL,
4447 gen_rtx_DIV (compute_mode, op0, op1),
4448 quotient);
4449 }
4450 break;
4451 }
4452 fail1:
4453 delete_insns_since (last);
4454 break;
4455
4456 case FLOOR_DIV_EXPR:
4457 case FLOOR_MOD_EXPR:
4458 /* We will come here only for signed operations. */
4459 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4460 {
4461 unsigned HOST_WIDE_INT mh, ml;
4462 int pre_shift, lgup, post_shift;
4463 HOST_WIDE_INT d = INTVAL (op1);
4464
4465 if (d > 0)
4466 {
4467 /* We could just as easily deal with negative constants here,
4468 but it does not seem worth the trouble for GCC 2.6. */
4469 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4470 {
4471 pre_shift = floor_log2 (d);
4472 if (rem_flag)
4473 {
4474 remainder = expand_binop (compute_mode, and_optab, op0,
4475 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4476 remainder, 0, OPTAB_LIB_WIDEN);
4477 if (remainder)
4478 return gen_lowpart (mode, remainder);
4479 }
4480 quotient = expand_shift
4481 (RSHIFT_EXPR, compute_mode, op0,
4482 pre_shift, tquotient, 0);
4483 }
4484 else
4485 {
4486 rtx t1, t2, t3, t4;
4487
4488 mh = choose_multiplier (d, size, size - 1,
4489 &ml, &post_shift, &lgup);
4490 gcc_assert (!mh);
4491
4492 if (post_shift < BITS_PER_WORD
4493 && size - 1 < BITS_PER_WORD)
4494 {
4495 t1 = expand_shift
4496 (RSHIFT_EXPR, compute_mode, op0,
4497 size - 1, NULL_RTX, 0);
4498 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4499 NULL_RTX, 0, OPTAB_WIDEN);
4500 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4501 + shift_cost (speed, compute_mode, size - 1)
4502 + 2 * add_cost (speed, compute_mode));
4503 t3 = expmed_mult_highpart (compute_mode, t2,
4504 GEN_INT (ml), NULL_RTX, 1,
4505 max_cost - extra_cost);
4506 if (t3 != 0)
4507 {
4508 t4 = expand_shift
4509 (RSHIFT_EXPR, compute_mode, t3,
4510 post_shift, NULL_RTX, 1);
4511 quotient = expand_binop (compute_mode, xor_optab,
4512 t4, t1, tquotient, 0,
4513 OPTAB_WIDEN);
4514 }
4515 }
4516 }
4517 }
4518 else
4519 {
4520 rtx nsign, t1, t2, t3, t4;
4521 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4522 op0, constm1_rtx), NULL_RTX);
4523 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4524 0, OPTAB_WIDEN);
4525 nsign = expand_shift
4526 (RSHIFT_EXPR, compute_mode, t2,
4527 size - 1, NULL_RTX, 0);
4528 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4529 NULL_RTX);
4530 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4531 NULL_RTX, 0);
4532 if (t4)
4533 {
4534 rtx t5;
4535 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4536 NULL_RTX, 0);
4537 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4538 t4, t5),
4539 tquotient);
4540 }
4541 }
4542 }
4543
4544 if (quotient != 0)
4545 break;
4546 delete_insns_since (last);
4547
4548 /* Try using an instruction that produces both the quotient and
4549 remainder, using truncation. We can easily compensate the quotient
4550 or remainder to get floor rounding, once we have the remainder.
4551 Notice that we compute also the final remainder value here,
4552 and return the result right away. */
4553 if (target == 0 || GET_MODE (target) != compute_mode)
4554 target = gen_reg_rtx (compute_mode);
4555
4556 if (rem_flag)
4557 {
4558 remainder
4559 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4560 quotient = gen_reg_rtx (compute_mode);
4561 }
4562 else
4563 {
4564 quotient
4565 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4566 remainder = gen_reg_rtx (compute_mode);
4567 }
4568
4569 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4570 quotient, remainder, 0))
4571 {
4572 /* This could be computed with a branch-less sequence.
4573 Save that for later. */
4574 rtx tem;
4575 rtx label = gen_label_rtx ();
4576 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4577 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4578 NULL_RTX, 0, OPTAB_WIDEN);
4579 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4580 expand_dec (quotient, const1_rtx);
4581 expand_inc (remainder, op1);
4582 emit_label (label);
4583 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4584 }
4585
4586 /* No luck with division elimination or divmod. Have to do it
4587 by conditionally adjusting op0 *and* the result. */
4588 {
4589 rtx label1, label2, label3, label4, label5;
4590 rtx adjusted_op0;
4591 rtx tem;
4592
4593 quotient = gen_reg_rtx (compute_mode);
4594 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4595 label1 = gen_label_rtx ();
4596 label2 = gen_label_rtx ();
4597 label3 = gen_label_rtx ();
4598 label4 = gen_label_rtx ();
4599 label5 = gen_label_rtx ();
4600 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4601 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4602 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4603 quotient, 0, OPTAB_LIB_WIDEN);
4604 if (tem != quotient)
4605 emit_move_insn (quotient, tem);
4606 emit_jump_insn (gen_jump (label5));
4607 emit_barrier ();
4608 emit_label (label1);
4609 expand_inc (adjusted_op0, const1_rtx);
4610 emit_jump_insn (gen_jump (label4));
4611 emit_barrier ();
4612 emit_label (label2);
4613 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4614 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4615 quotient, 0, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 emit_jump_insn (gen_jump (label5));
4619 emit_barrier ();
4620 emit_label (label3);
4621 expand_dec (adjusted_op0, const1_rtx);
4622 emit_label (label4);
4623 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4624 quotient, 0, OPTAB_LIB_WIDEN);
4625 if (tem != quotient)
4626 emit_move_insn (quotient, tem);
4627 expand_dec (quotient, const1_rtx);
4628 emit_label (label5);
4629 }
4630 break;
4631
4632 case CEIL_DIV_EXPR:
4633 case CEIL_MOD_EXPR:
4634 if (unsignedp)
4635 {
4636 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4637 {
4638 rtx t1, t2, t3;
4639 unsigned HOST_WIDE_INT d = INTVAL (op1);
4640 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4641 floor_log2 (d), tquotient, 1);
4642 t2 = expand_binop (compute_mode, and_optab, op0,
4643 GEN_INT (d - 1),
4644 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4645 t3 = gen_reg_rtx (compute_mode);
4646 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4647 compute_mode, 1, 1);
4648 if (t3 == 0)
4649 {
4650 rtx lab;
4651 lab = gen_label_rtx ();
4652 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4653 expand_inc (t1, const1_rtx);
4654 emit_label (lab);
4655 quotient = t1;
4656 }
4657 else
4658 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4659 t1, t3),
4660 tquotient);
4661 break;
4662 }
4663
4664 /* Try using an instruction that produces both the quotient and
4665 remainder, using truncation. We can easily compensate the
4666 quotient or remainder to get ceiling rounding, once we have the
4667 remainder. Notice that we compute also the final remainder
4668 value here, and return the result right away. */
4669 if (target == 0 || GET_MODE (target) != compute_mode)
4670 target = gen_reg_rtx (compute_mode);
4671
4672 if (rem_flag)
4673 {
4674 remainder = (REG_P (target)
4675 ? target : gen_reg_rtx (compute_mode));
4676 quotient = gen_reg_rtx (compute_mode);
4677 }
4678 else
4679 {
4680 quotient = (REG_P (target)
4681 ? target : gen_reg_rtx (compute_mode));
4682 remainder = gen_reg_rtx (compute_mode);
4683 }
4684
4685 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4686 remainder, 1))
4687 {
4688 /* This could be computed with a branch-less sequence.
4689 Save that for later. */
4690 rtx label = gen_label_rtx ();
4691 do_cmp_and_jump (remainder, const0_rtx, EQ,
4692 compute_mode, label);
4693 expand_inc (quotient, const1_rtx);
4694 expand_dec (remainder, op1);
4695 emit_label (label);
4696 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4697 }
4698
4699 /* No luck with division elimination or divmod. Have to do it
4700 by conditionally adjusting op0 *and* the result. */
4701 {
4702 rtx label1, label2;
4703 rtx adjusted_op0, tem;
4704
4705 quotient = gen_reg_rtx (compute_mode);
4706 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4707 label1 = gen_label_rtx ();
4708 label2 = gen_label_rtx ();
4709 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4710 compute_mode, label1);
4711 emit_move_insn (quotient, const0_rtx);
4712 emit_jump_insn (gen_jump (label2));
4713 emit_barrier ();
4714 emit_label (label1);
4715 expand_dec (adjusted_op0, const1_rtx);
4716 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4717 quotient, 1, OPTAB_LIB_WIDEN);
4718 if (tem != quotient)
4719 emit_move_insn (quotient, tem);
4720 expand_inc (quotient, const1_rtx);
4721 emit_label (label2);
4722 }
4723 }
4724 else /* signed */
4725 {
4726 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4727 && INTVAL (op1) >= 0)
4728 {
4729 /* This is extremely similar to the code for the unsigned case
4730 above. For 2.7 we should merge these variants, but for
4731 2.6.1 I don't want to touch the code for unsigned since that
4732 gets used in C. The signed case will only be used by other
4733 languages (Ada). */
4734
4735 rtx t1, t2, t3;
4736 unsigned HOST_WIDE_INT d = INTVAL (op1);
4737 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4738 floor_log2 (d), tquotient, 0);
4739 t2 = expand_binop (compute_mode, and_optab, op0,
4740 GEN_INT (d - 1),
4741 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4742 t3 = gen_reg_rtx (compute_mode);
4743 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4744 compute_mode, 1, 1);
4745 if (t3 == 0)
4746 {
4747 rtx lab;
4748 lab = gen_label_rtx ();
4749 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4750 expand_inc (t1, const1_rtx);
4751 emit_label (lab);
4752 quotient = t1;
4753 }
4754 else
4755 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4756 t1, t3),
4757 tquotient);
4758 break;
4759 }
4760
4761 /* Try using an instruction that produces both the quotient and
4762 remainder, using truncation. We can easily compensate the
4763 quotient or remainder to get ceiling rounding, once we have the
4764 remainder. Notice that we compute also the final remainder
4765 value here, and return the result right away. */
4766 if (target == 0 || GET_MODE (target) != compute_mode)
4767 target = gen_reg_rtx (compute_mode);
4768 if (rem_flag)
4769 {
4770 remainder = (REG_P (target)
4771 ? target : gen_reg_rtx (compute_mode));
4772 quotient = gen_reg_rtx (compute_mode);
4773 }
4774 else
4775 {
4776 quotient = (REG_P (target)
4777 ? target : gen_reg_rtx (compute_mode));
4778 remainder = gen_reg_rtx (compute_mode);
4779 }
4780
4781 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4782 remainder, 0))
4783 {
4784 /* This could be computed with a branch-less sequence.
4785 Save that for later. */
4786 rtx tem;
4787 rtx label = gen_label_rtx ();
4788 do_cmp_and_jump (remainder, const0_rtx, EQ,
4789 compute_mode, label);
4790 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4791 NULL_RTX, 0, OPTAB_WIDEN);
4792 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4793 expand_inc (quotient, const1_rtx);
4794 expand_dec (remainder, op1);
4795 emit_label (label);
4796 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4797 }
4798
4799 /* No luck with division elimination or divmod. Have to do it
4800 by conditionally adjusting op0 *and* the result. */
4801 {
4802 rtx label1, label2, label3, label4, label5;
4803 rtx adjusted_op0;
4804 rtx tem;
4805
4806 quotient = gen_reg_rtx (compute_mode);
4807 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4808 label1 = gen_label_rtx ();
4809 label2 = gen_label_rtx ();
4810 label3 = gen_label_rtx ();
4811 label4 = gen_label_rtx ();
4812 label5 = gen_label_rtx ();
4813 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4814 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4815 compute_mode, label1);
4816 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4817 quotient, 0, OPTAB_LIB_WIDEN);
4818 if (tem != quotient)
4819 emit_move_insn (quotient, tem);
4820 emit_jump_insn (gen_jump (label5));
4821 emit_barrier ();
4822 emit_label (label1);
4823 expand_dec (adjusted_op0, const1_rtx);
4824 emit_jump_insn (gen_jump (label4));
4825 emit_barrier ();
4826 emit_label (label2);
4827 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4828 compute_mode, label3);
4829 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4830 quotient, 0, OPTAB_LIB_WIDEN);
4831 if (tem != quotient)
4832 emit_move_insn (quotient, tem);
4833 emit_jump_insn (gen_jump (label5));
4834 emit_barrier ();
4835 emit_label (label3);
4836 expand_inc (adjusted_op0, const1_rtx);
4837 emit_label (label4);
4838 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4839 quotient, 0, OPTAB_LIB_WIDEN);
4840 if (tem != quotient)
4841 emit_move_insn (quotient, tem);
4842 expand_inc (quotient, const1_rtx);
4843 emit_label (label5);
4844 }
4845 }
4846 break;
4847
4848 case EXACT_DIV_EXPR:
4849 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4850 {
4851 HOST_WIDE_INT d = INTVAL (op1);
4852 unsigned HOST_WIDE_INT ml;
4853 int pre_shift;
4854 rtx t1;
4855
4856 pre_shift = floor_log2 (d & -d);
4857 ml = invert_mod2n (d >> pre_shift, size);
4858 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4859 pre_shift, NULL_RTX, unsignedp);
4860 quotient = expand_mult (compute_mode, t1,
4861 gen_int_mode (ml, compute_mode),
4862 NULL_RTX, 1);
4863
4864 insn = get_last_insn ();
4865 set_dst_reg_note (insn, REG_EQUAL,
4866 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4867 compute_mode, op0, op1),
4868 quotient);
4869 }
4870 break;
4871
4872 case ROUND_DIV_EXPR:
4873 case ROUND_MOD_EXPR:
4874 if (unsignedp)
4875 {
4876 rtx tem;
4877 rtx label;
4878 label = gen_label_rtx ();
4879 quotient = gen_reg_rtx (compute_mode);
4880 remainder = gen_reg_rtx (compute_mode);
4881 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4882 {
4883 rtx tem;
4884 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4885 quotient, 1, OPTAB_LIB_WIDEN);
4886 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4887 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4888 remainder, 1, OPTAB_LIB_WIDEN);
4889 }
4890 tem = plus_constant (compute_mode, op1, -1);
4891 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4892 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4893 expand_inc (quotient, const1_rtx);
4894 expand_dec (remainder, op1);
4895 emit_label (label);
4896 }
4897 else
4898 {
4899 rtx abs_rem, abs_op1, tem, mask;
4900 rtx label;
4901 label = gen_label_rtx ();
4902 quotient = gen_reg_rtx (compute_mode);
4903 remainder = gen_reg_rtx (compute_mode);
4904 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4905 {
4906 rtx tem;
4907 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4908 quotient, 0, OPTAB_LIB_WIDEN);
4909 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4910 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4911 remainder, 0, OPTAB_LIB_WIDEN);
4912 }
4913 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4914 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4915 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4916 1, NULL_RTX, 1);
4917 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4918 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4919 NULL_RTX, 0, OPTAB_WIDEN);
4920 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4921 size - 1, NULL_RTX, 0);
4922 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4923 NULL_RTX, 0, OPTAB_WIDEN);
4924 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4925 NULL_RTX, 0, OPTAB_WIDEN);
4926 expand_inc (quotient, tem);
4927 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4928 NULL_RTX, 0, OPTAB_WIDEN);
4929 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4930 NULL_RTX, 0, OPTAB_WIDEN);
4931 expand_dec (remainder, tem);
4932 emit_label (label);
4933 }
4934 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4935
4936 default:
4937 gcc_unreachable ();
4938 }
4939
4940 if (quotient == 0)
4941 {
4942 if (target && GET_MODE (target) != compute_mode)
4943 target = 0;
4944
4945 if (rem_flag)
4946 {
4947 /* Try to produce the remainder without producing the quotient.
4948 If we seem to have a divmod pattern that does not require widening,
4949 don't try widening here. We should really have a WIDEN argument
4950 to expand_twoval_binop, since what we'd really like to do here is
4951 1) try a mod insn in compute_mode
4952 2) try a divmod insn in compute_mode
4953 3) try a div insn in compute_mode and multiply-subtract to get
4954 remainder
4955 4) try the same things with widening allowed. */
4956 remainder
4957 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4958 op0, op1, target,
4959 unsignedp,
4960 ((optab_handler (optab2, compute_mode)
4961 != CODE_FOR_nothing)
4962 ? OPTAB_DIRECT : OPTAB_WIDEN));
4963 if (remainder == 0)
4964 {
4965 /* No luck there. Can we do remainder and divide at once
4966 without a library call? */
4967 remainder = gen_reg_rtx (compute_mode);
4968 if (! expand_twoval_binop ((unsignedp
4969 ? udivmod_optab
4970 : sdivmod_optab),
4971 op0, op1,
4972 NULL_RTX, remainder, unsignedp))
4973 remainder = 0;
4974 }
4975
4976 if (remainder)
4977 return gen_lowpart (mode, remainder);
4978 }
4979
4980 /* Produce the quotient. Try a quotient insn, but not a library call.
4981 If we have a divmod in this mode, use it in preference to widening
4982 the div (for this test we assume it will not fail). Note that optab2
4983 is set to the one of the two optabs that the call below will use. */
4984 quotient
4985 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4986 op0, op1, rem_flag ? NULL_RTX : target,
4987 unsignedp,
4988 ((optab_handler (optab2, compute_mode)
4989 != CODE_FOR_nothing)
4990 ? OPTAB_DIRECT : OPTAB_WIDEN));
4991
4992 if (quotient == 0)
4993 {
4994 /* No luck there. Try a quotient-and-remainder insn,
4995 keeping the quotient alone. */
4996 quotient = gen_reg_rtx (compute_mode);
4997 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4998 op0, op1,
4999 quotient, NULL_RTX, unsignedp))
5000 {
5001 quotient = 0;
5002 if (! rem_flag)
5003 /* Still no luck. If we are not computing the remainder,
5004 use a library call for the quotient. */
5005 quotient = sign_expand_binop (compute_mode,
5006 udiv_optab, sdiv_optab,
5007 op0, op1, target,
5008 unsignedp, OPTAB_LIB_WIDEN);
5009 }
5010 }
5011 }
5012
5013 if (rem_flag)
5014 {
5015 if (target && GET_MODE (target) != compute_mode)
5016 target = 0;
5017
5018 if (quotient == 0)
5019 {
5020 /* No divide instruction either. Use library for remainder. */
5021 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5022 op0, op1, target,
5023 unsignedp, OPTAB_LIB_WIDEN);
5024 /* No remainder function. Try a quotient-and-remainder
5025 function, keeping the remainder. */
5026 if (!remainder)
5027 {
5028 remainder = gen_reg_rtx (compute_mode);
5029 if (!expand_twoval_binop_libfunc
5030 (unsignedp ? udivmod_optab : sdivmod_optab,
5031 op0, op1,
5032 NULL_RTX, remainder,
5033 unsignedp ? UMOD : MOD))
5034 remainder = NULL_RTX;
5035 }
5036 }
5037 else
5038 {
5039 /* We divided. Now finish doing X - Y * (X / Y). */
5040 remainder = expand_mult (compute_mode, quotient, op1,
5041 NULL_RTX, unsignedp);
5042 remainder = expand_binop (compute_mode, sub_optab, op0,
5043 remainder, target, unsignedp,
5044 OPTAB_LIB_WIDEN);
5045 }
5046 }
5047
5048 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5049 }
5050 \f
5051 /* Return a tree node with data type TYPE, describing the value of X.
5052 Usually this is a VAR_DECL, if there is no obvious better choice.
5053 X may be an expression; however, we only support those expressions
5054 generated by loop.c. */
5055
5056 tree
5057 make_tree (tree type, rtx x)
5058 {
5059 tree t;
5060
5061 switch (GET_CODE (x))
5062 {
5063 case CONST_INT:
5064 {
5065 HOST_WIDE_INT hi = 0;
5066
5067 if (INTVAL (x) < 0
5068 && !(TYPE_UNSIGNED (type)
5069 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5070 < HOST_BITS_PER_WIDE_INT)))
5071 hi = -1;
5072
5073 t = build_int_cst_wide (type, INTVAL (x), hi);
5074
5075 return t;
5076 }
5077
5078 case CONST_DOUBLE:
5079 if (GET_MODE (x) == VOIDmode)
5080 t = build_int_cst_wide (type,
5081 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5082 else
5083 {
5084 REAL_VALUE_TYPE d;
5085
5086 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5087 t = build_real (type, d);
5088 }
5089
5090 return t;
5091
5092 case CONST_VECTOR:
5093 {
5094 int units = CONST_VECTOR_NUNITS (x);
5095 tree itype = TREE_TYPE (type);
5096 tree *elts;
5097 int i;
5098
5099 /* Build a tree with vector elements. */
5100 elts = XALLOCAVEC (tree, units);
5101 for (i = units - 1; i >= 0; --i)
5102 {
5103 rtx elt = CONST_VECTOR_ELT (x, i);
5104 elts[i] = make_tree (itype, elt);
5105 }
5106
5107 return build_vector (type, elts);
5108 }
5109
5110 case PLUS:
5111 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5112 make_tree (type, XEXP (x, 1)));
5113
5114 case MINUS:
5115 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5116 make_tree (type, XEXP (x, 1)));
5117
5118 case NEG:
5119 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5120
5121 case MULT:
5122 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5123 make_tree (type, XEXP (x, 1)));
5124
5125 case ASHIFT:
5126 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5127 make_tree (type, XEXP (x, 1)));
5128
5129 case LSHIFTRT:
5130 t = unsigned_type_for (type);
5131 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5132 make_tree (t, XEXP (x, 0)),
5133 make_tree (type, XEXP (x, 1))));
5134
5135 case ASHIFTRT:
5136 t = signed_type_for (type);
5137 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5138 make_tree (t, XEXP (x, 0)),
5139 make_tree (type, XEXP (x, 1))));
5140
5141 case DIV:
5142 if (TREE_CODE (type) != REAL_TYPE)
5143 t = signed_type_for (type);
5144 else
5145 t = type;
5146
5147 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5148 make_tree (t, XEXP (x, 0)),
5149 make_tree (t, XEXP (x, 1))));
5150 case UDIV:
5151 t = unsigned_type_for (type);
5152 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5153 make_tree (t, XEXP (x, 0)),
5154 make_tree (t, XEXP (x, 1))));
5155
5156 case SIGN_EXTEND:
5157 case ZERO_EXTEND:
5158 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5159 GET_CODE (x) == ZERO_EXTEND);
5160 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5161
5162 case CONST:
5163 return make_tree (type, XEXP (x, 0));
5164
5165 case SYMBOL_REF:
5166 t = SYMBOL_REF_DECL (x);
5167 if (t)
5168 return fold_convert (type, build_fold_addr_expr (t));
5169 /* else fall through. */
5170
5171 default:
5172 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5173
5174 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5175 address mode to pointer mode. */
5176 if (POINTER_TYPE_P (type))
5177 x = convert_memory_address_addr_space
5178 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5179
5180 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5181 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5182 t->decl_with_rtl.rtl = x;
5183
5184 return t;
5185 }
5186 }
5187 \f
5188 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5189 and returning TARGET.
5190
5191 If TARGET is 0, a pseudo-register or constant is returned. */
5192
5193 rtx
5194 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5195 {
5196 rtx tem = 0;
5197
5198 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5199 tem = simplify_binary_operation (AND, mode, op0, op1);
5200 if (tem == 0)
5201 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5202
5203 if (target == 0)
5204 target = tem;
5205 else if (tem != target)
5206 emit_move_insn (target, tem);
5207 return target;
5208 }
5209
5210 /* Helper function for emit_store_flag. */
5211 static rtx
5212 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5213 enum machine_mode mode, enum machine_mode compare_mode,
5214 int unsignedp, rtx x, rtx y, int normalizep,
5215 enum machine_mode target_mode)
5216 {
5217 struct expand_operand ops[4];
5218 rtx op0, last, comparison, subtarget;
5219 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5220
5221 last = get_last_insn ();
5222 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5223 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5224 if (!x || !y)
5225 {
5226 delete_insns_since (last);
5227 return NULL_RTX;
5228 }
5229
5230 if (target_mode == VOIDmode)
5231 target_mode = result_mode;
5232 if (!target)
5233 target = gen_reg_rtx (target_mode);
5234
5235 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5236
5237 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5238 create_fixed_operand (&ops[1], comparison);
5239 create_fixed_operand (&ops[2], x);
5240 create_fixed_operand (&ops[3], y);
5241 if (!maybe_expand_insn (icode, 4, ops))
5242 {
5243 delete_insns_since (last);
5244 return NULL_RTX;
5245 }
5246 subtarget = ops[0].value;
5247
5248 /* If we are converting to a wider mode, first convert to
5249 TARGET_MODE, then normalize. This produces better combining
5250 opportunities on machines that have a SIGN_EXTRACT when we are
5251 testing a single bit. This mostly benefits the 68k.
5252
5253 If STORE_FLAG_VALUE does not have the sign bit set when
5254 interpreted in MODE, we can do this conversion as unsigned, which
5255 is usually more efficient. */
5256 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5257 {
5258 convert_move (target, subtarget,
5259 val_signbit_known_clear_p (result_mode,
5260 STORE_FLAG_VALUE));
5261 op0 = target;
5262 result_mode = target_mode;
5263 }
5264 else
5265 op0 = subtarget;
5266
5267 /* If we want to keep subexpressions around, don't reuse our last
5268 target. */
5269 if (optimize)
5270 subtarget = 0;
5271
5272 /* Now normalize to the proper value in MODE. Sometimes we don't
5273 have to do anything. */
5274 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5275 ;
5276 /* STORE_FLAG_VALUE might be the most negative number, so write
5277 the comparison this way to avoid a compile-time warning. */
5278 else if (- normalizep == STORE_FLAG_VALUE)
5279 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5280
5281 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5282 it hard to use a value of just the sign bit due to ANSI integer
5283 constant typing rules. */
5284 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5285 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5286 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5287 normalizep == 1);
5288 else
5289 {
5290 gcc_assert (STORE_FLAG_VALUE & 1);
5291
5292 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5293 if (normalizep == -1)
5294 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5295 }
5296
5297 /* If we were converting to a smaller mode, do the conversion now. */
5298 if (target_mode != result_mode)
5299 {
5300 convert_move (target, op0, 0);
5301 return target;
5302 }
5303 else
5304 return op0;
5305 }
5306
5307
5308 /* A subroutine of emit_store_flag only including "tricks" that do not
5309 need a recursive call. These are kept separate to avoid infinite
5310 loops. */
5311
5312 static rtx
5313 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5314 enum machine_mode mode, int unsignedp, int normalizep,
5315 enum machine_mode target_mode)
5316 {
5317 rtx subtarget;
5318 enum insn_code icode;
5319 enum machine_mode compare_mode;
5320 enum mode_class mclass;
5321 enum rtx_code scode;
5322 rtx tem;
5323
5324 if (unsignedp)
5325 code = unsigned_condition (code);
5326 scode = swap_condition (code);
5327
5328 /* If one operand is constant, make it the second one. Only do this
5329 if the other operand is not constant as well. */
5330
5331 if (swap_commutative_operands_p (op0, op1))
5332 {
5333 tem = op0;
5334 op0 = op1;
5335 op1 = tem;
5336 code = swap_condition (code);
5337 }
5338
5339 if (mode == VOIDmode)
5340 mode = GET_MODE (op0);
5341
5342 /* For some comparisons with 1 and -1, we can convert this to
5343 comparisons with zero. This will often produce more opportunities for
5344 store-flag insns. */
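     /* For example, X < 1 becomes X <= 0, X > -1 becomes X >= 0, and for
        unsigned operands X >= 1 becomes X != 0 while X < 1 becomes
        X == 0.  */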
5345
5346 switch (code)
5347 {
5348 case LT:
5349 if (op1 == const1_rtx)
5350 op1 = const0_rtx, code = LE;
5351 break;
5352 case LE:
5353 if (op1 == constm1_rtx)
5354 op1 = const0_rtx, code = LT;
5355 break;
5356 case GE:
5357 if (op1 == const1_rtx)
5358 op1 = const0_rtx, code = GT;
5359 break;
5360 case GT:
5361 if (op1 == constm1_rtx)
5362 op1 = const0_rtx, code = GE;
5363 break;
5364 case GEU:
5365 if (op1 == const1_rtx)
5366 op1 = const0_rtx, code = NE;
5367 break;
5368 case LTU:
5369 if (op1 == const1_rtx)
5370 op1 = const0_rtx, code = EQ;
5371 break;
5372 default:
5373 break;
5374 }
5375
5376 /* If we are comparing a double-word integer with zero or -1, we can
5377 convert the comparison into one involving a single word. */
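     /* For instance, a double-word X built from words HIGH and LOW is zero
        exactly when (LOW | HIGH) == 0, is -1 exactly when (LOW & HIGH) == -1,
        and its sign test (X < 0 or X >= 0) depends only on HIGH.  */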
5378 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5379 && GET_MODE_CLASS (mode) == MODE_INT
5380 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5381 {
5382 if ((code == EQ || code == NE)
5383 && (op1 == const0_rtx || op1 == constm1_rtx))
5384 {
5385 rtx op00, op01;
5386
5387 /* Do a logical OR or AND of the two words and compare the
5388 result. */
5389 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5390 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5391 tem = expand_binop (word_mode,
5392 op1 == const0_rtx ? ior_optab : and_optab,
5393 op00, op01, NULL_RTX, unsignedp,
5394 OPTAB_DIRECT);
5395
5396 if (tem != 0)
5397 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5398 unsignedp, normalizep);
5399 }
5400 else if ((code == LT || code == GE) && op1 == const0_rtx)
5401 {
5402 rtx op0h;
5403
5404 /* If testing the sign bit, we can just test the high word. */
5405 op0h = simplify_gen_subreg (word_mode, op0, mode,
5406 subreg_highpart_offset (word_mode,
5407 mode));
5408 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5409 unsignedp, normalizep);
5410 }
5411 else
5412 tem = NULL_RTX;
5413
5414 if (tem)
5415 {
5416 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5417 return tem;
5418 if (!target)
5419 target = gen_reg_rtx (target_mode);
5420
5421 convert_move (target, tem,
5422 !val_signbit_known_set_p (word_mode,
5423 (normalizep ? normalizep
5424 : STORE_FLAG_VALUE)));
5425 return target;
5426 }
5427 }
5428
5429 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5430 complement of A (for GE) and shifting the sign bit to the low bit. */
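     /* E.g. for 32-bit A and a 0/1 result, A < 0 becomes the logical shift
        (unsigned) A >> 31 and A >= 0 becomes (unsigned) ~A >> 31; for a
        0/-1 result the shift is arithmetic instead.  */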
5431 if (op1 == const0_rtx && (code == LT || code == GE)
5432 && GET_MODE_CLASS (mode) == MODE_INT
5433 && (normalizep || STORE_FLAG_VALUE == 1
5434 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5435 {
5436 subtarget = target;
5437
5438 if (!target)
5439 target_mode = mode;
5440
5441 /* If the result is to be wider than OP0, it is best to convert it
5442 first. If it is to be narrower, it is *incorrect* to convert it
5443 first. */
5444 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5445 {
5446 op0 = convert_modes (target_mode, mode, op0, 0);
5447 mode = target_mode;
5448 }
5449
5450 if (target_mode != mode)
5451 subtarget = 0;
5452
5453 if (code == GE)
5454 op0 = expand_unop (mode, one_cmpl_optab, op0,
5455 ((STORE_FLAG_VALUE == 1 || normalizep)
5456 ? 0 : subtarget), 0);
5457
5458 if (STORE_FLAG_VALUE == 1 || normalizep)
5459 /* If we are supposed to produce a 0/1 value, we want to do
5460 a logical shift from the sign bit to the low-order bit; for
5461 a -1/0 value, we do an arithmetic shift. */
5462 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5463 GET_MODE_BITSIZE (mode) - 1,
5464 subtarget, normalizep != -1);
5465
5466 if (mode != target_mode)
5467 op0 = convert_modes (target_mode, mode, op0, 0);
5468
5469 return op0;
5470 }
5471
5472 mclass = GET_MODE_CLASS (mode);
5473 for (compare_mode = mode; compare_mode != VOIDmode;
5474 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5475 {
5476 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5477 icode = optab_handler (cstore_optab, optab_mode);
5478 if (icode != CODE_FOR_nothing)
5479 {
5480 do_pending_stack_adjust ();
5481 tem = emit_cstore (target, icode, code, mode, compare_mode,
5482 unsignedp, op0, op1, normalizep, target_mode);
5483 if (tem)
5484 return tem;
5485
5486 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5487 {
5488 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5489 unsignedp, op1, op0, normalizep, target_mode);
5490 if (tem)
5491 return tem;
5492 }
5493 break;
5494 }
5495 }
5496
5497 return 0;
5498 }
5499
5500 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5501 and storing in TARGET. Normally return TARGET.
5502 Return 0 if that cannot be done.
5503
5504 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5505 it is VOIDmode, they cannot both be CONST_INT.
5506
5507 UNSIGNEDP is for the case where we have to widen the operands
5508 to perform the operation. It says to use zero-extension.
5509
5510 NORMALIZEP is 1 if we should convert the result to be either zero
5511 or one. NORMALIZEP is -1 if we should convert the result to be
5512 either zero or -1. If NORMALIZEP is zero, the result will be left
5513 "raw" out of the scc insn. */
5514
5515 rtx
5516 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5517 enum machine_mode mode, int unsignedp, int normalizep)
5518 {
5519 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5520 enum rtx_code rcode;
5521 rtx subtarget;
5522 rtx tem, last, trueval;
5523
5524 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5525 target_mode);
5526 if (tem)
5527 return tem;
5528
5529 /* If we reached here, we can't do this with a scc insn; however, there
5530 are some comparisons that can be done in other ways. Don't do any
5531 of these cases if branches are very cheap. */
5532 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5533 return 0;
5534
5535 /* See what we need to return. We can only return a 1, -1, or the
5536 sign bit. */
5537
5538 if (normalizep == 0)
5539 {
5540 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5541 normalizep = STORE_FLAG_VALUE;
5542
5543 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5544 ;
5545 else
5546 return 0;
5547 }
5548
5549 last = get_last_insn ();
5550
5551 /* If optimizing, use different pseudo registers for each insn, instead
5552 of reusing the same pseudo. This leads to better CSE, but slows
5553 down the compiler, since there are more pseudos. */
5554 subtarget = (!optimize
5555 && (target_mode == mode)) ? target : NULL_RTX;
5556 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5557
5558 /* For floating-point comparisons, try the reverse comparison or try
5559 changing the "orderedness" of the comparison. */
5560 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5561 {
5562 enum rtx_code first_code;
5563 bool and_them;
5564
5565 rcode = reverse_condition_maybe_unordered (code);
5566 if (can_compare_p (rcode, mode, ccp_store_flag)
5567 && (code == ORDERED || code == UNORDERED
5568 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5569 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5570 {
5571 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5572 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5573
5574 /* For the reverse comparison, use either an addition or an XOR. */
5575 if (want_add
5576 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5577 optimize_insn_for_speed_p ()) == 0)
5578 {
5579 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5580 STORE_FLAG_VALUE, target_mode);
5581 if (tem)
5582 return expand_binop (target_mode, add_optab, tem,
5583 GEN_INT (normalizep),
5584 target, 0, OPTAB_WIDEN);
5585 }
5586 else if (!want_add
5587 && rtx_cost (trueval, XOR, 1,
5588 optimize_insn_for_speed_p ()) == 0)
5589 {
5590 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5591 normalizep, target_mode);
5592 if (tem)
5593 return expand_binop (target_mode, xor_optab, tem, trueval,
5594 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5595 }
5596 }
5597
5598 delete_insns_since (last);
5599
5600 /* Cannot split ORDERED and UNORDERED; only try the above trick. */
5601 if (code == ORDERED || code == UNORDERED)
5602 return 0;
5603
5604 and_them = split_comparison (code, mode, &first_code, &code);
5605
5606 /* If there are no NaNs, the first comparison should always fall through.
5607 Effectively change the comparison to the other one. */
5608 if (!HONOR_NANS (mode))
5609 {
5610 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5611 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5612 target_mode);
5613 }
5614
5615 #ifdef HAVE_conditional_move
5616 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5617 conditional move. */
5618 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5619 normalizep, target_mode);
5620 if (tem == 0)
5621 return 0;
5622
5623 if (and_them)
5624 tem = emit_conditional_move (target, code, op0, op1, mode,
5625 tem, const0_rtx, GET_MODE (tem), 0);
5626 else
5627 tem = emit_conditional_move (target, code, op0, op1, mode,
5628 trueval, tem, GET_MODE (tem), 0);
5629
5630 if (tem == 0)
5631 delete_insns_since (last);
5632 return tem;
5633 #else
5634 return 0;
5635 #endif
5636 }
5637
5638 /* The remaining tricks only apply to integer comparisons. */
5639
5640 if (GET_MODE_CLASS (mode) != MODE_INT)
5641 return 0;
5642
5643 /* If this is an equality comparison of integers, we can try to exclusive-or
5644 (or subtract) the two operands and use a recursive call to try the
5645 comparison with zero. Don't do any of these cases if branches are
5646 very cheap. */
5647
5648 if ((code == EQ || code == NE) && op1 != const0_rtx)
5649 {
5650 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5651 OPTAB_WIDEN);
5652
5653 if (tem == 0)
5654 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5655 OPTAB_WIDEN);
5656 if (tem != 0)
5657 tem = emit_store_flag (target, code, tem, const0_rtx,
5658 mode, unsignedp, normalizep);
5659 if (tem != 0)
5660 return tem;
5661
5662 delete_insns_since (last);
5663 }
5664
5665 /* For integer comparisons, try the reverse comparison. However, for
5666 small X and if we'd have to extend anyway, implementing "X != 0"
5667 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5668 rcode = reverse_condition (code);
5669 if (can_compare_p (rcode, mode, ccp_store_flag)
5670 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5671 && code == NE
5672 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5673 && op1 == const0_rtx))
5674 {
5675 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5676 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5677
5678 /* Again, for the reverse comparison, use either an addition or an XOR. */
5679 if (want_add
5680 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5681 optimize_insn_for_speed_p ()) == 0)
5682 {
5683 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5684 STORE_FLAG_VALUE, target_mode);
5685 if (tem != 0)
5686 tem = expand_binop (target_mode, add_optab, tem,
5687 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5688 }
5689 else if (!want_add
5690 && rtx_cost (trueval, XOR, 1,
5691 optimize_insn_for_speed_p ()) == 0)
5692 {
5693 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5694 normalizep, target_mode);
5695 if (tem != 0)
5696 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5697 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5698 }
5699
5700 if (tem != 0)
5701 return tem;
5702 delete_insns_since (last);
5703 }
5704
5705 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5706 the constant zero. Reject all other comparisons at this point. Only
5707 do LE and GT if branches are expensive since they are expensive on
5708 2-operand machines. */
5709
5710 if (op1 != const0_rtx
5711 || (code != EQ && code != NE
5712 && (BRANCH_COST (optimize_insn_for_speed_p (),
5713 false) <= 1 || (code != LE && code != GT))))
5714 return 0;
5715
5716 /* Try to put the result of the comparison in the sign bit. Assume we can't
5717 do the necessary operation below. */
5718
5719 tem = 0;
5720
5721 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5722 the sign bit set. */
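     /* For example, A = 5 gives 5 | 4 = 5 (sign clear), A = 0 gives
        0 | -1 = -1 (sign set), and A = -8 gives -8 | -9 = -1 (sign set),
        so the sign bit of the result is set exactly when A <= 0.  */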
5723
5724 if (code == LE)
5725 {
5726 /* This is destructive, so SUBTARGET can't be OP0. */
5727 if (rtx_equal_p (subtarget, op0))
5728 subtarget = 0;
5729
5730 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5731 OPTAB_WIDEN);
5732 if (tem)
5733 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5734 OPTAB_WIDEN);
5735 }
5736
5737 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5738 number of bits in the mode of OP0, minus one. */
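     /* E.g. for 32-bit A: A = 5 gives (5 >> 31) - 5 = -5 (sign set),
        A = 0 gives 0 - 0 = 0 (sign clear), and A = -3 gives
        (-3 >> 31) - (-3) = -1 + 3 = 2 (sign clear), so the sign bit of the
        result is set exactly when A > 0.  */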
5739
5740 if (code == GT)
5741 {
5742 if (rtx_equal_p (subtarget, op0))
5743 subtarget = 0;
5744
5745 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5746 GET_MODE_BITSIZE (mode) - 1,
5747 subtarget, 0);
5748 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5749 OPTAB_WIDEN);
5750 }
5751
5752 if (code == EQ || code == NE)
5753 {
5754 /* For EQ or NE, one way to do the comparison is to apply an operation
5755 that converts the operand into a positive number if it is nonzero
5756 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5757 for NE we negate. This puts the result in the sign bit. Then we
5758 normalize with a shift, if needed.
5759
5760 Two operations that can do the above actions are ABS and FFS, so try
5761 them. If that doesn't work, and MODE is smaller than a full word,
5762 we can use zero-extension to the wider mode (an unsigned conversion)
5763 as the operation. */
5764
5765 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5766 that is compensated by the subsequent overflow when subtracting
5767 one / negating. */
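     /* Worked example with ABS on 32-bit A: A = 7 gives abs = 7, so
        7 - 1 = 6 (sign clear) for EQ and -7 (sign set) for NE; A = 0 gives
        abs = 0, so 0 - 1 = -1 (sign set) for EQ and 0 (sign clear) for NE.
        For A = INT_MIN, abs wraps back to INT_MIN, and the subtract / negate
        wraps again to INT_MAX / INT_MIN, which still has the right sign.  */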
5768
5769 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5770 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5771 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5772 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5773 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5774 {
5775 tem = convert_modes (word_mode, mode, op0, 1);
5776 mode = word_mode;
5777 }
5778
5779 if (tem != 0)
5780 {
5781 if (code == EQ)
5782 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5783 0, OPTAB_WIDEN);
5784 else
5785 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5786 }
5787
5788 /* If we couldn't do it that way, for NE we can "or" the two's complement
5789 of the value with itself. For EQ, we take the one's complement of
5790 that "or", which is an extra insn, so we only handle EQ if branches
5791 are expensive. */
5792
5793 if (tem == 0
5794 && (code == NE
5795 || BRANCH_COST (optimize_insn_for_speed_p (),
5796 false) > 1))
5797 {
5798 if (rtx_equal_p (subtarget, op0))
5799 subtarget = 0;
5800
5801 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5802 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5803 OPTAB_WIDEN);
5804
5805 if (tem && code == EQ)
5806 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5807 }
5808 }
5809
5810 if (tem && normalizep)
5811 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5812 GET_MODE_BITSIZE (mode) - 1,
5813 subtarget, normalizep == 1);
5814
5815 if (tem)
5816 {
5817 if (!target)
5818 ;
5819 else if (GET_MODE (tem) != target_mode)
5820 {
5821 convert_move (target, tem, 0);
5822 tem = target;
5823 }
5824 else if (!subtarget)
5825 {
5826 emit_move_insn (target, tem);
5827 tem = target;
5828 }
5829 }
5830 else
5831 delete_insns_since (last);
5832
5833 return tem;
5834 }
5835
5836 /* Like emit_store_flag, but always succeeds. */
5837
5838 rtx
5839 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5840 enum machine_mode mode, int unsignedp, int normalizep)
5841 {
5842 rtx tem, label;
5843 rtx trueval, falseval;
5844
5845 /* First see if emit_store_flag can do the job. */
5846 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5847 if (tem != 0)
5848 return tem;
5849
5850 if (!target)
5851 target = gen_reg_rtx (word_mode);
5852
5853 /* If this failed, we have to do this with set/compare/jump/set code.
5854 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5855 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5856 if (code == NE
5857 && GET_MODE_CLASS (mode) == MODE_INT
5858 && REG_P (target)
5859 && op0 == target
5860 && op1 == const0_rtx)
5861 {
5862 label = gen_label_rtx ();
5863 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5864 mode, NULL_RTX, NULL_RTX, label, -1);
5865 emit_move_insn (target, trueval);
5866 emit_label (label);
5867 return target;
5868 }
5869
5870 if (!REG_P (target)
5871 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5872 target = gen_reg_rtx (GET_MODE (target));
5873
5874 /* Jump in the right direction if the target cannot implement CODE
5875 but can jump on its reverse condition. */
5876 falseval = const0_rtx;
5877 if (! can_compare_p (code, mode, ccp_jump)
5878 && (! FLOAT_MODE_P (mode)
5879 || code == ORDERED || code == UNORDERED
5880 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5881 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5882 {
5883 enum rtx_code rcode;
5884 if (FLOAT_MODE_P (mode))
5885 rcode = reverse_condition_maybe_unordered (code);
5886 else
5887 rcode = reverse_condition (code);
5888
5889 /* Canonicalize to UNORDERED for the libcall. */
5890 if (can_compare_p (rcode, mode, ccp_jump)
5891 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5892 {
5893 falseval = trueval;
5894 trueval = const0_rtx;
5895 code = rcode;
5896 }
5897 }
5898
5899 emit_move_insn (target, trueval);
5900 label = gen_label_rtx ();
5901 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5902 NULL_RTX, label, -1);
5903
5904 emit_move_insn (target, falseval);
5905 emit_label (label);
5906
5907 return target;
5908 }
5909 \f
5910 /* Perform possibly multi-word comparison and conditional jump to LABEL
5911 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5912 now a thin wrapper around do_compare_rtx_and_jump. */
5913
5914 static void
5915 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5916 rtx label)
5917 {
5918 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5919 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5920 NULL_RTX, NULL_RTX, label, -1);
5921 }