expmed.c (store_bit_field): Assert that BITREGION_START is a multiple of a unit befor...
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
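/* The test relies on x & (x - 1) clearing the lowest set bit of x:
   a power of two has exactly one bit set, so the result is zero.
   Worked example: 8 & 7 == 0 (power of two), 6 & 5 == 4 (not a power
   of two), and 0 & (0 - 1) == 0, hence the "or zero" in the name.  */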
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
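/* Initialize the cost tables used throughout this file: the cost of
   addition, negation, multiplication, division, shifts and
   shift-add/sub combinations for every integer mode, computed
   separately for the size- and speed-optimized cases by pricing small
   synthetic RTL expressions with set_src_cost.  */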
96 void
97 init_expmed (void)
98 {
99 struct
100 {
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
119
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
125
126
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
128 {
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
131 }
132 memset (&all, 0, sizeof all);
133
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
137
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
141
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
144
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
148
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
152
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
156
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
160
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
164
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
167
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
171
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
174
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
177
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
180
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
183
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
187
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
191
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
195
196 for (speed = 0; speed < 2; speed++)
197 {
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
219
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
225
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
230
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
233 {
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
238
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
243 }
244
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
248
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
251 {
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
254
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
262 }
263 }
264 }
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
270 }
271
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
275
276 rtx
277 negate_rtx (enum machine_mode mode, rtx x)
278 {
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
280
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
283
284 return result;
285 }
286
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
293 {
294 const struct insn_data_d *data;
295
296 switch (pattern)
297 {
298 case EP_insv:
299 if (HAVE_insv)
300 {
301 data = &insn_data[CODE_FOR_insv];
302 break;
303 }
304 return MAX_MACHINE_MODE;
305
306 case EP_extv:
307 if (HAVE_extv)
308 {
309 data = &insn_data[CODE_FOR_extv];
310 break;
311 }
312 return MAX_MACHINE_MODE;
313
314 case EP_extzv:
315 if (HAVE_extzv)
316 {
317 data = &insn_data[CODE_FOR_extzv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 default:
323 gcc_unreachable ();
324 }
325
326 if (opno == -1)
327 return VOIDmode;
328
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
334 }
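/* Typical use (a sketch, mirroring the EP_insv use in store_bit_field_1
   below): query the mode wanted for operand 3 of insv, the value being
   inserted, and skip the pattern entirely when it is unavailable:

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
     if (op_mode == MAX_MACHINE_MODE)
       ... fall back to shifts and masks ...  */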
335 \f
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
338
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
342
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
350 {
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
357
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
359
360 while (GET_CODE (op0) == SUBREG)
361 {
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
369
370 byte_offset = 0;
371
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
374 {
375 int difference = inner_mode_size - outer_mode_size;
376
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
381 }
382 else
383 byte_offset = SUBREG_BYTE (op0);
384
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
387 }
388
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
394
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403 {
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
409
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
415 }
416
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
419
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
423
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
428
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
440 {
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
448 }
449
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
454 {
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
457 {
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
461 {
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
464 }
465 }
466 }
467
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
472 {
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
476 }
477
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
485
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
488
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
493 {
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
498
499 if (GET_CODE (arg0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
508 }
509
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
513 {
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
515
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
521 }
522 }
523
524 /* Handle fields bigger than a word. */
525
526 if (bitsize > BITS_PER_WORD)
527 {
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
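/* Worked example: storing a 40-bit field when BITS_PER_WORD is 32
   takes two iterations below, a full 32-bit word followed by the
   remaining 8 bits; that 8-bit chunk holds the most significant part
   of the field and is the one that may need the extra shift for
   big-endian targets.  */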
533
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
538
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
547
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
550 {
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
554 unsigned int bit_offset = (backwards
555 ? MAX ((int) bitsize - ((int) i + 1)
556 * BITS_PER_WORD,
557 0)
558 : (int) i * BITS_PER_WORD);
559 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
560 unsigned HOST_WIDE_INT new_bitsize =
561 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
562
563 /* If the remaining chunk doesn't have full wordsize we have
564 to make sure that for big endian machines the higher order
565 bits are used. */
566 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
567 value_word = simplify_expand_binop (word_mode, lshr_optab,
568 value_word,
569 GEN_INT (BITS_PER_WORD
570 - new_bitsize),
571 NULL_RTX, true,
572 OPTAB_LIB_WIDEN);
573
574 if (!store_bit_field_1 (op0, new_bitsize,
575 bitnum + bit_offset,
576 bitregion_start, bitregion_end,
577 word_mode,
578 value_word, fallback_p))
579 {
580 delete_insns_since (last);
581 return false;
582 }
583 }
584 return true;
585 }
586
587 /* From here on we can assume that the object in which the field is to be
588 stored is a full word (whatever type that is), since the field itself is shorter than a word. */
589
590 /* OFFSET is the number of words or bytes (UNIT says which)
591 from STR_RTX to the first word or byte containing part of the field. */
592
593 if (!MEM_P (op0))
594 {
595 if (offset != 0
596 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
597 {
598 if (!REG_P (op0))
599 {
600 /* Since this is a destination (lvalue), we can't copy
601 it to a pseudo. We can remove a SUBREG that does not
602 change the size of the operand. Such a SUBREG may
603 have been added above. */
604 gcc_assert (GET_CODE (op0) == SUBREG
605 && (GET_MODE_SIZE (GET_MODE (op0))
606 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
607 op0 = SUBREG_REG (op0);
608 }
609 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
610 op0, (offset * UNITS_PER_WORD));
611 }
612 offset = 0;
613 }
614
615 /* If VALUE has a floating-point or complex mode, access it as an
616 integer of the corresponding size. This can occur on a machine
617 with 64 bit registers that uses SFmode for float. It can also
618 occur for unaligned float or complex fields. */
619 orig_value = value;
620 if (GET_MODE (value) != VOIDmode
621 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
622 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
623 {
624 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
625 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
626 }
627
628 /* Now OFFSET is nonzero only if OP0 is memory
629 and is therefore always measured in bytes. */
630
631 if (HAVE_insv
632 && GET_MODE (value) != BLKmode
633 && bitsize > 0
634 && GET_MODE_BITSIZE (op_mode) >= bitsize
635 /* Do not use insv for volatile bitfields when
636 -fstrict-volatile-bitfields is in effect. */
637 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
638 && flag_strict_volatile_bitfields > 0)
639 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
640 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
641 {
642 struct expand_operand ops[4];
643 int xbitpos = bitpos;
644 rtx value1;
645 rtx xop0 = op0;
646 rtx last = get_last_insn ();
647 bool copy_back = false;
648
649 /* Add OFFSET into OP0's address. */
650 if (MEM_P (xop0))
651 xop0 = adjust_address (xop0, byte_mode, offset);
652
653 /* If xop0 is a register, we need it in OP_MODE
654 to make it acceptable to the format of insv. */
655 if (GET_CODE (xop0) == SUBREG)
656 /* We can't just change the mode, because this might clobber op0,
657 and we will need the original value of op0 if insv fails. */
658 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
659 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
660 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
661
662 /* If the destination is a paradoxical subreg such that we need a
663 truncate to the inner mode, perform the insertion on a temporary and
664 truncate the result to the original destination. Note that we can't
665 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
666 X) 0)) is (reg:N X). */
667 if (GET_CODE (xop0) == SUBREG
668 && REG_P (SUBREG_REG (xop0))
669 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
670 op_mode)))
671 {
672 rtx tem = gen_reg_rtx (op_mode);
673 emit_move_insn (tem, xop0);
674 xop0 = tem;
675 copy_back = true;
676 }
677
678 /* We have been counting XBITPOS within UNIT.
679 Count instead within the size of the register. */
680 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
681 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
682
683 unit = GET_MODE_BITSIZE (op_mode);
684
685 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
686 "backwards" from the size of the unit we are inserting into.
687 Otherwise, we count bits from the most significant on a
688 BYTES/BITS_BIG_ENDIAN machine. */
689
690 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
691 xbitpos = unit - bitsize - xbitpos;
692
693 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
694 value1 = value;
695 if (GET_MODE (value) != op_mode)
696 {
697 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
698 {
699 /* Optimization: Don't bother really extending VALUE
700 if it has all the bits we will actually use. However,
701 if we must narrow it, be sure we do it correctly. */
702
703 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
704 {
705 rtx tmp;
706
707 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
708 if (! tmp)
709 tmp = simplify_gen_subreg (op_mode,
710 force_reg (GET_MODE (value),
711 value1),
712 GET_MODE (value), 0);
713 value1 = tmp;
714 }
715 else
716 value1 = gen_lowpart (op_mode, value1);
717 }
718 else if (CONST_INT_P (value))
719 value1 = gen_int_mode (INTVAL (value), op_mode);
720 else
721 /* Parse phase is supposed to make VALUE's data type
722 match that of the component reference, which is a type
723 at least as wide as the field; so VALUE should have
724 a mode that corresponds to that type. */
725 gcc_assert (CONSTANT_P (value));
726 }
727
728 create_fixed_operand (&ops[0], xop0);
729 create_integer_operand (&ops[1], bitsize);
730 create_integer_operand (&ops[2], xbitpos);
731 create_input_operand (&ops[3], value1, op_mode);
732 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
733 {
734 if (copy_back)
735 convert_move (op0, xop0, true);
736 return true;
737 }
738 delete_insns_since (last);
739 }
740
741 /* If OP0 is a memory, try copying it to a register and seeing if a
742 cheap register alternative is available. */
743 if (HAVE_insv && MEM_P (op0))
744 {
745 enum machine_mode bestmode;
746 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
747
748 if (bitregion_end)
749 maxbits = bitregion_end - bitregion_start + 1;
750
751 /* Get the mode to use for inserting into this field. If OP0 is
752 BLKmode, get the smallest mode consistent with the alignment. If
753 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
754 mode. Otherwise, use the smallest mode containing the field. */
755
756 if (GET_MODE (op0) == BLKmode
757 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
758 || (op_mode != MAX_MACHINE_MODE
759 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
760 bestmode = get_best_mode (bitsize, bitnum,
761 bitregion_start, bitregion_end,
762 MEM_ALIGN (op0),
763 (op_mode == MAX_MACHINE_MODE
764 ? VOIDmode : op_mode),
765 MEM_VOLATILE_P (op0));
766 else
767 bestmode = GET_MODE (op0);
768
769 if (bestmode != VOIDmode
770 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
771 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
772 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
773 {
774 rtx last, tempreg, xop0;
775 unsigned HOST_WIDE_INT xoffset, xbitpos;
776
777 last = get_last_insn ();
778
779 /* Adjust address to point to the containing unit of
780 that mode. Compute the offset as a multiple of this unit,
781 counting in bytes. */
782 unit = GET_MODE_BITSIZE (bestmode);
783 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
784 xbitpos = bitnum % unit;
785 xop0 = adjust_address (op0, bestmode, xoffset);
786
787 /* Fetch that unit, store the bitfield in it, then store
788 the unit. */
789 tempreg = copy_to_reg (xop0);
790 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
791 bitregion_start, bitregion_end,
792 fieldmode, orig_value, false))
793 {
794 emit_move_insn (xop0, tempreg);
795 return true;
796 }
797 delete_insns_since (last);
798 }
799 }
800
801 if (!fallback_p)
802 return false;
803
804 store_fixed_bit_field (op0, offset, bitsize, bitpos,
805 bitregion_start, bitregion_end, value);
806 return true;
807 }
808
809 /* Generate code to store value from rtx VALUE
810 into a bit-field within structure STR_RTX
811 containing BITSIZE bits starting at bit BITNUM.
812
813 BITREGION_START is the bitpos of the first bitfield in this region.
814 BITREGION_END is the bitpos of the ending bitfield in this region.
815 Both are zero if the C++ memory model does not apply or if we are not
816 interested in keeping track of bitfield regions.
817
818 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
819
820 void
821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
822 unsigned HOST_WIDE_INT bitnum,
823 unsigned HOST_WIDE_INT bitregion_start,
824 unsigned HOST_WIDE_INT bitregion_end,
825 enum machine_mode fieldmode,
826 rtx value)
827 {
828 /* Under the C++0x memory model, we must not touch bits outside the
829 bit region. Adjust the address to start at the beginning of the
830 bit region. */
831 if (MEM_P (str_rtx) && bitregion_start > 0)
832 {
833 enum machine_mode bestmode;
834 enum machine_mode op_mode;
835 unsigned HOST_WIDE_INT offset;
836
837 op_mode = mode_for_extraction (EP_insv, 3);
838 if (op_mode == MAX_MACHINE_MODE)
839 op_mode = VOIDmode;
840
841 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
842
843 offset = bitregion_start / BITS_PER_UNIT;
844 bitnum -= bitregion_start;
845 bitregion_end -= bitregion_start;
846 bitregion_start = 0;
847 bestmode = get_best_mode (bitsize, bitnum,
848 bitregion_start, bitregion_end,
849 MEM_ALIGN (str_rtx),
850 op_mode,
851 MEM_VOLATILE_P (str_rtx));
852 str_rtx = adjust_address (str_rtx, bestmode, offset);
853 }
854
855 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
856 bitregion_start, bitregion_end,
857 fieldmode, value, true))
858 gcc_unreachable ();
859 }
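/* Illustrative call (a sketch, not taken from an actual caller): to
   store the 8-bit value held in VAL into bits 3..10 of the memory
   reference MEM, with no C++0x bit region being tracked and taking
   QImode as the field's mode, one would write

     store_bit_field (mem, 8, 3, 0, 0, QImode, val);

   Callers that do track a bit region pass nonzero BITREGION_START and
   BITREGION_END instead.  */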
860 \f
861 /* Use shifts and boolean operations to store VALUE
862 into a bit field of width BITSIZE
863 in the memory location specified by OP0, offset by OFFSET bytes.
864 (OFFSET must be 0 if OP0 is a register.)
865 The field starts at position BITPOS within the byte.
866 (If OP0 is a register, it may be a full word or a narrower mode,
867 but BITPOS still counts within a full word,
868 which is significant on bigendian machines.) */
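/* In essence, for a field that fits in a single word, the function
   computes (C sketch, assuming VALUE is already truncated to BITSIZE
   bits):

     mask = ((1 << bitsize) - 1) << bitpos;
     word = (word & ~mask) | (value << bitpos);

   using and_optab, ior_optab and expand_shift; the AND is skipped when
   VALUE is all ones and the IOR when it is all zeros.  */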
869
870 static void
871 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
872 unsigned HOST_WIDE_INT bitsize,
873 unsigned HOST_WIDE_INT bitpos,
874 unsigned HOST_WIDE_INT bitregion_start,
875 unsigned HOST_WIDE_INT bitregion_end,
876 rtx value)
877 {
878 enum machine_mode mode;
879 unsigned int total_bits = BITS_PER_WORD;
880 rtx temp;
881 int all_zero = 0;
882 int all_one = 0;
883
884 /* There is a case not handled here:
885 a structure with a known alignment of just a halfword
886 and a field split across two aligned halfwords within the structure.
887 Or likewise a structure with a known alignment of just a byte
888 and a field split across two bytes.
889 Such cases are not supposed to be able to occur. */
890
891 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
892 {
893 gcc_assert (!offset);
894 /* Special treatment for a bit field split across two registers. */
895 if (bitsize + bitpos > BITS_PER_WORD)
896 {
897 store_split_bit_field (op0, bitsize, bitpos,
898 bitregion_start, bitregion_end,
899 value);
900 return;
901 }
902 }
903 else
904 {
905 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
906
907 if (bitregion_end)
908 maxbits = bitregion_end - bitregion_start + 1;
909
910 /* Get the proper mode to use for this field. We want a mode that
911 includes the entire field. If such a mode would be larger than
912 a word, we won't be doing the extraction the normal way.
913 We don't want a mode bigger than the destination. */
914
915 mode = GET_MODE (op0);
916 if (GET_MODE_BITSIZE (mode) == 0
917 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
918 mode = word_mode;
919
920 if (MEM_VOLATILE_P (op0)
921 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
922 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
923 && flag_strict_volatile_bitfields > 0)
924 mode = GET_MODE (op0);
925 else
926 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
927 bitregion_start, bitregion_end,
928 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
929
930 if (mode == VOIDmode)
931 {
932 /* The only way this should occur is if the field spans word
933 boundaries. */
934 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
935 bitregion_start, bitregion_end, value);
936 return;
937 }
938
939 total_bits = GET_MODE_BITSIZE (mode);
940
941 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
942 be in the range 0 to total_bits-1, and put any excess bytes in
943 OFFSET. */
944 if (bitpos >= total_bits)
945 {
946 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
947 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
948 * BITS_PER_UNIT);
949 }
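/* For example, with total_bits == 32 and BITPOS == 70, the adjustment
   above adds 8 bytes to OFFSET (two full 32-bit units) and leaves
   BITPOS == 6.  */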
950
951 /* Get ref to an aligned byte, halfword, or word containing the field.
952 Adjust BITPOS to be position within a word,
953 and OFFSET to be the offset of that word.
954 Then alter OP0 to refer to that word. */
955 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
956 offset -= (offset % (total_bits / BITS_PER_UNIT));
957 op0 = adjust_address (op0, mode, offset);
958 }
959
960 mode = GET_MODE (op0);
961
962 /* Now MODE is either some integral mode for a MEM as OP0,
963 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
964 The bit field is contained entirely within OP0.
965 BITPOS is the starting bit number within OP0.
966 (OP0's mode may actually be narrower than MODE.) */
967
968 if (BYTES_BIG_ENDIAN)
969 /* BITPOS is the distance between our msb
970 and that of the containing datum.
971 Convert it to the distance from the lsb. */
972 bitpos = total_bits - bitsize - bitpos;
973
974 /* Now BITPOS is always the distance between our lsb
975 and that of OP0. */
976
977 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
978 we must first convert its mode to MODE. */
979
980 if (CONST_INT_P (value))
981 {
982 HOST_WIDE_INT v = INTVAL (value);
983
984 if (bitsize < HOST_BITS_PER_WIDE_INT)
985 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
986
987 if (v == 0)
988 all_zero = 1;
989 else if ((bitsize < HOST_BITS_PER_WIDE_INT
990 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
991 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
992 all_one = 1;
993
994 value = lshift_value (mode, value, bitpos, bitsize);
995 }
996 else
997 {
998 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
999 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1000
1001 if (GET_MODE (value) != mode)
1002 value = convert_to_mode (mode, value, 1);
1003
1004 if (must_and)
1005 value = expand_binop (mode, and_optab, value,
1006 mask_rtx (mode, 0, bitsize, 0),
1007 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1008 if (bitpos > 0)
1009 value = expand_shift (LSHIFT_EXPR, mode, value,
1010 bitpos, NULL_RTX, 1);
1011 }
1012
1013 /* Now clear the chosen bits in OP0,
1014 except that if VALUE is -1 we need not bother. */
1015 /* We keep the intermediates in registers to allow CSE to combine
1016 consecutive bitfield assignments. */
1017
1018 temp = force_reg (mode, op0);
1019
1020 if (! all_one)
1021 {
1022 temp = expand_binop (mode, and_optab, temp,
1023 mask_rtx (mode, bitpos, bitsize, 1),
1024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1025 temp = force_reg (mode, temp);
1026 }
1027
1028 /* Now logical-or VALUE into OP0, unless it is zero. */
1029
1030 if (! all_zero)
1031 {
1032 temp = expand_binop (mode, ior_optab, temp, value,
1033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034 temp = force_reg (mode, temp);
1035 }
1036
1037 if (op0 != temp)
1038 {
1039 op0 = copy_rtx (op0);
1040 emit_move_insn (op0, temp);
1041 }
1042 }
1043 \f
1044 /* Store a bit field that is split across multiple accessible memory objects.
1045
1046 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1047 BITSIZE is the field width; BITPOS the position of its first bit
1048 (within the word).
1049 VALUE is the value to store.
1050
1051 This does not yet handle fields wider than BITS_PER_WORD. */
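/* Worked example: with BITS_PER_WORD == 32, a 20-bit field starting at
   BITPOS 28 is stored in two pieces, 4 bits into the first word and
   the remaining 16 bits into the following word.  */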
1052
1053 static void
1054 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1055 unsigned HOST_WIDE_INT bitpos,
1056 unsigned HOST_WIDE_INT bitregion_start,
1057 unsigned HOST_WIDE_INT bitregion_end,
1058 rtx value)
1059 {
1060 unsigned int unit;
1061 unsigned int bitsdone = 0;
1062
1063 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1064 much at a time. */
1065 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1066 unit = BITS_PER_WORD;
1067 else
1068 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1069
1070 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1071 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1072 that VALUE might be a floating-point constant. */
1073 if (CONSTANT_P (value) && !CONST_INT_P (value))
1074 {
1075 rtx word = gen_lowpart_common (word_mode, value);
1076
1077 if (word && (value != word))
1078 value = word;
1079 else
1080 value = gen_lowpart_common (word_mode,
1081 force_reg (GET_MODE (value) != VOIDmode
1082 ? GET_MODE (value)
1083 : word_mode, value));
1084 }
1085
1086 while (bitsdone < bitsize)
1087 {
1088 unsigned HOST_WIDE_INT thissize;
1089 rtx part, word;
1090 unsigned HOST_WIDE_INT thispos;
1091 unsigned HOST_WIDE_INT offset;
1092
1093 offset = (bitpos + bitsdone) / unit;
1094 thispos = (bitpos + bitsdone) % unit;
1095
1096 /* THISSIZE must not overrun a word boundary. Otherwise,
1097 store_fixed_bit_field will call us again, and we will mutually
1098 recurse forever. */
1099 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1100 thissize = MIN (thissize, unit - thispos);
1101
1102 if (BYTES_BIG_ENDIAN)
1103 {
1104 int total_bits;
1105
1106 /* We must do an endian conversion exactly the same way as it is
1107 done in extract_bit_field, so that the two calls to
1108 extract_fixed_bit_field will have comparable arguments. */
1109 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1110 total_bits = BITS_PER_WORD;
1111 else
1112 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1113
1114 /* Fetch successively less significant portions. */
1115 if (CONST_INT_P (value))
1116 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1117 >> (bitsize - bitsdone - thissize))
1118 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1119 else
1120 /* The args are chosen so that the last part includes the
1121 lsb. Give extract_bit_field the value it needs (with
1122 endianness compensation) to fetch the piece we want. */
1123 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1124 total_bits - bitsize + bitsdone,
1125 NULL_RTX, 1, false);
1126 }
1127 else
1128 {
1129 /* Fetch successively more significant portions. */
1130 if (CONST_INT_P (value))
1131 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1132 >> bitsdone)
1133 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1134 else
1135 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1136 bitsdone, NULL_RTX, 1, false);
1137 }
1138
1139 /* If OP0 is a register, then handle OFFSET here.
1140
1141 When handling multiword bitfields, extract_bit_field may pass
1142 down a word_mode SUBREG of a larger REG for a bitfield that actually
1143 crosses a word boundary. Thus, for a SUBREG, we must find
1144 the current word starting from the base register. */
1145 if (GET_CODE (op0) == SUBREG)
1146 {
1147 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1148 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1149 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1150 word = word_offset ? const0_rtx : op0;
1151 else
1152 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1153 GET_MODE (SUBREG_REG (op0)));
1154 offset = 0;
1155 }
1156 else if (REG_P (op0))
1157 {
1158 enum machine_mode op0_mode = GET_MODE (op0);
1159 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1160 word = offset ? const0_rtx : op0;
1161 else
1162 word = operand_subword_force (op0, offset, GET_MODE (op0));
1163 offset = 0;
1164 }
1165 else
1166 word = op0;
1167
1168 /* OFFSET is in UNITs, and UNIT is in bits.
1169 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1170 it is just an out-of-bounds access. Ignore it. */
1171 if (word != const0_rtx)
1172 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1173 thispos, bitregion_start, bitregion_end, part);
1174 bitsdone += thissize;
1175 }
1176 }
1177 \f
1178 /* A subroutine of extract_bit_field_1 that converts return value X
1179 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1180 to extract_bit_field. */
1181
1182 static rtx
1183 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1184 enum machine_mode tmode, bool unsignedp)
1185 {
1186 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1187 return x;
1188
1189 /* If the x mode is not a scalar integral, first convert to the
1190 integer mode of that size and then access it as a floating-point
1191 value via a SUBREG. */
1192 if (!SCALAR_INT_MODE_P (tmode))
1193 {
1194 enum machine_mode smode;
1195
1196 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1197 x = convert_to_mode (smode, x, unsignedp);
1198 x = force_reg (smode, x);
1199 return gen_lowpart (tmode, x);
1200 }
1201
1202 return convert_to_mode (tmode, x, unsignedp);
1203 }
1204
1205 /* A subroutine of extract_bit_field, with the same arguments.
1206 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1207 if we can find no other means of implementing the operation.
1208 if FALLBACK_P is false, return NULL instead. */
1209
1210 static rtx
1211 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1212 unsigned HOST_WIDE_INT bitnum,
1213 int unsignedp, bool packedp, rtx target,
1214 enum machine_mode mode, enum machine_mode tmode,
1215 bool fallback_p)
1216 {
1217 unsigned int unit
1218 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1219 unsigned HOST_WIDE_INT offset, bitpos;
1220 rtx op0 = str_rtx;
1221 enum machine_mode int_mode;
1222 enum machine_mode ext_mode;
1223 enum machine_mode mode1;
1224 int byte_offset;
1225
1226 if (tmode == VOIDmode)
1227 tmode = mode;
1228
1229 while (GET_CODE (op0) == SUBREG)
1230 {
1231 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1232 op0 = SUBREG_REG (op0);
1233 }
1234
1235 /* If we have an out-of-bounds access to a register, just return an
1236 uninitialized register of the required mode. This can occur if the
1237 source code contains an out-of-bounds access to a small array. */
1238 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1239 return gen_reg_rtx (tmode);
1240
1241 if (REG_P (op0)
1242 && mode == GET_MODE (op0)
1243 && bitnum == 0
1244 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1245 {
1246 /* We're trying to extract a full register from itself. */
1247 return op0;
1248 }
1249
1250 /* See if we can get a better vector mode before extracting. */
1251 if (VECTOR_MODE_P (GET_MODE (op0))
1252 && !MEM_P (op0)
1253 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1254 {
1255 enum machine_mode new_mode;
1256
1257 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1258 new_mode = MIN_MODE_VECTOR_FLOAT;
1259 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1260 new_mode = MIN_MODE_VECTOR_FRACT;
1261 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1262 new_mode = MIN_MODE_VECTOR_UFRACT;
1263 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1264 new_mode = MIN_MODE_VECTOR_ACCUM;
1265 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1266 new_mode = MIN_MODE_VECTOR_UACCUM;
1267 else
1268 new_mode = MIN_MODE_VECTOR_INT;
1269
1270 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1271 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1272 && targetm.vector_mode_supported_p (new_mode))
1273 break;
1274 if (new_mode != VOIDmode)
1275 op0 = gen_lowpart (new_mode, op0);
1276 }
1277
1278 /* Use vec_extract patterns for extracting parts of vectors whenever
1279 available. */
1280 if (VECTOR_MODE_P (GET_MODE (op0))
1281 && !MEM_P (op0)
1282 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1283 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1284 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1285 {
1286 struct expand_operand ops[3];
1287 enum machine_mode outermode = GET_MODE (op0);
1288 enum machine_mode innermode = GET_MODE_INNER (outermode);
1289 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1290 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1291
1292 create_output_operand (&ops[0], target, innermode);
1293 create_input_operand (&ops[1], op0, outermode);
1294 create_integer_operand (&ops[2], pos);
1295 if (maybe_expand_insn (icode, 3, ops))
1296 {
1297 target = ops[0].value;
1298 if (GET_MODE (target) != mode)
1299 return gen_lowpart (tmode, target);
1300 return target;
1301 }
1302 }
1303
1304 /* Make sure we are playing with integral modes. Pun with subregs
1305 if we aren't. */
1306 {
1307 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1308 if (imode != GET_MODE (op0))
1309 {
1310 if (MEM_P (op0))
1311 op0 = adjust_address (op0, imode, 0);
1312 else if (imode != BLKmode)
1313 {
1314 op0 = gen_lowpart (imode, op0);
1315
1316 /* If we got a SUBREG, force it into a register since we
1317 aren't going to be able to do another SUBREG on it. */
1318 if (GET_CODE (op0) == SUBREG)
1319 op0 = force_reg (imode, op0);
1320 }
1321 else if (REG_P (op0))
1322 {
1323 rtx reg, subreg;
1324 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1325 MODE_INT);
1326 reg = gen_reg_rtx (imode);
1327 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1328 emit_move_insn (subreg, op0);
1329 op0 = reg;
1330 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1331 }
1332 else
1333 {
1334 rtx mem = assign_stack_temp (GET_MODE (op0),
1335 GET_MODE_SIZE (GET_MODE (op0)), 0);
1336 emit_move_insn (mem, op0);
1337 op0 = adjust_address (mem, BLKmode, 0);
1338 }
1339 }
1340 }
1341
1342 /* We may be accessing data outside the field, which means
1343 we can alias adjacent data. */
1344 if (MEM_P (op0))
1345 {
1346 op0 = shallow_copy_rtx (op0);
1347 set_mem_alias_set (op0, 0);
1348 set_mem_expr (op0, 0);
1349 }
1350
1351 /* Extraction of a full-word or multi-word value from a structure
1352 in a register or aligned memory can be done with just a SUBREG.
1353 A subword value in the least significant part of a register
1354 can also be extracted with a SUBREG. For this, we need the
1355 byte offset of the value in op0. */
1356
1357 bitpos = bitnum % unit;
1358 offset = bitnum / unit;
1359 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1360
1361 /* If OP0 is a register, BITPOS must count within a word.
1362 But as we have it, it counts within whatever size OP0 now has.
1363 On a bigendian machine, these are not the same, so convert. */
1364 if (BYTES_BIG_ENDIAN
1365 && !MEM_P (op0)
1366 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1367 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1368
1369 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1370 If that's wrong, the solution is to test for it and set TARGET to 0
1371 if needed. */
1372
1373 /* Only scalar integer modes can be converted via subregs. There is an
1374 additional problem for FP modes here in that they can have a precision
1375 which is different from the size. mode_for_size uses precision, but
1376 we want a mode based on the size, so we must avoid calling it for FP
1377 modes. */
1378 mode1 = (SCALAR_INT_MODE_P (tmode)
1379 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1380 : mode);
1381
1382 /* If the bitfield is volatile, we need to make sure the access
1383 remains on a type-aligned boundary. */
1384 if (GET_CODE (op0) == MEM
1385 && MEM_VOLATILE_P (op0)
1386 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1387 && flag_strict_volatile_bitfields > 0)
1388 goto no_subreg_mode_swap;
1389
1390 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1391 && bitpos % BITS_PER_WORD == 0)
1392 || (mode1 != BLKmode
1393 /* ??? The big endian test here is wrong. This is correct
1394 if the value is in a register, and if mode_for_size is not
1395 the same mode as op0. This causes us to get unnecessarily
1396 inefficient code from the Thumb port when -mbig-endian. */
1397 && (BYTES_BIG_ENDIAN
1398 ? bitpos + bitsize == BITS_PER_WORD
1399 : bitpos == 0)))
1400 && ((!MEM_P (op0)
1401 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1402 && GET_MODE_SIZE (mode1) != 0
1403 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1404 || (MEM_P (op0)
1405 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1406 || (offset * BITS_PER_UNIT % bitsize == 0
1407 && MEM_ALIGN (op0) % bitsize == 0)))))
1408 {
1409 if (MEM_P (op0))
1410 op0 = adjust_address (op0, mode1, offset);
1411 else if (mode1 != GET_MODE (op0))
1412 {
1413 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1414 byte_offset);
1415 if (sub == NULL)
1416 goto no_subreg_mode_swap;
1417 op0 = sub;
1418 }
1419 if (mode1 != mode)
1420 return convert_to_mode (tmode, op0, unsignedp);
1421 return op0;
1422 }
1423 no_subreg_mode_swap:
1424
1425 /* Handle fields bigger than a word. */
1426
1427 if (bitsize > BITS_PER_WORD)
1428 {
1429 /* Here we transfer the words of the field
1430 in the order least significant first.
1431 This is because the most significant word is the one which may
1432 be less than full. */
1433
1434 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1435 unsigned int i;
1436
1437 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1438 target = gen_reg_rtx (mode);
1439
1440 /* Indicate for flow that the entire target reg is being set. */
1441 emit_clobber (target);
1442
1443 for (i = 0; i < nwords; i++)
1444 {
1445 /* If I is 0, use the low-order word in both field and target;
1446 if I is 1, use the next to lowest word; and so on. */
1447 /* Word number in TARGET to use. */
1448 unsigned int wordnum
1449 = (WORDS_BIG_ENDIAN
1450 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1451 : i);
1452 /* Offset from start of field in OP0. */
1453 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1454 ? MAX (0, ((int) bitsize - ((int) i + 1)
1455 * (int) BITS_PER_WORD))
1456 : (int) i * BITS_PER_WORD);
1457 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1458 rtx result_part
1459 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1460 bitsize - i * BITS_PER_WORD),
1461 bitnum + bit_offset, 1, false, target_part, mode,
1462 word_mode);
1463
1464 gcc_assert (target_part);
1465
1466 if (result_part != target_part)
1467 emit_move_insn (target_part, result_part);
1468 }
1469
1470 if (unsignedp)
1471 {
1472 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1473 need to be zero'd out. */
1474 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1475 {
1476 unsigned int i, total_words;
1477
1478 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1479 for (i = nwords; i < total_words; i++)
1480 emit_move_insn
1481 (operand_subword (target,
1482 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1483 1, VOIDmode),
1484 const0_rtx);
1485 }
1486 return target;
1487 }
1488
1489 /* Signed bit field: sign-extend with two arithmetic shifts. */
1490 target = expand_shift (LSHIFT_EXPR, mode, target,
1491 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1492 return expand_shift (RSHIFT_EXPR, mode, target,
1493 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1494 }
1495
1496 /* From here on we know the desired field is smaller than a word. */
1497
1498 /* Check if there is a correspondingly-sized integer field, so we can
1499 safely extract it as one size of integer, if necessary; then
1500 truncate or extend to the size that is wanted; then use SUBREGs or
1501 convert_to_mode to get one of the modes we really wanted. */
1502
1503 int_mode = int_mode_for_mode (tmode);
1504 if (int_mode == BLKmode)
1505 int_mode = int_mode_for_mode (mode);
1506 /* Should probably push op0 out to memory and then do a load. */
1507 gcc_assert (int_mode != BLKmode);
1508
1509 /* OFFSET is the number of words or bytes (UNIT says which)
1510 from STR_RTX to the first word or byte containing part of the field. */
1511 if (!MEM_P (op0))
1512 {
1513 if (offset != 0
1514 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1515 {
1516 if (!REG_P (op0))
1517 op0 = copy_to_reg (op0);
1518 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1519 op0, (offset * UNITS_PER_WORD));
1520 }
1521 offset = 0;
1522 }
1523
1524 /* Now OFFSET is nonzero only for memory operands. */
1525 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1526 if (ext_mode != MAX_MACHINE_MODE
1527 && bitsize > 0
1528 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1529 /* Do not use extv/extzv for volatile bitfields when
1530 -fstrict-volatile-bitfields is in effect. */
1531 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1532 && flag_strict_volatile_bitfields > 0)
1533 /* If op0 is a register, we need it in EXT_MODE to make it
1534 acceptable to the format of ext(z)v. */
1535 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1536 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1537 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1538 {
1539 struct expand_operand ops[4];
1540 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1541 rtx xop0 = op0;
1542 rtx xtarget = target;
1543 rtx xspec_target = target;
1544 rtx xspec_target_subreg = 0;
1545
1546 /* If op0 is a register, we need it in EXT_MODE to make it
1547 acceptable to the format of ext(z)v. */
1548 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1549 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1550 if (MEM_P (xop0))
1551 /* Get ref to first byte containing part of the field. */
1552 xop0 = adjust_address (xop0, byte_mode, xoffset);
1553
1554 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1555 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1556 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1557
1558 unit = GET_MODE_BITSIZE (ext_mode);
1559
1560 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1561 "backwards" from the size of the unit we are extracting from.
1562 Otherwise, we count bits from the most significant on a
1563 BYTES/BITS_BIG_ENDIAN machine. */
1564
1565 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1566 xbitpos = unit - bitsize - xbitpos;
1567
1568 if (xtarget == 0)
1569 xtarget = xspec_target = gen_reg_rtx (tmode);
1570
1571 if (GET_MODE (xtarget) != ext_mode)
1572 {
1573 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1574 between the mode of the extraction (word_mode) and the target
1575 mode. Instead, create a temporary and use convert_move to set
1576 the target. */
1577 if (REG_P (xtarget)
1578 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1579 {
1580 xtarget = gen_lowpart (ext_mode, xtarget);
1581 if (GET_MODE_PRECISION (ext_mode)
1582 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1583 xspec_target_subreg = xtarget;
1584 }
1585 else
1586 xtarget = gen_reg_rtx (ext_mode);
1587 }
1588
1589 create_output_operand (&ops[0], xtarget, ext_mode);
1590 create_fixed_operand (&ops[1], xop0);
1591 create_integer_operand (&ops[2], bitsize);
1592 create_integer_operand (&ops[3], xbitpos);
1593 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1594 4, ops))
1595 {
1596 xtarget = ops[0].value;
1597 if (xtarget == xspec_target)
1598 return xtarget;
1599 if (xtarget == xspec_target_subreg)
1600 return xspec_target;
1601 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602 }
1603 }
1604
1605 /* If OP0 is a memory, try copying it to a register and seeing if a
1606 cheap register alternative is available. */
1607 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1608 {
1609 enum machine_mode bestmode;
1610
1611 /* Get the mode to use for inserting into this field. If
1612 OP0 is BLKmode, get the smallest mode consistent with the
1613 alignment. If OP0 is a non-BLKmode object that is no
1614 wider than EXT_MODE, use its mode. Otherwise, use the
1615 smallest mode containing the field. */
1616
1617 if (GET_MODE (op0) == BLKmode
1618 || (ext_mode != MAX_MACHINE_MODE
1619 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1620 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1621 (ext_mode == MAX_MACHINE_MODE
1622 ? VOIDmode : ext_mode),
1623 MEM_VOLATILE_P (op0));
1624 else
1625 bestmode = GET_MODE (op0);
1626
1627 if (bestmode != VOIDmode
1628 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1629 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1630 {
1631 unsigned HOST_WIDE_INT xoffset, xbitpos;
1632
1633 /* Compute the offset as a multiple of this unit,
1634 counting in bytes. */
1635 unit = GET_MODE_BITSIZE (bestmode);
1636 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1637 xbitpos = bitnum % unit;
1638
1639 /* Make sure the register is big enough for the whole field. */
1640 if (xoffset * BITS_PER_UNIT + unit
1641 >= offset * BITS_PER_UNIT + bitsize)
1642 {
1643 rtx last, result, xop0;
1644
1645 last = get_last_insn ();
1646
1647 /* Fetch it to a register in that size. */
1648 xop0 = adjust_address (op0, bestmode, xoffset);
1649 xop0 = force_reg (bestmode, xop0);
1650 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1651 unsignedp, packedp, target,
1652 mode, tmode, false);
1653 if (result)
1654 return result;
1655
1656 delete_insns_since (last);
1657 }
1658 }
1659 }
1660
1661 if (!fallback_p)
1662 return NULL;
1663
1664 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1665 bitpos, target, unsignedp, packedp);
1666 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1667 }
1668
1669 /* Generate code to extract a bit-field from STR_RTX
1670 containing BITSIZE bits, starting at BITNUM,
1671 and put it in TARGET if possible (if TARGET is nonzero).
1672 Regardless of TARGET, we return the rtx for where the value is placed.
1673
1674 STR_RTX is the structure containing the byte (a REG or MEM).
1675 UNSIGNEDP is nonzero if this is an unsigned bit field.
1676 PACKEDP is nonzero if the field has the packed attribute.
1677 MODE is the natural mode of the field value once extracted.
1678 TMODE is the mode the caller would like the value to have;
1679 but the value may be returned with type MODE instead.
1680
1681 If a TARGET is specified and we can store in it at no extra cost,
1682 we do so, and return TARGET.
1683 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1684 if they are equally easy. */
1685
1686 rtx
1687 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1688 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1689 rtx target, enum machine_mode mode, enum machine_mode tmode)
1690 {
1691 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1692 target, mode, tmode, true);
1693 }
1694 \f
1695 /* Extract a bit field using shifts and boolean operations.
1696 Returns an rtx to represent the value.
1697 OP0 addresses a register (word) or memory (byte).
1698 BITPOS says which bit within the word or byte the bit field starts in.
1699 OFFSET says how many bytes farther the bit field starts;
1700 it is 0 if OP0 is a register.
1701 BITSIZE says how many bits long the bit field is.
1702 (If OP0 is a register, it may be narrower than a full word,
1703 but BITPOS still counts within a full word,
1704 which is significant on bigendian machines.)
1705
1706 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707 PACKEDP is true if the field has the packed attribute.
1708
1709 If TARGET is nonzero, attempts to store the value there
1710 and return TARGET, but this is not guaranteed.
1711 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1712
1713 static rtx
1714 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1715 unsigned HOST_WIDE_INT offset,
1716 unsigned HOST_WIDE_INT bitsize,
1717 unsigned HOST_WIDE_INT bitpos, rtx target,
1718 int unsignedp, bool packedp)
1719 {
1720 unsigned int total_bits = BITS_PER_WORD;
1721 enum machine_mode mode;
1722
1723 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1724 {
1725 /* Special treatment for a bit field split across two registers. */
1726 if (bitsize + bitpos > BITS_PER_WORD)
1727 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1728 }
1729 else
1730 {
1731 /* Get the proper mode to use for this field. We want a mode that
1732 includes the entire field. If such a mode would be larger than
1733 a word, we won't be doing the extraction the normal way. */
1734
1735 if (MEM_VOLATILE_P (op0)
1736 && flag_strict_volatile_bitfields > 0)
1737 {
1738 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1739 mode = GET_MODE (op0);
1740 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1741 mode = GET_MODE (target);
1742 else
1743 mode = tmode;
1744 }
1745 else
1746 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1747 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1748
1749 if (mode == VOIDmode)
1750 /* The only way this should occur is if the field spans word
1751 boundaries. */
1752 return extract_split_bit_field (op0, bitsize,
1753 bitpos + offset * BITS_PER_UNIT,
1754 unsignedp);
1755
1756 total_bits = GET_MODE_BITSIZE (mode);
1757
1758 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1759 be in the range 0 to total_bits-1, and put any excess bytes in
1760 OFFSET. */
1761 if (bitpos >= total_bits)
1762 {
1763 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1764 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1765 * BITS_PER_UNIT);
1766 }
1767
1768 /* If we're accessing a volatile MEM, we can't do the next
1769 alignment step if it results in a multi-word access where we
1770 otherwise wouldn't have one. So, check for that case
1771 here. */
1772 if (MEM_P (op0)
1773 && MEM_VOLATILE_P (op0)
1774 && flag_strict_volatile_bitfields > 0
1775 && bitpos + bitsize <= total_bits
1776 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1777 {
1778 if (STRICT_ALIGNMENT)
1779 {
1780 static bool informed_about_misalignment = false;
1781 bool warned;
1782
1783 if (packedp)
1784 {
1785 if (bitsize == total_bits)
1786 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1787 "multiple accesses to volatile structure member"
1788 " because of packed attribute");
1789 else
1790 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1791 "multiple accesses to volatile structure bitfield"
1792 " because of packed attribute");
1793
1794 return extract_split_bit_field (op0, bitsize,
1795 bitpos + offset * BITS_PER_UNIT,
1796 unsignedp);
1797 }
1798
1799 if (bitsize == total_bits)
1800 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1801 "mis-aligned access used for structure member");
1802 else
1803 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1804 "mis-aligned access used for structure bitfield");
1805
1806 if (! informed_about_misalignment && warned)
1807 {
1808 informed_about_misalignment = true;
1809 inform (input_location,
1810 "when a volatile object spans multiple type-sized locations,"
1811 " the compiler must choose between using a single mis-aligned access to"
1812 " preserve the volatility, or using multiple aligned accesses to avoid"
1813 " runtime faults; this code may fail at runtime if the hardware does"
1814 " not allow this access");
1815 }
1816 }
1817 }
1818 else
1819 {
1820
1821 /* Get ref to an aligned byte, halfword, or word containing the field.
1822 Adjust BITPOS to be position within a word,
1823 and OFFSET to be the offset of that word.
1824 Then alter OP0 to refer to that word. */
1825 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1826 offset -= (offset % (total_bits / BITS_PER_UNIT));
1827 }
1828
1829 op0 = adjust_address (op0, mode, offset);
1830 }
1831
1832 mode = GET_MODE (op0);
1833
1834 if (BYTES_BIG_ENDIAN)
1835 /* BITPOS is the distance between our msb and that of OP0.
1836 Convert it to the distance from the lsb. */
1837 bitpos = total_bits - bitsize - bitpos;
1838
1839 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1840 We have reduced the big-endian case to the little-endian case. */
1841
1842 if (unsignedp)
1843 {
1844 if (bitpos)
1845 {
1846 /* If the field does not already start at the lsb,
1847 shift it so it does. */
1848 /* Maybe propagate the target for the shift. */
1849 /* But not if we will return it--could confuse integrate.c. */
1850 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1851 if (tmode != mode) subtarget = 0;
1852 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1853 }
1854 /* Convert the value to the desired mode. */
1855 if (mode != tmode)
1856 op0 = convert_to_mode (tmode, op0, 1);
1857
1858 /* Unless the msb of the field used to be the msb when we shifted,
1859 mask out the upper bits. */
1860
1861 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1862 return expand_binop (GET_MODE (op0), and_optab, op0,
1863 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1864 target, 1, OPTAB_LIB_WIDEN);
1865 return op0;
1866 }
1867
1868 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1869 then arithmetic-shift its lsb to the lsb of the word. */
1870 op0 = force_reg (mode, op0);
1871
1872 /* Find the narrowest integer mode that contains the field. */
1873
1874 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1875 mode = GET_MODE_WIDER_MODE (mode))
1876 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1877 {
1878 op0 = convert_to_mode (mode, op0, 0);
1879 break;
1880 }
1881
1882 if (mode != tmode)
1883 target = 0;
1884
1885 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1886 {
1887 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1888 /* Maybe propagate the target for the shift. */
1889 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1890 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1891 }
1892
1893 return expand_shift (RSHIFT_EXPR, mode, op0,
1894 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1895 }
1896 \f
1897 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1898 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1899 complement of that if COMPLEMENT. The mask is truncated if
1900 necessary to the width of mode MODE. The mask is zero-extended if
1901 BITSIZE+BITPOS is too small for MODE. */
1902
1903 static rtx
1904 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1905 {
1906 double_int mask;
1907
1908 mask = double_int_mask (bitsize);
1909 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1910
1911 if (complement)
1912 mask = double_int_not (mask);
1913
1914 return immed_double_int_const (mask, mode);
1915 }
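/* Editorial illustration (not part of the original sources): with
   MODE == SImode, BITPOS == 4 and BITSIZE == 8, the mask built above is
   0x00000ff0, i.e. eight one bits sitting above four low-order zero bits;
   with COMPLEMENT nonzero the result would instead be 0xfffff00f.  */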
1916
1917 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1918 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1919
1920 static rtx
1921 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1922 {
1923 double_int val;
1924
1925 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1926 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1927
1928 return immed_double_int_const (val, mode);
1929 }
1930 \f
1931 /* Extract a bit field that is split across two words
1932 and return an RTX for the result.
1933
1934 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1935 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1936 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1937
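/* Editorial illustration (assuming 32-bit words; not part of the original
   comment): a 20-bit field starting at bit 28 of its word spans two words.
   The loop below then fetches 4 bits from the first word and the remaining
   16 bits from the second, each as an unsigned field, and shifts and ORs
   the two parts together.  */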
1938 static rtx
1939 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1940 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1941 {
1942 unsigned int unit;
1943 unsigned int bitsdone = 0;
1944 rtx result = NULL_RTX;
1945 int first = 1;
1946
1947 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1948 much at a time. */
1949 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1950 unit = BITS_PER_WORD;
1951 else
1952 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1953
1954 while (bitsdone < bitsize)
1955 {
1956 unsigned HOST_WIDE_INT thissize;
1957 rtx part, word;
1958 unsigned HOST_WIDE_INT thispos;
1959 unsigned HOST_WIDE_INT offset;
1960
1961 offset = (bitpos + bitsdone) / unit;
1962 thispos = (bitpos + bitsdone) % unit;
1963
1964 /* THISSIZE must not overrun a word boundary. Otherwise,
1965 extract_fixed_bit_field will call us again, and we will mutually
1966 recurse forever. */
1967 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1968 thissize = MIN (thissize, unit - thispos);
1969
1970 /* If OP0 is a register, then handle OFFSET here.
1971
1972 When handling multiword bitfields, extract_bit_field may pass
1973 down a word_mode SUBREG of a larger REG for a bitfield that actually
1974 crosses a word boundary. Thus, for a SUBREG, we must find
1975 the current word starting from the base register. */
1976 if (GET_CODE (op0) == SUBREG)
1977 {
1978 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1979 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1980 GET_MODE (SUBREG_REG (op0)));
1981 offset = 0;
1982 }
1983 else if (REG_P (op0))
1984 {
1985 word = operand_subword_force (op0, offset, GET_MODE (op0));
1986 offset = 0;
1987 }
1988 else
1989 word = op0;
1990
1991 /* Extract the parts in bit-counting order,
1992 whose meaning is determined by BYTES_PER_UNIT.
1993 OFFSET is in UNITs, and UNIT is in bits.
1994 extract_fixed_bit_field wants offset in bytes. */
1995 part = extract_fixed_bit_field (word_mode, word,
1996 offset * unit / BITS_PER_UNIT,
1997 thissize, thispos, 0, 1, false);
1998 bitsdone += thissize;
1999
2000 /* Shift this part into place for the result. */
2001 if (BYTES_BIG_ENDIAN)
2002 {
2003 if (bitsize != bitsdone)
2004 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2005 bitsize - bitsdone, 0, 1);
2006 }
2007 else
2008 {
2009 if (bitsdone != thissize)
2010 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2011 bitsdone - thissize, 0, 1);
2012 }
2013
2014 if (first)
2015 result = part;
2016 else
2017 /* Combine the parts with bitwise or. This works
2018 because we extracted each part as an unsigned bit field. */
2019 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2020 OPTAB_LIB_WIDEN);
2021
2022 first = 0;
2023 }
2024
2025 /* Unsigned bit field: we are done. */
2026 if (unsignedp)
2027 return result;
2028 /* Signed bit field: sign-extend with two arithmetic shifts. */
2029 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2030 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2031 return expand_shift (RSHIFT_EXPR, word_mode, result,
2032 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2033 }
2034 \f
2035 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2036 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2037 MODE, fill the upper bits with zeros. Fail if the layout of either
2038 mode is unknown (as for CC modes) or if the extraction would involve
2039 unprofitable mode punning. Return the value on success, otherwise
2040 return null.
2041
2042 This is different from gen_lowpart* in these respects:
2043
2044 - the returned value must always be considered an rvalue
2045
2046 - when MODE is wider than SRC_MODE, the extraction involves
2047 a zero extension
2048
2049 - when MODE is smaller than SRC_MODE, the extraction involves
2050 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2051
2052 In other words, this routine performs a computation, whereas the
2053 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2054 operations. */
2055
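/* Editorial examples of the intent (not part of the original comment):
   extract_low_bits (SImode, HImode, x) yields the 16-bit pattern of X
   zero-extended to 32 bits, while extract_low_bits (SImode, SFmode, x)
   reinterprets the float's bit pattern as a 32-bit integer, provided the
   modes involved are tieable.  */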
2056 rtx
2057 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2058 {
2059 enum machine_mode int_mode, src_int_mode;
2060
2061 if (mode == src_mode)
2062 return src;
2063
2064 if (CONSTANT_P (src))
2065 {
2066 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2067 fails, it will happily create (subreg (symbol_ref)) or similar
2068 invalid SUBREGs. */
2069 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2070 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2071 if (ret)
2072 return ret;
2073
2074 if (GET_MODE (src) == VOIDmode
2075 || !validate_subreg (mode, src_mode, src, byte))
2076 return NULL_RTX;
2077
2078 src = force_reg (GET_MODE (src), src);
2079 return gen_rtx_SUBREG (mode, src, byte);
2080 }
2081
2082 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2083 return NULL_RTX;
2084
2085 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2086 && MODES_TIEABLE_P (mode, src_mode))
2087 {
2088 rtx x = gen_lowpart_common (mode, src);
2089 if (x)
2090 return x;
2091 }
2092
2093 src_int_mode = int_mode_for_mode (src_mode);
2094 int_mode = int_mode_for_mode (mode);
2095 if (src_int_mode == BLKmode || int_mode == BLKmode)
2096 return NULL_RTX;
2097
2098 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2099 return NULL_RTX;
2100 if (!MODES_TIEABLE_P (int_mode, mode))
2101 return NULL_RTX;
2102
2103 src = gen_lowpart (src_int_mode, src);
2104 src = convert_modes (int_mode, src_int_mode, src, true);
2105 src = gen_lowpart (mode, src);
2106 return src;
2107 }
2108 \f
2109 /* Add INC into TARGET. */
2110
2111 void
2112 expand_inc (rtx target, rtx inc)
2113 {
2114 rtx value = expand_binop (GET_MODE (target), add_optab,
2115 target, inc,
2116 target, 0, OPTAB_LIB_WIDEN);
2117 if (value != target)
2118 emit_move_insn (target, value);
2119 }
2120
2121 /* Subtract DEC from TARGET. */
2122
2123 void
2124 expand_dec (rtx target, rtx dec)
2125 {
2126 rtx value = expand_binop (GET_MODE (target), sub_optab,
2127 target, dec,
2128 target, 0, OPTAB_LIB_WIDEN);
2129 if (value != target)
2130 emit_move_insn (target, value);
2131 }
2132 \f
2133 /* Output a shift instruction for expression code CODE,
2134 with SHIFTED being the rtx for the value to shift,
2135 and AMOUNT the rtx for the amount to shift by.
2136 Store the result in the rtx TARGET, if that is convenient.
2137 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2138 Return the rtx for where the value is. */
2139
2140 static rtx
2141 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2142 rtx amount, rtx target, int unsignedp)
2143 {
2144 rtx op1, temp = 0;
2145 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2146 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2147 optab lshift_optab = ashl_optab;
2148 optab rshift_arith_optab = ashr_optab;
2149 optab rshift_uns_optab = lshr_optab;
2150 optab lrotate_optab = rotl_optab;
2151 optab rrotate_optab = rotr_optab;
2152 enum machine_mode op1_mode;
2153 int attempt;
2154 bool speed = optimize_insn_for_speed_p ();
2155
2156 op1 = amount;
2157 op1_mode = GET_MODE (op1);
2158
2159 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2160 shift amount is a vector, use the vector/vector shift patterns. */
2161 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2162 {
2163 lshift_optab = vashl_optab;
2164 rshift_arith_optab = vashr_optab;
2165 rshift_uns_optab = vlshr_optab;
2166 lrotate_optab = vrotl_optab;
2167 rrotate_optab = vrotr_optab;
2168 }
2169
2170 /* We previously detected shift counts computed by NEGATE_EXPR
2171 and shifted in the other direction, but that does not work
2172 on all machines. */
2173
2174 if (SHIFT_COUNT_TRUNCATED)
2175 {
2176 if (CONST_INT_P (op1)
2177 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2178 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2179 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2180 % GET_MODE_BITSIZE (mode));
2181 else if (GET_CODE (op1) == SUBREG
2182 && subreg_lowpart_p (op1)
2183 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2184 op1 = SUBREG_REG (op1);
2185 }
2186
2187 if (op1 == const0_rtx)
2188 return shifted;
2189
2190 /* Check whether it's cheaper to implement a left shift by a constant
2191 bit count by a sequence of additions. */
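/* Editorial illustration (not in the original comment): a left shift by 2
   is then emitted as two self-additions, in effect
   t = x + x; t = t + t;
   which is what the loop below does when additions are cheaper.  */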
2192 if (code == LSHIFT_EXPR
2193 && CONST_INT_P (op1)
2194 && INTVAL (op1) > 0
2195 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2196 && INTVAL (op1) < MAX_BITS_PER_WORD
2197 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2198 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2199 {
2200 int i;
2201 for (i = 0; i < INTVAL (op1); i++)
2202 {
2203 temp = force_reg (mode, shifted);
2204 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2205 unsignedp, OPTAB_LIB_WIDEN);
2206 }
2207 return shifted;
2208 }
2209
2210 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2211 {
2212 enum optab_methods methods;
2213
2214 if (attempt == 0)
2215 methods = OPTAB_DIRECT;
2216 else if (attempt == 1)
2217 methods = OPTAB_WIDEN;
2218 else
2219 methods = OPTAB_LIB_WIDEN;
2220
2221 if (rotate)
2222 {
2223 /* Widening does not work for rotation. */
2224 if (methods == OPTAB_WIDEN)
2225 continue;
2226 else if (methods == OPTAB_LIB_WIDEN)
2227 {
2228 /* If we have been unable to open-code this by a rotation,
2229 do it as the IOR of two shifts. I.e., to rotate A
2230 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2231 where C is the bitsize of A.
2232
2233 It is theoretically possible that the target machine might
2234 not be able to perform either shift and hence we would
2235 be making two libcalls rather than just the one for the
2236 shift (similarly if IOR could not be done). We will allow
2237 this extremely unlikely lossage to avoid complicating the
2238 code below. */
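/* Editorial illustration (not in the original comment): for a 32-bit mode
   this computes, in effect,
   result = (a << n) | ((unsigned) a >> (32 - n));
   e.g. a = 0x80000001 rotated left by n = 4 gives 0x00000018.  */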
2239
2240 rtx subtarget = target == shifted ? 0 : target;
2241 rtx new_amount, other_amount;
2242 rtx temp1;
2243
2244 new_amount = op1;
2245 if (CONST_INT_P (op1))
2246 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2247 - INTVAL (op1));
2248 else
2249 other_amount
2250 = simplify_gen_binary (MINUS, GET_MODE (op1),
2251 GEN_INT (GET_MODE_PRECISION (mode)),
2252 op1);
2253
2254 shifted = force_reg (mode, shifted);
2255
2256 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2257 mode, shifted, new_amount, 0, 1);
2258 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2259 mode, shifted, other_amount,
2260 subtarget, 1);
2261 return expand_binop (mode, ior_optab, temp, temp1, target,
2262 unsignedp, methods);
2263 }
2264
2265 temp = expand_binop (mode,
2266 left ? lrotate_optab : rrotate_optab,
2267 shifted, op1, target, unsignedp, methods);
2268 }
2269 else if (unsignedp)
2270 temp = expand_binop (mode,
2271 left ? lshift_optab : rshift_uns_optab,
2272 shifted, op1, target, unsignedp, methods);
2273
2274 /* Do arithmetic shifts.
2275 Also, if we are going to widen the operand, we can just as well
2276 use an arithmetic right-shift instead of a logical one. */
2277 if (temp == 0 && ! rotate
2278 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2279 {
2280 enum optab_methods methods1 = methods;
2281
2282 /* If trying to widen a log shift to an arithmetic shift,
2283 don't accept an arithmetic shift of the same size. */
2284 if (unsignedp)
2285 methods1 = OPTAB_MUST_WIDEN;
2286
2287 /* Arithmetic shift */
2288
2289 temp = expand_binop (mode,
2290 left ? lshift_optab : rshift_arith_optab,
2291 shifted, op1, target, unsignedp, methods1);
2292 }
2293
2294 /* We used to try extzv here for logical right shifts, but that was
2295 only useful for one machine, the VAX, and caused poor code
2296 generation there for lshrdi3, so the code was deleted and a
2297 define_expand for lshrsi3 was added to vax.md. */
2298 }
2299
2300 gcc_assert (temp);
2301 return temp;
2302 }
2303
2304 /* Output a shift instruction for expression code CODE,
2305 with SHIFTED being the rtx for the value to shift,
2306 and AMOUNT the amount to shift by.
2307 Store the result in the rtx TARGET, if that is convenient.
2308 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2309 Return the rtx for where the value is. */
2310
2311 rtx
2312 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2313 int amount, rtx target, int unsignedp)
2314 {
2315 return expand_shift_1 (code, mode,
2316 shifted, GEN_INT (amount), target, unsignedp);
2317 }
2318
2319 /* Output a shift instruction for expression code CODE,
2320 with SHIFTED being the rtx for the value to shift,
2321 and AMOUNT the tree for the amount to shift by.
2322 Store the result in the rtx TARGET, if that is convenient.
2323 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2324 Return the rtx for where the value is. */
2325
2326 rtx
2327 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2328 tree amount, rtx target, int unsignedp)
2329 {
2330 return expand_shift_1 (code, mode,
2331 shifted, expand_normal (amount), target, unsignedp);
2332 }
2333
2334 \f
2335 /* Indicates the type of fixup needed after a constant multiplication.
2336 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2337 the result should be negated, and ADD_VARIANT means that the
2338 multiplicand should be added to the result. */
2339 enum mult_variant {basic_variant, negate_variant, add_variant};
2340
2341 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2342 const struct mult_cost *, enum machine_mode mode);
2343 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2344 struct algorithm *, enum mult_variant *, int);
2345 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2346 const struct algorithm *, enum mult_variant);
2347 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2348 int, rtx *, int *, int *);
2349 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2350 static rtx extract_high_half (enum machine_mode, rtx);
2351 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2352 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2353 int, int);
2354 /* Compute and return the best algorithm for multiplying by T.
2355 The algorithm must cost less than COST_LIMIT.
2356 If retval.cost >= COST_LIMIT, no algorithm was found and all
2357 other fields of the returned struct are undefined.
2358 MODE is the machine mode of the multiplication. */
2359
2360 static void
2361 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2362 const struct mult_cost *cost_limit, enum machine_mode mode)
2363 {
2364 int m;
2365 struct algorithm *alg_in, *best_alg;
2366 struct mult_cost best_cost;
2367 struct mult_cost new_limit;
2368 int op_cost, op_latency;
2369 unsigned HOST_WIDE_INT orig_t = t;
2370 unsigned HOST_WIDE_INT q;
2371 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2372 int hash_index;
2373 bool cache_hit = false;
2374 enum alg_code cache_alg = alg_zero;
2375 bool speed = optimize_insn_for_speed_p ();
2376
2377 /* Indicate that no algorithm is yet found. If no algorithm
2378 is found, this value will be returned and indicate failure. */
2379 alg_out->cost.cost = cost_limit->cost + 1;
2380 alg_out->cost.latency = cost_limit->latency + 1;
2381
2382 if (cost_limit->cost < 0
2383 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384 return;
2385
2386 /* Restrict the bits of "t" to the multiplication's mode. */
2387 t &= GET_MODE_MASK (mode);
2388
2389 /* t == 1 can be done in zero cost. */
2390 if (t == 1)
2391 {
2392 alg_out->ops = 1;
2393 alg_out->cost.cost = 0;
2394 alg_out->cost.latency = 0;
2395 alg_out->op[0] = alg_m;
2396 return;
2397 }
2398
2399 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2400 fail now. */
2401 if (t == 0)
2402 {
2403 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2404 return;
2405 else
2406 {
2407 alg_out->ops = 1;
2408 alg_out->cost.cost = zero_cost[speed];
2409 alg_out->cost.latency = zero_cost[speed];
2410 alg_out->op[0] = alg_zero;
2411 return;
2412 }
2413 }
2414
2415 /* We'll be needing a couple extra algorithm structures now. */
2416
2417 alg_in = XALLOCA (struct algorithm);
2418 best_alg = XALLOCA (struct algorithm);
2419 best_cost = *cost_limit;
2420
2421 /* Compute the hash index. */
2422 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2423
2424 /* See if we already know what to do for T. */
2425 if (alg_hash[hash_index].t == t
2426 && alg_hash[hash_index].mode == mode
2428 && alg_hash[hash_index].speed == speed
2429 && alg_hash[hash_index].alg != alg_unknown)
2430 {
2431 cache_alg = alg_hash[hash_index].alg;
2432
2433 if (cache_alg == alg_impossible)
2434 {
2435 /* The cache tells us that it's impossible to synthesize
2436 multiplication by T within alg_hash[hash_index].cost. */
2437 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2438 /* COST_LIMIT is at least as restrictive as the one
2439 recorded in the hash table, in which case we have no
2440 hope of synthesizing a multiplication. Just
2441 return. */
2442 return;
2443
2444 /* If we get here, COST_LIMIT is less restrictive than the
2445 one recorded in the hash table, so we may be able to
2446 synthesize a multiplication. Proceed as if we didn't
2447 have the cache entry. */
2448 }
2449 else
2450 {
2451 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2452 /* The cached algorithm shows that this multiplication
2453 requires more cost than COST_LIMIT. Just return. This
2454 way, we don't clobber this cache entry with
2455 alg_impossible but retain useful information. */
2456 return;
2457
2458 cache_hit = true;
2459
2460 switch (cache_alg)
2461 {
2462 case alg_shift:
2463 goto do_alg_shift;
2464
2465 case alg_add_t_m2:
2466 case alg_sub_t_m2:
2467 goto do_alg_addsub_t_m2;
2468
2469 case alg_add_factor:
2470 case alg_sub_factor:
2471 goto do_alg_addsub_factor;
2472
2473 case alg_add_t2_m:
2474 goto do_alg_add_t2_m;
2475
2476 case alg_sub_t2_m:
2477 goto do_alg_sub_t2_m;
2478
2479 default:
2480 gcc_unreachable ();
2481 }
2482 }
2483 }
2484
2485 /* If we have a group of zero bits at the low-order part of T, try
2486 multiplying by the remaining bits and then doing a shift. */
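/* Editorial example (not in the original comment): T = 20 ends in two zero
   bits, so m = 2 and q = 5; x * 20 is then synthesized as (x * 5) << 2,
   with the recursive call handling x * 5 (e.g. as (x << 2) + x).  */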
2487
2488 if ((t & 1) == 0)
2489 {
2490 do_alg_shift:
2491 m = floor_log2 (t & -t); /* m = number of low zero bits */
2492 if (m < maxm)
2493 {
2494 q = t >> m;
2495 /* The function expand_shift will choose between a shift and
2496 a sequence of additions, so the observed cost is given as
2497 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2498 op_cost = m * add_cost[speed][mode];
2499 if (shift_cost[speed][mode][m] < op_cost)
2500 op_cost = shift_cost[speed][mode][m];
2501 new_limit.cost = best_cost.cost - op_cost;
2502 new_limit.latency = best_cost.latency - op_cost;
2503 synth_mult (alg_in, q, &new_limit, mode);
2504
2505 alg_in->cost.cost += op_cost;
2506 alg_in->cost.latency += op_cost;
2507 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2508 {
2509 struct algorithm *x;
2510 best_cost = alg_in->cost;
2511 x = alg_in, alg_in = best_alg, best_alg = x;
2512 best_alg->log[best_alg->ops] = m;
2513 best_alg->op[best_alg->ops] = alg_shift;
2514 }
2515
2516 /* See if treating ORIG_T as a signed number yields a better
2517 sequence. Try this sequence only for a negative ORIG_T
2518 as it would be useless for a non-negative ORIG_T. */
2519 if ((HOST_WIDE_INT) orig_t < 0)
2520 {
2521 /* Shift ORIG_T as follows because a right shift of a
2522 negative-valued signed type is implementation
2523 defined. */
2524 q = ~(~orig_t >> m);
2525 /* The function expand_shift will choose between a shift
2526 and a sequence of additions, so the observed cost is
2527 given as MIN (m * add_cost[speed][mode],
2528 shift_cost[speed][mode][m]). */
2529 op_cost = m * add_cost[speed][mode];
2530 if (shift_cost[speed][mode][m] < op_cost)
2531 op_cost = shift_cost[speed][mode][m];
2532 new_limit.cost = best_cost.cost - op_cost;
2533 new_limit.latency = best_cost.latency - op_cost;
2534 synth_mult (alg_in, q, &new_limit, mode);
2535
2536 alg_in->cost.cost += op_cost;
2537 alg_in->cost.latency += op_cost;
2538 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2539 {
2540 struct algorithm *x;
2541 best_cost = alg_in->cost;
2542 x = alg_in, alg_in = best_alg, best_alg = x;
2543 best_alg->log[best_alg->ops] = m;
2544 best_alg->op[best_alg->ops] = alg_shift;
2545 }
2546 }
2547 }
2548 if (cache_hit)
2549 goto done;
2550 }
2551
2552 /* If we have an odd number, add or subtract one. */
2553 if ((t & 1) != 0)
2554 {
2555 unsigned HOST_WIDE_INT w;
2556
2557 do_alg_addsub_t_m2:
2558 for (w = 1; (w & t) != 0; w <<= 1)
2559 ;
2560 /* If T was -1, then W will be zero after the loop. This is another
2561 case where T ends with ...111. Handling it by multiplying by
2562 (T + 1) and subtracting 1 produces slightly better code and makes
2563 algorithm selection much faster than treating it like the ...0111
2564 case below. */
2565 if (w == 0
2566 || (w > 2
2567 /* Reject the case where t is 3.
2568 Thus we prefer addition in that case. */
2569 && t != 3))
2570 {
2571 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2572
2573 op_cost = add_cost[speed][mode];
2574 new_limit.cost = best_cost.cost - op_cost;
2575 new_limit.latency = best_cost.latency - op_cost;
2576 synth_mult (alg_in, t + 1, &new_limit, mode);
2577
2578 alg_in->cost.cost += op_cost;
2579 alg_in->cost.latency += op_cost;
2580 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2581 {
2582 struct algorithm *x;
2583 best_cost = alg_in->cost;
2584 x = alg_in, alg_in = best_alg, best_alg = x;
2585 best_alg->log[best_alg->ops] = 0;
2586 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2587 }
2588 }
2589 else
2590 {
2591 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2592
2593 op_cost = add_cost[speed][mode];
2594 new_limit.cost = best_cost.cost - op_cost;
2595 new_limit.latency = best_cost.latency - op_cost;
2596 synth_mult (alg_in, t - 1, &new_limit, mode);
2597
2598 alg_in->cost.cost += op_cost;
2599 alg_in->cost.latency += op_cost;
2600 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601 {
2602 struct algorithm *x;
2603 best_cost = alg_in->cost;
2604 x = alg_in, alg_in = best_alg, best_alg = x;
2605 best_alg->log[best_alg->ops] = 0;
2606 best_alg->op[best_alg->ops] = alg_add_t_m2;
2607 }
2608 }
2609
2610 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2611 quickly with a - a * n for some appropriate constant n. */
2612 m = exact_log2 (-orig_t + 1);
2613 if (m >= 0 && m < maxm)
2614 {
2615 op_cost = shiftsub1_cost[speed][mode][m];
2616 new_limit.cost = best_cost.cost - op_cost;
2617 new_limit.latency = best_cost.latency - op_cost;
2618 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2619
2620 alg_in->cost.cost += op_cost;
2621 alg_in->cost.latency += op_cost;
2622 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2623 {
2624 struct algorithm *x;
2625 best_cost = alg_in->cost;
2626 x = alg_in, alg_in = best_alg, best_alg = x;
2627 best_alg->log[best_alg->ops] = m;
2628 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629 }
2630 }
2631
2632 if (cache_hit)
2633 goto done;
2634 }
2635
2636 /* Look for factors of t of the form
2637 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2638 If we find such a factor, we can multiply by t using an algorithm that
2639 multiplies by q, shift the result by m and add/subtract it to itself.
2640
2641 We search for large factors first and loop down, even if large factors
2642 are less probable than small; if we find a large factor we will find a
2643 good sequence quickly, and therefore be able to prune (by decreasing
2644 COST_LIMIT) the search. */
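/* Editorial example (not in the original comment): T = 45 is divisible by
   2^2 + 1 = 5, so one possible decomposition is
   x * 45 = ((x * 9) << 2) + (x * 9), with the recursion then handling
   x * 9 (itself (x << 3) + x).  The loop below tries the largest candidate
   factors first.  */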
2645
2646 do_alg_addsub_factor:
2647 for (m = floor_log2 (t - 1); m >= 2; m--)
2648 {
2649 unsigned HOST_WIDE_INT d;
2650
2651 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2652 if (t % d == 0 && t > d && m < maxm
2653 && (!cache_hit || cache_alg == alg_add_factor))
2654 {
2655 /* If the target has a cheap shift-and-add instruction use
2656 that in preference to a shift insn followed by an add insn.
2657 Assume that the shift-and-add is "atomic" with a latency
2658 equal to its cost, otherwise assume that on superscalar
2659 hardware the shift may be executed concurrently with the
2660 earlier steps in the algorithm. */
2661 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2662 if (shiftadd_cost[speed][mode][m] < op_cost)
2663 {
2664 op_cost = shiftadd_cost[speed][mode][m];
2665 op_latency = op_cost;
2666 }
2667 else
2668 op_latency = add_cost[speed][mode];
2669
2670 new_limit.cost = best_cost.cost - op_cost;
2671 new_limit.latency = best_cost.latency - op_latency;
2672 synth_mult (alg_in, t / d, &new_limit, mode);
2673
2674 alg_in->cost.cost += op_cost;
2675 alg_in->cost.latency += op_latency;
2676 if (alg_in->cost.latency < op_cost)
2677 alg_in->cost.latency = op_cost;
2678 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 {
2680 struct algorithm *x;
2681 best_cost = alg_in->cost;
2682 x = alg_in, alg_in = best_alg, best_alg = x;
2683 best_alg->log[best_alg->ops] = m;
2684 best_alg->op[best_alg->ops] = alg_add_factor;
2685 }
2686 /* Other factors will have been taken care of in the recursion. */
2687 break;
2688 }
2689
2690 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2691 if (t % d == 0 && t > d && m < maxm
2692 && (!cache_hit || cache_alg == alg_sub_factor))
2693 {
2694 /* If the target has a cheap shift-and-subtract insn use
2695 that in preference to a shift insn followed by a sub insn.
2696 Assume that the shift-and-sub is "atomic" with a latency
2697 equal to its cost, otherwise assume that on superscalar
2698 hardware the shift may be executed concurrently with the
2699 earlier steps in the algorithm. */
2700 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2701 if (shiftsub0_cost[speed][mode][m] < op_cost)
2702 {
2703 op_cost = shiftsub0_cost[speed][mode][m];
2704 op_latency = op_cost;
2705 }
2706 else
2707 op_latency = add_cost[speed][mode];
2708
2709 new_limit.cost = best_cost.cost - op_cost;
2710 new_limit.latency = best_cost.latency - op_latency;
2711 synth_mult (alg_in, t / d, &new_limit, mode);
2712
2713 alg_in->cost.cost += op_cost;
2714 alg_in->cost.latency += op_latency;
2715 if (alg_in->cost.latency < op_cost)
2716 alg_in->cost.latency = op_cost;
2717 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2718 {
2719 struct algorithm *x;
2720 best_cost = alg_in->cost;
2721 x = alg_in, alg_in = best_alg, best_alg = x;
2722 best_alg->log[best_alg->ops] = m;
2723 best_alg->op[best_alg->ops] = alg_sub_factor;
2724 }
2725 break;
2726 }
2727 }
2728 if (cache_hit)
2729 goto done;
2730
2731 /* Try shift-and-add (load effective address) instructions,
2732 i.e. do a*3, a*5, a*9. */
2733 if ((t & 1) != 0)
2734 {
2735 do_alg_add_t2_m:
2736 q = t - 1;
2737 q = q & -q;
2738 m = exact_log2 (q);
2739 if (m >= 0 && m < maxm)
2740 {
2741 op_cost = shiftadd_cost[speed][mode][m];
2742 new_limit.cost = best_cost.cost - op_cost;
2743 new_limit.latency = best_cost.latency - op_cost;
2744 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2745
2746 alg_in->cost.cost += op_cost;
2747 alg_in->cost.latency += op_cost;
2748 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 {
2750 struct algorithm *x;
2751 best_cost = alg_in->cost;
2752 x = alg_in, alg_in = best_alg, best_alg = x;
2753 best_alg->log[best_alg->ops] = m;
2754 best_alg->op[best_alg->ops] = alg_add_t2_m;
2755 }
2756 }
2757 if (cache_hit)
2758 goto done;
2759
2760 do_alg_sub_t2_m:
2761 q = t + 1;
2762 q = q & -q;
2763 m = exact_log2 (q);
2764 if (m >= 0 && m < maxm)
2765 {
2766 op_cost = shiftsub0_cost[speed][mode][m];
2767 new_limit.cost = best_cost.cost - op_cost;
2768 new_limit.latency = best_cost.latency - op_cost;
2769 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2770
2771 alg_in->cost.cost += op_cost;
2772 alg_in->cost.latency += op_cost;
2773 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2774 {
2775 struct algorithm *x;
2776 best_cost = alg_in->cost;
2777 x = alg_in, alg_in = best_alg, best_alg = x;
2778 best_alg->log[best_alg->ops] = m;
2779 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2780 }
2781 }
2782 if (cache_hit)
2783 goto done;
2784 }
2785
2786 done:
2787 /* If best_cost has not decreased, we have not found any algorithm. */
2788 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2789 {
2790 /* We failed to find an algorithm. Record alg_impossible for
2791 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2792 we are asked to find an algorithm for T within the same or
2793 lower COST_LIMIT, we can immediately return to the
2794 caller. */
2795 alg_hash[hash_index].t = t;
2796 alg_hash[hash_index].mode = mode;
2797 alg_hash[hash_index].speed = speed;
2798 alg_hash[hash_index].alg = alg_impossible;
2799 alg_hash[hash_index].cost = *cost_limit;
2800 return;
2801 }
2802
2803 /* Cache the result. */
2804 if (!cache_hit)
2805 {
2806 alg_hash[hash_index].t = t;
2807 alg_hash[hash_index].mode = mode;
2808 alg_hash[hash_index].speed = speed;
2809 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2810 alg_hash[hash_index].cost.cost = best_cost.cost;
2811 alg_hash[hash_index].cost.latency = best_cost.latency;
2812 }
2813
2814 /* If we are getting a too long sequence for `struct algorithm'
2815 to record, make this search fail. */
2816 if (best_alg->ops == MAX_BITS_PER_WORD)
2817 return;
2818
2819 /* Copy the algorithm from temporary space to the space at alg_out.
2820 We avoid using structure assignment because the majority of
2821 best_alg is normally undefined, and this is a critical function. */
2822 alg_out->ops = best_alg->ops + 1;
2823 alg_out->cost = best_cost;
2824 memcpy (alg_out->op, best_alg->op,
2825 alg_out->ops * sizeof *alg_out->op);
2826 memcpy (alg_out->log, best_alg->log,
2827 alg_out->ops * sizeof *alg_out->log);
2828 }
2829 \f
2830 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2831 Try three variations:
2832
2833 - a shift/add sequence based on VAL itself
2834 - a shift/add sequence based on -VAL, followed by a negation
2835 - a shift/add sequence based on VAL - 1, followed by an addition.
2836
2837 Return true if the cheapest of these cost less than MULT_COST,
2838 describing the algorithm in *ALG and final fixup in *VARIANT. */
2839
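/* Editorial examples (not in the original comment): for VAL = 7 the basic
   variant is (x << 3) - x; for VAL = -7 a shift/add sequence for 7 followed
   by a negation (negate_variant) is typically chosen; the add variant bases
   the sequence on VAL - 1 and finishes with one extra addition of x.  */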
2840 static bool
2841 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2842 struct algorithm *alg, enum mult_variant *variant,
2843 int mult_cost)
2844 {
2845 struct algorithm alg2;
2846 struct mult_cost limit;
2847 int op_cost;
2848 bool speed = optimize_insn_for_speed_p ();
2849
2850 /* Fail quickly for impossible bounds. */
2851 if (mult_cost < 0)
2852 return false;
2853
2854 /* Ensure that mult_cost provides a reasonable upper bound.
2855 Any constant multiplication can be performed with less
2856 than 2 * bits additions. */
2857 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2858 if (mult_cost > op_cost)
2859 mult_cost = op_cost;
2860
2861 *variant = basic_variant;
2862 limit.cost = mult_cost;
2863 limit.latency = mult_cost;
2864 synth_mult (alg, val, &limit, mode);
2865
2866 /* This works only if the inverted value actually fits in an
2867 `unsigned int'. */
2868 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2869 {
2870 op_cost = neg_cost[speed][mode];
2871 if (MULT_COST_LESS (&alg->cost, mult_cost))
2872 {
2873 limit.cost = alg->cost.cost - op_cost;
2874 limit.latency = alg->cost.latency - op_cost;
2875 }
2876 else
2877 {
2878 limit.cost = mult_cost - op_cost;
2879 limit.latency = mult_cost - op_cost;
2880 }
2881
2882 synth_mult (&alg2, -val, &limit, mode);
2883 alg2.cost.cost += op_cost;
2884 alg2.cost.latency += op_cost;
2885 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2886 *alg = alg2, *variant = negate_variant;
2887 }
2888
2889 /* This proves very useful for division-by-constant. */
2890 op_cost = add_cost[speed][mode];
2891 if (MULT_COST_LESS (&alg->cost, mult_cost))
2892 {
2893 limit.cost = alg->cost.cost - op_cost;
2894 limit.latency = alg->cost.latency - op_cost;
2895 }
2896 else
2897 {
2898 limit.cost = mult_cost - op_cost;
2899 limit.latency = mult_cost - op_cost;
2900 }
2901
2902 synth_mult (&alg2, val - 1, &limit, mode);
2903 alg2.cost.cost += op_cost;
2904 alg2.cost.latency += op_cost;
2905 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2906 *alg = alg2, *variant = add_variant;
2907
2908 return MULT_COST_LESS (&alg->cost, mult_cost);
2909 }
2910
2911 /* A subroutine of expand_mult, used for constant multiplications.
2912 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2913 convenient. Use the shift/add sequence described by ALG and apply
2914 the final fixup specified by VARIANT. */
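/* Editorial illustration (not in the original comment): one algorithm for
   multiplying by 10 is alg_m, then alg_add_t2_m with log 2, then alg_shift
   with log 1; the loop below would then compute, in effect,
   accum = x; accum = (accum << 2) + x; accum = accum << 1;
   with VAL_SO_FAR tracking 1, 5 and 10 along the way.  */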
2915
2916 static rtx
2917 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2918 rtx target, const struct algorithm *alg,
2919 enum mult_variant variant)
2920 {
2921 HOST_WIDE_INT val_so_far;
2922 rtx insn, accum, tem;
2923 int opno;
2924 enum machine_mode nmode;
2925
2926 /* Avoid referencing memory over and over and invalid sharing
2927 on SUBREGs. */
2928 op0 = force_reg (mode, op0);
2929
2930 /* ACCUM starts out either as OP0 or as a zero, depending on
2931 the first operation. */
2932
2933 if (alg->op[0] == alg_zero)
2934 {
2935 accum = copy_to_mode_reg (mode, const0_rtx);
2936 val_so_far = 0;
2937 }
2938 else if (alg->op[0] == alg_m)
2939 {
2940 accum = copy_to_mode_reg (mode, op0);
2941 val_so_far = 1;
2942 }
2943 else
2944 gcc_unreachable ();
2945
2946 for (opno = 1; opno < alg->ops; opno++)
2947 {
2948 int log = alg->log[opno];
2949 rtx shift_subtarget = optimize ? 0 : accum;
2950 rtx add_target
2951 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2952 && !optimize)
2953 ? target : 0;
2954 rtx accum_target = optimize ? 0 : accum;
2955 rtx accum_inner;
2956
2957 switch (alg->op[opno])
2958 {
2959 case alg_shift:
2960 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2961 /* REG_EQUAL note will be attached to the following insn. */
2962 emit_move_insn (accum, tem);
2963 val_so_far <<= log;
2964 break;
2965
2966 case alg_add_t_m2:
2967 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2968 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2969 add_target ? add_target : accum_target);
2970 val_so_far += (HOST_WIDE_INT) 1 << log;
2971 break;
2972
2973 case alg_sub_t_m2:
2974 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2975 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2976 add_target ? add_target : accum_target);
2977 val_so_far -= (HOST_WIDE_INT) 1 << log;
2978 break;
2979
2980 case alg_add_t2_m:
2981 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2982 log, shift_subtarget, 0);
2983 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2984 add_target ? add_target : accum_target);
2985 val_so_far = (val_so_far << log) + 1;
2986 break;
2987
2988 case alg_sub_t2_m:
2989 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2990 log, shift_subtarget, 0);
2991 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2992 add_target ? add_target : accum_target);
2993 val_so_far = (val_so_far << log) - 1;
2994 break;
2995
2996 case alg_add_factor:
2997 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2998 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2999 add_target ? add_target : accum_target);
3000 val_so_far += val_so_far << log;
3001 break;
3002
3003 case alg_sub_factor:
3004 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3005 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3006 (add_target
3007 ? add_target : (optimize ? 0 : tem)));
3008 val_so_far = (val_so_far << log) - val_so_far;
3009 break;
3010
3011 default:
3012 gcc_unreachable ();
3013 }
3014
3015 /* Write a REG_EQUAL note on the last insn so that we can cse
3016 multiplication sequences. Note that if ACCUM is a SUBREG,
3017 we've set the inner register and must properly indicate
3018 that. */
3019
3020 tem = op0, nmode = mode;
3021 accum_inner = accum;
3022 if (GET_CODE (accum) == SUBREG)
3023 {
3024 accum_inner = SUBREG_REG (accum);
3025 nmode = GET_MODE (accum_inner);
3026 tem = gen_lowpart (nmode, op0);
3027 }
3028
3029 insn = get_last_insn ();
3030 set_dst_reg_note (insn, REG_EQUAL,
3031 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3032 accum_inner);
3033 }
3034
3035 if (variant == negate_variant)
3036 {
3037 val_so_far = -val_so_far;
3038 accum = expand_unop (mode, neg_optab, accum, target, 0);
3039 }
3040 else if (variant == add_variant)
3041 {
3042 val_so_far = val_so_far + 1;
3043 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3044 }
3045
3046 /* Compare only the bits of val and val_so_far that are significant
3047 in the result mode, to avoid sign-/zero-extension confusion. */
3048 val &= GET_MODE_MASK (mode);
3049 val_so_far &= GET_MODE_MASK (mode);
3050 gcc_assert (val == val_so_far);
3051
3052 return accum;
3053 }
3054
3055 /* Perform a multiplication and return an rtx for the result.
3056 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3057 TARGET is a suggestion for where to store the result (an rtx).
3058
3059 We check specially for a constant integer as OP1.
3060 If you want this check for OP0 as well, then before calling
3061 you should swap the two operands if OP0 would be constant. */
3062
3063 rtx
3064 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3065 int unsignedp)
3066 {
3067 enum mult_variant variant;
3068 struct algorithm algorithm;
3069 int max_cost;
3070 bool speed = optimize_insn_for_speed_p ();
3071
3072 /* Handling const0_rtx here allows us to use zero as a rogue value for
3073 coeff below. */
3074 if (op1 == const0_rtx)
3075 return const0_rtx;
3076 if (op1 == const1_rtx)
3077 return op0;
3078 if (op1 == constm1_rtx)
3079 return expand_unop (mode,
3080 GET_MODE_CLASS (mode) == MODE_INT
3081 && !unsignedp && flag_trapv
3082 ? negv_optab : neg_optab,
3083 op0, target, 0);
3084
3085 /* These are the operations that are potentially turned into a sequence
3086 of shifts and additions. */
3087 if (SCALAR_INT_MODE_P (mode)
3088 && (unsignedp || !flag_trapv))
3089 {
3090 HOST_WIDE_INT coeff = 0;
3091 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3092
3093 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3094 less than or equal in size to `unsigned int' this doesn't matter.
3095 If the mode is larger than `unsigned int', then synth_mult works
3096 only if the constant value exactly fits in an `unsigned int' without
3097 any truncation. This means that multiplying by negative values does
3098 not work; results are off by 2^32 on a 32 bit machine. */
3099
3100 if (CONST_INT_P (op1))
3101 {
3102 /* Attempt to handle multiplication of DImode values by negative
3103 coefficients, by performing the multiplication by a positive
3104 multiplier and then inverting the result. */
3105 if (INTVAL (op1) < 0
3106 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3107 {
3108 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3109 result is interpreted as an unsigned coefficient.
3110 Exclude cost of op0 from max_cost to match the cost
3111 calculation of the synth_mult. */
3112 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3113 speed)
3114 - neg_cost[speed][mode]);
3115 if (max_cost > 0
3116 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3117 &variant, max_cost))
3118 {
3119 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3120 NULL_RTX, &algorithm,
3121 variant);
3122 return expand_unop (mode, neg_optab, temp, target, 0);
3123 }
3124 }
3125 else coeff = INTVAL (op1);
3126 }
3127 else if (GET_CODE (op1) == CONST_DOUBLE)
3128 {
3129 /* If we are multiplying in DImode, it may still be a win
3130 to try to work with shifts and adds. */
3131 if (CONST_DOUBLE_HIGH (op1) == 0
3132 && CONST_DOUBLE_LOW (op1) > 0)
3133 coeff = CONST_DOUBLE_LOW (op1);
3134 else if (CONST_DOUBLE_LOW (op1) == 0
3135 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3136 {
3137 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3138 + HOST_BITS_PER_WIDE_INT;
3139 return expand_shift (LSHIFT_EXPR, mode, op0,
3140 shift, target, unsignedp);
3141 }
3142 }
3143
3144 /* We used to test optimize here, on the grounds that it's better to
3145 produce a smaller program when -O is not used. But this causes
3146 such a terrible slowdown sometimes that it seems better to always
3147 use synth_mult. */
3148 if (coeff != 0)
3149 {
3150 /* Special case powers of two. */
3151 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3152 return expand_shift (LSHIFT_EXPR, mode, op0,
3153 floor_log2 (coeff), target, unsignedp);
3154
3155 /* Exclude cost of op0 from max_cost to match the cost
3156 calculation of the synth_mult. */
3157 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3158 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3159 max_cost))
3160 return expand_mult_const (mode, op0, coeff, target,
3161 &algorithm, variant);
3162 }
3163 }
3164
3165 if (GET_CODE (op0) == CONST_DOUBLE)
3166 {
3167 rtx temp = op0;
3168 op0 = op1;
3169 op1 = temp;
3170 }
3171
3172 /* Expand x*2.0 as x+x. */
3173 if (GET_CODE (op1) == CONST_DOUBLE
3174 && SCALAR_FLOAT_MODE_P (mode))
3175 {
3176 REAL_VALUE_TYPE d;
3177 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3178
3179 if (REAL_VALUES_EQUAL (d, dconst2))
3180 {
3181 op0 = force_reg (GET_MODE (op0), op0);
3182 return expand_binop (mode, add_optab, op0, op0,
3183 target, unsignedp, OPTAB_LIB_WIDEN);
3184 }
3185 }
3186
3187 /* This used to use umul_optab if unsigned, but for non-widening multiply
3188 there is no difference between signed and unsigned. */
3189 op0 = expand_binop (mode,
3190 ! unsignedp
3191 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3192 ? smulv_optab : smul_optab,
3193 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3194 gcc_assert (op0);
3195 return op0;
3196 }
3197
3198 /* Perform a widening multiplication and return an rtx for the result.
3199 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3200 TARGET is a suggestion for where to store the result (an rtx).
3201 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3202 or smul_widen_optab.
3203
3204 We check specially for a constant integer as OP1, comparing the
3205 cost of a widening multiply against the cost of a sequence of shifts
3206 and adds. */
3207
3208 rtx
3209 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3210 int unsignedp, optab this_optab)
3211 {
3212 bool speed = optimize_insn_for_speed_p ();
3213 rtx cop1;
3214
3215 if (CONST_INT_P (op1)
3216 && GET_MODE (op0) != VOIDmode
3217 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3218 this_optab == umul_widen_optab))
3219 && CONST_INT_P (cop1)
3220 && (INTVAL (cop1) >= 0
3221 || HWI_COMPUTABLE_MODE_P (mode)))
3222 {
3223 HOST_WIDE_INT coeff = INTVAL (cop1);
3224 int max_cost;
3225 enum mult_variant variant;
3226 struct algorithm algorithm;
3227
3228 /* Special case powers of two. */
3229 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3230 {
3231 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3232 return expand_shift (LSHIFT_EXPR, mode, op0,
3233 floor_log2 (coeff), target, unsignedp);
3234 }
3235
3236 /* Exclude cost of op0 from max_cost to match the cost
3237 calculation of the synth_mult. */
3238 max_cost = mul_widen_cost[speed][mode];
3239 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3240 max_cost))
3241 {
3242 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3243 return expand_mult_const (mode, op0, coeff, target,
3244 &algorithm, variant);
3245 }
3246 }
3247 return expand_binop (mode, this_optab, op0, op1, target,
3248 unsignedp, OPTAB_LIB_WIDEN);
3249 }
3250 \f
3251 /* Return the smallest n such that 2**n >= X. */
3252
3253 int
3254 ceil_log2 (unsigned HOST_WIDE_INT x)
3255 {
3256 return floor_log2 (x - 1) + 1;
3257 }
3258
3259 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3260 replace division by D, and put the least significant N bits of the result
3261 in *MULTIPLIER_PTR and return the most significant bit.
3262
3263 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3264 needed precision is in PRECISION (should be <= N).
3265
3266 PRECISION should be as small as possible so this function can choose
3267 multiplier more freely.
3268
3269 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3270 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3271
3272 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3273 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3274
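/* Editorial example (not in the original comment): for unsigned 32-bit
   division by D = 5 the well-known multiplier is 0xcccccccd with a
   post-shift of 2, i.e. x / 5 == (high 32 bits of x * 0xcccccccd) >> 2;
   this is the kind of multiplier / POST_SHIFT pair computed here.  */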
3275 static
3276 unsigned HOST_WIDE_INT
3277 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3278 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3279 {
3280 HOST_WIDE_INT mhigh_hi, mlow_hi;
3281 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3282 int lgup, post_shift;
3283 int pow, pow2;
3284 unsigned HOST_WIDE_INT nl, dummy1;
3285 HOST_WIDE_INT nh, dummy2;
3286
3287 /* lgup = ceil(log2(divisor)); */
3288 lgup = ceil_log2 (d);
3289
3290 gcc_assert (lgup <= n);
3291
3292 pow = n + lgup;
3293 pow2 = n + lgup - precision;
3294
3295 /* We could handle this with some effort, but this case is much
3296 better handled directly with a scc insn, so rely on caller using
3297 that. */
3298 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3299
3300 /* mlow = 2^(N + lgup)/d */
3301 if (pow >= HOST_BITS_PER_WIDE_INT)
3302 {
3303 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3304 nl = 0;
3305 }
3306 else
3307 {
3308 nh = 0;
3309 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3310 }
3311 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3312 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3313
3314 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3315 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3316 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3317 else
3318 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3319 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3320 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3321
3322 gcc_assert (!mhigh_hi || nh - d < d);
3323 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3324 /* Assert that mlow < mhigh. */
3325 gcc_assert (mlow_hi < mhigh_hi
3326 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3327
3328 /* If precision == N, then mlow, mhigh exceed 2^N
3329 (but they do not exceed 2^(N+1)). */
3330
3331 /* Reduce to lowest terms. */
3332 for (post_shift = lgup; post_shift > 0; post_shift--)
3333 {
3334 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3335 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3336 if (ml_lo >= mh_lo)
3337 break;
3338
3339 mlow_hi = 0;
3340 mlow_lo = ml_lo;
3341 mhigh_hi = 0;
3342 mhigh_lo = mh_lo;
3343 }
3344
3345 *post_shift_ptr = post_shift;
3346 *lgup_ptr = lgup;
3347 if (n < HOST_BITS_PER_WIDE_INT)
3348 {
3349 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3350 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3351 return mhigh_lo >= mask;
3352 }
3353 else
3354 {
3355 *multiplier_ptr = GEN_INT (mhigh_lo);
3356 return mhigh_hi;
3357 }
3358 }
3359
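/* A minimal, self-contained sketch of the multiply-and-shift replacement
   that choose_multiplier enables, specialised to unsigned 32-bit division
   by 7.  For d = 7 and N = 32 the N+1-bit multiplier is 2^32 + 0x24924925
   with a post-shift of 3; since the multiplier does not fit in N bits, the
   caller (see expand_divmod) uses the add-and-halve sequence below.  The
   helper name and the <stdint.h> types are illustrative assumptions, not
   GCC code.  */
#if 0
#include <stdint.h>

static uint32_t
udiv7_by_mult (uint32_t x)
{
  /* High 32 bits of x times the low 32 bits of the multiplier.  */
  uint32_t t = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  /* (((x - t) >> 1) + t) >> (post_shift - 1) equals x / 7 for all x.  */
  return (((x - t) >> 1) + t) >> 2;
}
#endif
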
3360 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3361 congruent to 1 (mod 2**N). */
3362
3363 static unsigned HOST_WIDE_INT
3364 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3365 {
3366 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3367
3368 /* The algorithm notes that the choice y = x satisfies
3369 x*y == 1 mod 2^3, since x is assumed odd.
3370 Each iteration doubles the number of bits of significance in y. */
3371
3372 unsigned HOST_WIDE_INT mask;
3373 unsigned HOST_WIDE_INT y = x;
3374 int nbit = 3;
3375
3376 mask = (n == HOST_BITS_PER_WIDE_INT
3377 ? ~(unsigned HOST_WIDE_INT) 0
3378 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3379
3380 while (nbit < n)
3381 {
3382 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3383 nbit *= 2;
3384 }
3385 return y;
3386 }
3387
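/* A minimal sketch of the same Newton-style iteration for a fixed 32-bit
   word.  Starting from y = x (correct to 3 bits for odd x, since
   x*x == 1 mod 8), each step doubles the number of correct low-order bits.
   For example, the inverse of 3 mod 2^32 is 0xAAAAAAAB, because
   3 * 0xAAAAAAAB == 2*2^32 + 1.  The helper name and <stdint.h> types are
   illustrative assumptions.  */
#if 0
#include <stdint.h>

static uint32_t
invert_mod2_32 (uint32_t x)     /* X must be odd.  */
{
  uint32_t y = x;               /* Correct to 3 bits.  */
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);        /* Unsigned wrap-around is the mod 2^32.  */
  return y;
}
#endif
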
3388 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3389 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3390 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3391 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3392 become signed.
3393
3394 The result is put in TARGET if that is convenient.
3395
3396 MODE is the mode of operation. */
3397
3398 rtx
3399 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3400 rtx op1, rtx target, int unsignedp)
3401 {
3402 rtx tem;
3403 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3404
3405 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3406 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3407 tem = expand_and (mode, tem, op1, NULL_RTX);
3408 adj_operand
3409 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3410 adj_operand);
3411
3412 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3413 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3414 tem = expand_and (mode, tem, op0, NULL_RTX);
3415 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3416 target);
3417
3418 return target;
3419 }
3420
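/* A minimal sketch of the identity behind expand_mult_highpart_adjust,
   written out for 32-bit operands.  Reinterpreting a negative N-bit operand
   as unsigned adds 2^N to its value, so the unsigned high part exceeds the
   signed one by B when A < 0 and by A when B < 0 (mod 2^N); subtracting
   those terms converts one flavor into the other.  The helper name and
   <stdint.h> types are illustrative assumptions.  */
#if 0
#include <stdint.h>

static int32_t
smulhi_from_umulhi (int32_t a, int32_t b)
{
  uint32_t hi = (uint32_t) (((uint64_t) (uint32_t) a * (uint32_t) b) >> 32);
  if (a < 0)
    hi -= (uint32_t) b;
  if (b < 0)
    hi -= (uint32_t) a;
  return (int32_t) hi;
}
#endif
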
3421 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3422
3423 static rtx
3424 extract_high_half (enum machine_mode mode, rtx op)
3425 {
3426 enum machine_mode wider_mode;
3427
3428 if (mode == word_mode)
3429 return gen_highpart (mode, op);
3430
3431 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3432
3433 wider_mode = GET_MODE_WIDER_MODE (mode);
3434 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3435 GET_MODE_BITSIZE (mode), 0, 1);
3436 return convert_modes (mode, wider_mode, op, 0);
3437 }
3438
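/* A minimal sketch of what extract_high_half computes when a widening
   multiply is available: form the full product in the wider mode and shift
   it right by GET_MODE_BITSIZE (mode).  Shown for 32-bit operands with a
   32x32->64 multiply; the helper name and <stdint.h> types are illustrative
   assumptions.  */
#if 0
#include <stdint.h>

static uint32_t
umulhi32 (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);
}
#endif
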
3439 /* Like expand_mult_highpart, but only consider using a multiplication
3440 optab. OP1 is an rtx for the constant operand. */
3441
3442 static rtx
3443 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3444 rtx target, int unsignedp, int max_cost)
3445 {
3446 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3447 enum machine_mode wider_mode;
3448 optab moptab;
3449 rtx tem;
3450 int size;
3451 bool speed = optimize_insn_for_speed_p ();
3452
3453 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3454
3455 wider_mode = GET_MODE_WIDER_MODE (mode);
3456 size = GET_MODE_BITSIZE (mode);
3457
3458 /* Firstly, try using a multiplication insn that only generates the needed
3459 high part of the product, and in the sign flavor of unsignedp. */
3460 if (mul_highpart_cost[speed][mode] < max_cost)
3461 {
3462 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3463 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3464 unsignedp, OPTAB_DIRECT);
3465 if (tem)
3466 return tem;
3467 }
3468
3469 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3470 Need to adjust the result after the multiplication. */
3471 if (size - 1 < BITS_PER_WORD
3472 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3473 + 4 * add_cost[speed][mode] < max_cost))
3474 {
3475 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3476 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3477 unsignedp, OPTAB_DIRECT);
3478 if (tem)
3479 /* We used the wrong signedness. Adjust the result. */
3480 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3481 tem, unsignedp);
3482 }
3483
3484 /* Try widening multiplication. */
3485 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3486 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3487 && mul_widen_cost[speed][wider_mode] < max_cost)
3488 {
3489 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3490 unsignedp, OPTAB_WIDEN);
3491 if (tem)
3492 return extract_high_half (mode, tem);
3493 }
3494
3495 /* Try widening the mode and performing a non-widening multiplication. */
3496 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3497 && size - 1 < BITS_PER_WORD
3498 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3499 {
3500 rtx insns, wop0, wop1;
3501
3502 /* We need to widen the operands, for example to ensure the
3503 constant multiplier is correctly sign or zero extended.
3504 Use a sequence to clean up any instructions emitted by
3505 the conversions if things don't work out. */
3506 start_sequence ();
3507 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3508 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3509 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3510 unsignedp, OPTAB_WIDEN);
3511 insns = get_insns ();
3512 end_sequence ();
3513
3514 if (tem)
3515 {
3516 emit_insn (insns);
3517 return extract_high_half (mode, tem);
3518 }
3519 }
3520
3521 /* Try widening multiplication of opposite signedness, and adjust. */
3522 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3523 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3524 && size - 1 < BITS_PER_WORD
3525 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3526 + 4 * add_cost[speed][mode] < max_cost))
3527 {
3528 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3529 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3530 if (tem != 0)
3531 {
3532 tem = extract_high_half (mode, tem);
3533 /* We used the wrong signedness. Adjust the result. */
3534 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3535 target, unsignedp);
3536 }
3537 }
3538
3539 return 0;
3540 }
3541
3542 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3543 putting the high half of the result in TARGET if that is convenient,
3544 and return where the result is. If the operation can not be performed,
3545 0 is returned.
3546
3547 MODE is the mode of operation and result.
3548
3549 UNSIGNEDP nonzero means unsigned multiply.
3550
3551 MAX_COST is the total allowed cost for the expanded RTL. */
3552
3553 static rtx
3554 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3555 rtx target, int unsignedp, int max_cost)
3556 {
3557 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3558 unsigned HOST_WIDE_INT cnst1;
3559 int extra_cost;
3560 bool sign_adjust = false;
3561 enum mult_variant variant;
3562 struct algorithm alg;
3563 rtx tem;
3564 bool speed = optimize_insn_for_speed_p ();
3565
3566 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3567 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3568 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3569
3570 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3571
3572 /* We can't optimize modes wider than BITS_PER_WORD.
3573 ??? We might be able to perform double-word arithmetic if
3574 mode == word_mode, however all the cost calculations in
3575 synth_mult etc. assume single-word operations. */
3576 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3577 return expand_mult_highpart_optab (mode, op0, op1, target,
3578 unsignedp, max_cost);
3579
3580 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3581
3582 /* Check whether we try to multiply by a negative constant. */
3583 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3584 {
3585 sign_adjust = true;
3586 extra_cost += add_cost[speed][mode];
3587 }
3588
3589 /* See whether shift/add multiplication is cheap enough. */
3590 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3591 max_cost - extra_cost))
3592 {
3593 /* See whether the specialized multiplication optabs are
3594 cheaper than the shift/add version. */
3595 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3596 alg.cost.cost + extra_cost);
3597 if (tem)
3598 return tem;
3599
3600 tem = convert_to_mode (wider_mode, op0, unsignedp);
3601 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3602 tem = extract_high_half (mode, tem);
3603
3604 /* Adjust result for signedness. */
3605 if (sign_adjust)
3606 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3607
3608 return tem;
3609 }
3610 return expand_mult_highpart_optab (mode, op0, op1, target,
3611 unsignedp, max_cost);
3612 }
3613
3614
3615 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3616
3617 static rtx
3618 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3619 {
3620 unsigned HOST_WIDE_INT masklow, maskhigh;
3621 rtx result, temp, shift, label;
3622 int logd;
3623
3624 logd = floor_log2 (d);
3625 result = gen_reg_rtx (mode);
3626
3627 /* Avoid conditional branches when they're expensive. */
3628 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3629 && optimize_insn_for_speed_p ())
3630 {
3631 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3632 mode, 0, -1);
3633 if (signmask)
3634 {
3635 signmask = force_reg (mode, signmask);
3636 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3637 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3638
3639 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3640 which instruction sequence to use. If logical right shifts
3641 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3642 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3643
3644 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3645 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3646 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3647 > COSTS_N_INSNS (2)))
3648 {
3649 temp = expand_binop (mode, xor_optab, op0, signmask,
3650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651 temp = expand_binop (mode, sub_optab, temp, signmask,
3652 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3653 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3655 temp = expand_binop (mode, xor_optab, temp, signmask,
3656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 temp = expand_binop (mode, sub_optab, temp, signmask,
3658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659 }
3660 else
3661 {
3662 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3663 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664 signmask = force_reg (mode, signmask);
3665
3666 temp = expand_binop (mode, add_optab, op0, signmask,
3667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 }
3673 return temp;
3674 }
3675 }
3676
3677 /* Mask contains the mode's signbit and the significant bits of the
3678 modulus. By including the signbit in the operation, many targets
3679 can avoid an explicit compare operation in the following comparison
3680 against zero. */
3681
3682 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3683 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3684 {
3685 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3686 maskhigh = -1;
3687 }
3688 else
3689 maskhigh = (HOST_WIDE_INT) -1
3690 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3691
3692 temp = expand_binop (mode, and_optab, op0,
3693 immed_double_const (masklow, maskhigh, mode),
3694 result, 1, OPTAB_LIB_WIDEN);
3695 if (temp != result)
3696 emit_move_insn (result, temp);
3697
3698 label = gen_label_rtx ();
3699 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3700
3701 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3702 0, OPTAB_LIB_WIDEN);
3703 masklow = (HOST_WIDE_INT) -1 << logd;
3704 maskhigh = -1;
3705 temp = expand_binop (mode, ior_optab, temp,
3706 immed_double_const (masklow, maskhigh, mode),
3707 result, 1, OPTAB_LIB_WIDEN);
3708 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3709 0, OPTAB_LIB_WIDEN);
3710 if (temp != result)
3711 emit_move_insn (result, temp);
3712 emit_label (label);
3713 return result;
3714 }
3715
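/* A minimal sketch of the branch-free XOR/SUB variant emitted above,
   specialised to 32 bits.  With SIGN equal to -1 for negative X and 0
   otherwise, (X ^ SIGN) - SIGN is |X| computed without a branch; masking
   and then undoing the transform yields the truncating remainder
   X % (1 << LOGD).  The helper name and <stdint.h> types are illustrative
   assumptions.  */
#if 0
#include <stdint.h>

static int32_t
smod_pow2_sketch (int32_t x, int logd)
{
  int32_t sign = -(x < 0);                           /* 0 or -1.  */
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  uint32_t t = ((uint32_t) (x ^ sign) - (uint32_t) sign) & mask;
  return (int32_t) ((t ^ (uint32_t) sign) - (uint32_t) sign);
}
#endif
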
3716 /* Expand signed division of OP0 by a power of two D in mode MODE.
3717 This routine is only called for positive values of D. */
3718
3719 static rtx
3720 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3721 {
3722 rtx temp, label;
3723 int logd;
3724
3725 logd = floor_log2 (d);
3726
3727 if (d == 2
3728 && BRANCH_COST (optimize_insn_for_speed_p (),
3729 false) >= 1)
3730 {
3731 temp = gen_reg_rtx (mode);
3732 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3733 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3734 0, OPTAB_LIB_WIDEN);
3735 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3736 }
3737
3738 #ifdef HAVE_conditional_move
3739 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3740 >= 2)
3741 {
3742 rtx temp2;
3743
3744 /* ??? emit_conditional_move forces a stack adjustment via
3745 compare_from_rtx so, if the sequence is discarded, it will
3746 be lost. Do it now instead. */
3747 do_pending_stack_adjust ();
3748
3749 start_sequence ();
3750 temp2 = copy_to_mode_reg (mode, op0);
3751 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3752 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3753 temp = force_reg (mode, temp);
3754
3755 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3756 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3757 mode, temp, temp2, mode, 0);
3758 if (temp2)
3759 {
3760 rtx seq = get_insns ();
3761 end_sequence ();
3762 emit_insn (seq);
3763 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3764 }
3765 end_sequence ();
3766 }
3767 #endif
3768
3769 if (BRANCH_COST (optimize_insn_for_speed_p (),
3770 false) >= 2)
3771 {
3772 int ushift = GET_MODE_BITSIZE (mode) - logd;
3773
3774 temp = gen_reg_rtx (mode);
3775 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3776 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3777 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3778 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3779 else
3780 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3781 ushift, NULL_RTX, 1);
3782 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3783 0, OPTAB_LIB_WIDEN);
3784 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3785 }
3786
3787 label = gen_label_rtx ();
3788 temp = copy_to_mode_reg (mode, op0);
3789 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3790 expand_inc (temp, GEN_INT (d - 1));
3791 emit_label (label);
3792 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3793 }
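
/* A minimal sketch of the bias-then-shift sequence used above, specialised
   to 32 bits.  Adding D - 1 to negative dividends before the arithmetic
   right shift turns the shift's floor rounding into the truncating
   (round-toward-zero) division required by TRUNC_DIV_EXPR.  The helper name
   and <stdint.h> types are illustrative assumptions; the sketch also assumes
   that >> on a negative int is an arithmetic shift.  */
#if 0
#include <stdint.h>

static int32_t
sdiv_pow2_sketch (int32_t x, int logd)
{
  int32_t d = (int32_t) 1 << logd;
  int32_t bias = x < 0 ? d - 1 : 0;
  return (x + bias) >> logd;
}
#endif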
3794 \f
3795 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3796 if that is convenient, and returning where the result is.
3797 You may request either the quotient or the remainder as the result;
3798 specify REM_FLAG nonzero to get the remainder.
3799
3800 CODE is the expression code for which kind of division this is;
3801 it controls how rounding is done. MODE is the machine mode to use.
3802 UNSIGNEDP nonzero means do unsigned division. */
3803
3804 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3805 and then correct it by or'ing in missing high bits
3806 if result of ANDI is nonzero.
3807 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3808 This could optimize to a bfexts instruction.
3809 But C doesn't use these operations, so their optimizations are
3810 left for later. */
3811 /* ??? For modulo, we don't actually need the highpart of the first product,
3812 the low part will do nicely. And for small divisors, the second multiply
3813 can also be a low-part only multiply or even be completely left out.
3814 E.g. to calculate the remainder of a division by 3 with a 32 bit
3815 multiply, multiply with 0x55555556 and extract the upper two bits;
3816 the result is exact for inputs up to 0x1fffffff.
3817 The input range can be reduced by using cross-sum rules.
3818 For odd divisors >= 3, the following table gives right shift counts
3819 so that if a number is shifted by an integer multiple of the given
3820 amount, the remainder stays the same:
3821 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3822 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3823 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3824 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3825 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3826
3827 Cross-sum rules for even numbers can be derived by leaving as many bits
3828 to the right alone as the divisor has zeros to the right.
3829 E.g. if x is an unsigned 32 bit number:
3830 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3831 */
3832
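/* A minimal sketch of the remainder-by-3 example from the note above: a
   single 32-bit low multiply by 0x55555556 leaves x mod 3 in the top two
   bits of the product.  The input bound is the one stated in the note; the
   helper name and <stdint.h> types are illustrative assumptions.  */
#if 0
#include <stdint.h>

static unsigned
mod3_small (uint32_t x)
{
  return (x * 0x55555556u) >> 30;   /* Exact for x <= 0x1fffffff.  */
}
#endif
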
3833 rtx
3834 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3835 rtx op0, rtx op1, rtx target, int unsignedp)
3836 {
3837 enum machine_mode compute_mode;
3838 rtx tquotient;
3839 rtx quotient = 0, remainder = 0;
3840 rtx last;
3841 int size;
3842 rtx insn;
3843 optab optab1, optab2;
3844 int op1_is_constant, op1_is_pow2 = 0;
3845 int max_cost, extra_cost;
3846 static HOST_WIDE_INT last_div_const = 0;
3847 static HOST_WIDE_INT ext_op1;
3848 bool speed = optimize_insn_for_speed_p ();
3849
3850 op1_is_constant = CONST_INT_P (op1);
3851 if (op1_is_constant)
3852 {
3853 ext_op1 = INTVAL (op1);
3854 if (unsignedp)
3855 ext_op1 &= GET_MODE_MASK (mode);
3856 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3857 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3858 }
3859
3860 /*
3861 This is the structure of expand_divmod:
3862
3863 First comes code to fix up the operands so we can perform the operations
3864 correctly and efficiently.
3865
3866 Second comes a switch statement with code specific for each rounding mode.
3867 For some special operands this code emits all RTL for the desired
3868 operation, for other cases, it generates only a quotient and stores it in
3869 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3870 to indicate that it has not done anything.
3871
3872 Last comes code that finishes the operation. If QUOTIENT is set and
3873 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3874 QUOTIENT is not set, it is computed using trunc rounding.
3875
3876 We try to generate special code for division and remainder when OP1 is a
3877 constant. If |OP1| = 2**n we can use shifts and some other fast
3878 operations. For other values of OP1, we compute a carefully selected
3879 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3880 by m.
3881
3882 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3883 half of the product. Different strategies for generating the product are
3884 implemented in expand_mult_highpart.
3885
3886 If what we actually want is the remainder, we generate that by another
3887 by-constant multiplication and a subtraction. */
3888
3889 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3890 code below will malfunction if we are, so check here and handle
3891 the special case if so. */
3892 if (op1 == const1_rtx)
3893 return rem_flag ? const0_rtx : op0;
3894
3895 /* When dividing by -1, we could get an overflow.
3896 negv_optab can handle overflows. */
3897 if (! unsignedp && op1 == constm1_rtx)
3898 {
3899 if (rem_flag)
3900 return const0_rtx;
3901 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3902 ? negv_optab : neg_optab, op0, target, 0);
3903 }
3904
3905 if (target
3906 /* Don't use the function value register as a target
3907 since we have to read it as well as write it,
3908 and function-inlining gets confused by this. */
3909 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3910 /* Don't clobber an operand while doing a multi-step calculation. */
3911 || ((rem_flag || op1_is_constant)
3912 && (reg_mentioned_p (target, op0)
3913 || (MEM_P (op0) && MEM_P (target))))
3914 || reg_mentioned_p (target, op1)
3915 || (MEM_P (op1) && MEM_P (target))))
3916 target = 0;
3917
3918 /* Get the mode in which to perform this computation. Normally it will
3919 be MODE, but sometimes we can't do the desired operation in MODE.
3920 If so, pick a wider mode in which we can do the operation. Convert
3921 to that mode at the start to avoid repeated conversions.
3922
3923 First see what operations we need. These depend on the expression
3924 we are evaluating. (We assume that divxx3 insns exist under the
3925 same conditions as modxx3 insns do, and that these insns don't normally
3926 fail. If these assumptions are not correct, we may generate less
3927 efficient code in some cases.)
3928
3929 Then see if we find a mode in which we can open-code that operation
3930 (either a division, modulus, or shift). Finally, check for the smallest
3931 mode for which we can do the operation with a library call. */
3932
3933 /* We might want to refine this now that we have division-by-constant
3934 optimization. Since expand_mult_highpart tries so many variants, it is
3935 not straightforward to generalize this. Maybe we should make an array
3936 of possible modes in init_expmed? Save this for GCC 2.7. */
3937
3938 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3939 ? (unsignedp ? lshr_optab : ashr_optab)
3940 : (unsignedp ? udiv_optab : sdiv_optab));
3941 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3942 ? optab1
3943 : (unsignedp ? udivmod_optab : sdivmod_optab));
3944
3945 for (compute_mode = mode; compute_mode != VOIDmode;
3946 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3947 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3948 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3949 break;
3950
3951 if (compute_mode == VOIDmode)
3952 for (compute_mode = mode; compute_mode != VOIDmode;
3953 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3954 if (optab_libfunc (optab1, compute_mode)
3955 || optab_libfunc (optab2, compute_mode))
3956 break;
3957
3958 /* If we still couldn't find a mode, use MODE, but expand_binop will
3959 probably die. */
3960 if (compute_mode == VOIDmode)
3961 compute_mode = mode;
3962
3963 if (target && GET_MODE (target) == compute_mode)
3964 tquotient = target;
3965 else
3966 tquotient = gen_reg_rtx (compute_mode);
3967
3968 size = GET_MODE_BITSIZE (compute_mode);
3969 #if 0
3970 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3971 (mode), and thereby get better code when OP1 is a constant. Do that
3972 later. It will require going over all usages of SIZE below. */
3973 size = GET_MODE_BITSIZE (mode);
3974 #endif
3975
3976 /* Only deduct something for a REM if the last divide done was
3977 for a different constant. Then set the constant of the last
3978 divide. */
3979 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3980 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3981 && INTVAL (op1) == last_div_const))
3982 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3983
3984 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3985
3986 /* Now convert to the best mode to use. */
3987 if (compute_mode != mode)
3988 {
3989 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3990 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3991
3992 /* convert_modes may have placed op1 into a register, so we
3993 must recompute the following. */
3994 op1_is_constant = CONST_INT_P (op1);
3995 op1_is_pow2 = (op1_is_constant
3996 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3997 || (! unsignedp
3998 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3999 }
4000
4001 /* If one of the operands is a volatile MEM, copy it into a register. */
4002
4003 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4004 op0 = force_reg (compute_mode, op0);
4005 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4006 op1 = force_reg (compute_mode, op1);
4007
4008 /* If we need the remainder or if OP1 is constant, we need to
4009 put OP0 in a register in case it has any queued subexpressions. */
4010 if (rem_flag || op1_is_constant)
4011 op0 = force_reg (compute_mode, op0);
4012
4013 last = get_last_insn ();
4014
4015 /* Promote floor rounding to trunc rounding for unsigned operations. */
4016 if (unsignedp)
4017 {
4018 if (code == FLOOR_DIV_EXPR)
4019 code = TRUNC_DIV_EXPR;
4020 if (code == FLOOR_MOD_EXPR)
4021 code = TRUNC_MOD_EXPR;
4022 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4023 code = TRUNC_DIV_EXPR;
4024 }
4025
4026 if (op1 != const0_rtx)
4027 switch (code)
4028 {
4029 case TRUNC_MOD_EXPR:
4030 case TRUNC_DIV_EXPR:
4031 if (op1_is_constant)
4032 {
4033 if (unsignedp)
4034 {
4035 unsigned HOST_WIDE_INT mh;
4036 int pre_shift, post_shift;
4037 int dummy;
4038 rtx ml;
4039 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4040 & GET_MODE_MASK (compute_mode));
4041
4042 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4043 {
4044 pre_shift = floor_log2 (d);
4045 if (rem_flag)
4046 {
4047 remainder
4048 = expand_binop (compute_mode, and_optab, op0,
4049 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4050 remainder, 1,
4051 OPTAB_LIB_WIDEN);
4052 if (remainder)
4053 return gen_lowpart (mode, remainder);
4054 }
4055 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4056 pre_shift, tquotient, 1);
4057 }
4058 else if (size <= HOST_BITS_PER_WIDE_INT)
4059 {
4060 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4061 {
4062 /* Most significant bit of divisor is set; emit an scc
4063 insn. */
4064 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4065 compute_mode, 1, 1);
4066 }
4067 else
4068 {
4069 /* Find a suitable multiplier and right shift count
4070 instead of multiplying with D. */
4071
4072 mh = choose_multiplier (d, size, size,
4073 &ml, &post_shift, &dummy);
4074
4075 /* If the suggested multiplier is more than SIZE bits,
4076 we can do better for even divisors, using an
4077 initial right shift. */
4078 if (mh != 0 && (d & 1) == 0)
4079 {
4080 pre_shift = floor_log2 (d & -d);
4081 mh = choose_multiplier (d >> pre_shift, size,
4082 size - pre_shift,
4083 &ml, &post_shift, &dummy);
4084 gcc_assert (!mh);
4085 }
4086 else
4087 pre_shift = 0;
4088
4089 if (mh != 0)
4090 {
4091 rtx t1, t2, t3, t4;
4092
4093 if (post_shift - 1 >= BITS_PER_WORD)
4094 goto fail1;
4095
4096 extra_cost
4097 = (shift_cost[speed][compute_mode][post_shift - 1]
4098 + shift_cost[speed][compute_mode][1]
4099 + 2 * add_cost[speed][compute_mode]);
4100 t1 = expand_mult_highpart (compute_mode, op0, ml,
4101 NULL_RTX, 1,
4102 max_cost - extra_cost);
4103 if (t1 == 0)
4104 goto fail1;
4105 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4106 op0, t1),
4107 NULL_RTX);
4108 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4109 t2, 1, NULL_RTX, 1);
4110 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4111 t1, t3),
4112 NULL_RTX);
4113 quotient = expand_shift
4114 (RSHIFT_EXPR, compute_mode, t4,
4115 post_shift - 1, tquotient, 1);
4116 }
4117 else
4118 {
4119 rtx t1, t2;
4120
4121 if (pre_shift >= BITS_PER_WORD
4122 || post_shift >= BITS_PER_WORD)
4123 goto fail1;
4124
4125 t1 = expand_shift
4126 (RSHIFT_EXPR, compute_mode, op0,
4127 pre_shift, NULL_RTX, 1);
4128 extra_cost
4129 = (shift_cost[speed][compute_mode][pre_shift]
4130 + shift_cost[speed][compute_mode][post_shift]);
4131 t2 = expand_mult_highpart (compute_mode, t1, ml,
4132 NULL_RTX, 1,
4133 max_cost - extra_cost);
4134 if (t2 == 0)
4135 goto fail1;
4136 quotient = expand_shift
4137 (RSHIFT_EXPR, compute_mode, t2,
4138 post_shift, tquotient, 1);
4139 }
4140 }
4141 }
4142 else /* Too wide mode to use tricky code */
4143 break;
4144
4145 insn = get_last_insn ();
4146 if (insn != last)
4147 set_dst_reg_note (insn, REG_EQUAL,
4148 gen_rtx_UDIV (compute_mode, op0, op1),
4149 quotient);
4150 }
4151 else /* TRUNC_DIV, signed */
4152 {
4153 unsigned HOST_WIDE_INT ml;
4154 int lgup, post_shift;
4155 rtx mlr;
4156 HOST_WIDE_INT d = INTVAL (op1);
4157 unsigned HOST_WIDE_INT abs_d;
4158
4159 /* Since d might be INT_MIN, we have to cast to
4160 unsigned HOST_WIDE_INT before negating to avoid
4161 undefined signed overflow. */
4162 abs_d = (d >= 0
4163 ? (unsigned HOST_WIDE_INT) d
4164 : - (unsigned HOST_WIDE_INT) d);
4165
4166 /* n rem d = n rem -d */
4167 if (rem_flag && d < 0)
4168 {
4169 d = abs_d;
4170 op1 = gen_int_mode (abs_d, compute_mode);
4171 }
4172
4173 if (d == 1)
4174 quotient = op0;
4175 else if (d == -1)
4176 quotient = expand_unop (compute_mode, neg_optab, op0,
4177 tquotient, 0);
4178 else if (HOST_BITS_PER_WIDE_INT >= size
4179 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4180 {
4181 /* This case is not handled correctly below. */
4182 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4183 compute_mode, 1, 1);
4184 if (quotient == 0)
4185 goto fail1;
4186 }
4187 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4188 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4189 : sdiv_pow2_cheap[speed][compute_mode])
4190 /* We assume that cheap metric is true if the
4191 optab has an expander for this mode. */
4192 && ((optab_handler ((rem_flag ? smod_optab
4193 : sdiv_optab),
4194 compute_mode)
4195 != CODE_FOR_nothing)
4196 || (optab_handler (sdivmod_optab,
4197 compute_mode)
4198 != CODE_FOR_nothing)))
4199 ;
4200 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4201 {
4202 if (rem_flag)
4203 {
4204 remainder = expand_smod_pow2 (compute_mode, op0, d);
4205 if (remainder)
4206 return gen_lowpart (mode, remainder);
4207 }
4208
4209 if (sdiv_pow2_cheap[speed][compute_mode]
4210 && ((optab_handler (sdiv_optab, compute_mode)
4211 != CODE_FOR_nothing)
4212 || (optab_handler (sdivmod_optab, compute_mode)
4213 != CODE_FOR_nothing)))
4214 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4215 compute_mode, op0,
4216 gen_int_mode (abs_d,
4217 compute_mode),
4218 NULL_RTX, 0);
4219 else
4220 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4221
4222 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4223 negate the quotient. */
4224 if (d < 0)
4225 {
4226 insn = get_last_insn ();
4227 if (insn != last
4228 && abs_d < ((unsigned HOST_WIDE_INT) 1
4229 << (HOST_BITS_PER_WIDE_INT - 1)))
4230 set_dst_reg_note (insn, REG_EQUAL,
4231 gen_rtx_DIV (compute_mode, op0,
4232 gen_int_mode
4233 (abs_d,
4234 compute_mode)),
4235 quotient);
4236
4237 quotient = expand_unop (compute_mode, neg_optab,
4238 quotient, quotient, 0);
4239 }
4240 }
4241 else if (size <= HOST_BITS_PER_WIDE_INT)
4242 {
4243 choose_multiplier (abs_d, size, size - 1,
4244 &mlr, &post_shift, &lgup);
4245 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4246 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4247 {
4248 rtx t1, t2, t3;
4249
4250 if (post_shift >= BITS_PER_WORD
4251 || size - 1 >= BITS_PER_WORD)
4252 goto fail1;
4253
4254 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4255 + shift_cost[speed][compute_mode][size - 1]
4256 + add_cost[speed][compute_mode]);
4257 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4258 NULL_RTX, 0,
4259 max_cost - extra_cost);
4260 if (t1 == 0)
4261 goto fail1;
4262 t2 = expand_shift
4263 (RSHIFT_EXPR, compute_mode, t1,
4264 post_shift, NULL_RTX, 0);
4265 t3 = expand_shift
4266 (RSHIFT_EXPR, compute_mode, op0,
4267 size - 1, NULL_RTX, 0);
4268 if (d < 0)
4269 quotient
4270 = force_operand (gen_rtx_MINUS (compute_mode,
4271 t3, t2),
4272 tquotient);
4273 else
4274 quotient
4275 = force_operand (gen_rtx_MINUS (compute_mode,
4276 t2, t3),
4277 tquotient);
4278 }
4279 else
4280 {
4281 rtx t1, t2, t3, t4;
4282
4283 if (post_shift >= BITS_PER_WORD
4284 || size - 1 >= BITS_PER_WORD)
4285 goto fail1;
4286
4287 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4288 mlr = gen_int_mode (ml, compute_mode);
4289 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4290 + shift_cost[speed][compute_mode][size - 1]
4291 + 2 * add_cost[speed][compute_mode]);
4292 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4293 NULL_RTX, 0,
4294 max_cost - extra_cost);
4295 if (t1 == 0)
4296 goto fail1;
4297 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4298 t1, op0),
4299 NULL_RTX);
4300 t3 = expand_shift
4301 (RSHIFT_EXPR, compute_mode, t2,
4302 post_shift, NULL_RTX, 0);
4303 t4 = expand_shift
4304 (RSHIFT_EXPR, compute_mode, op0,
4305 size - 1, NULL_RTX, 0);
4306 if (d < 0)
4307 quotient
4308 = force_operand (gen_rtx_MINUS (compute_mode,
4309 t4, t3),
4310 tquotient);
4311 else
4312 quotient
4313 = force_operand (gen_rtx_MINUS (compute_mode,
4314 t3, t4),
4315 tquotient);
4316 }
4317 }
4318 else /* Too wide mode to use tricky code */
4319 break;
4320
4321 insn = get_last_insn ();
4322 if (insn != last)
4323 set_dst_reg_note (insn, REG_EQUAL,
4324 gen_rtx_DIV (compute_mode, op0, op1),
4325 quotient);
4326 }
4327 break;
4328 }
4329 fail1:
4330 delete_insns_since (last);
4331 break;
4332
4333 case FLOOR_DIV_EXPR:
4334 case FLOOR_MOD_EXPR:
4335 /* We will come here only for signed operations. */
4336 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4337 {
4338 unsigned HOST_WIDE_INT mh;
4339 int pre_shift, lgup, post_shift;
4340 HOST_WIDE_INT d = INTVAL (op1);
4341 rtx ml;
4342
4343 if (d > 0)
4344 {
4345 /* We could just as easily deal with negative constants here,
4346 but it does not seem worth the trouble for GCC 2.6. */
4347 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4348 {
4349 pre_shift = floor_log2 (d);
4350 if (rem_flag)
4351 {
4352 remainder = expand_binop (compute_mode, and_optab, op0,
4353 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4354 remainder, 0, OPTAB_LIB_WIDEN);
4355 if (remainder)
4356 return gen_lowpart (mode, remainder);
4357 }
4358 quotient = expand_shift
4359 (RSHIFT_EXPR, compute_mode, op0,
4360 pre_shift, tquotient, 0);
4361 }
4362 else
4363 {
4364 rtx t1, t2, t3, t4;
4365
4366 mh = choose_multiplier (d, size, size - 1,
4367 &ml, &post_shift, &lgup);
4368 gcc_assert (!mh);
4369
4370 if (post_shift < BITS_PER_WORD
4371 && size - 1 < BITS_PER_WORD)
4372 {
4373 t1 = expand_shift
4374 (RSHIFT_EXPR, compute_mode, op0,
4375 size - 1, NULL_RTX, 0);
4376 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4377 NULL_RTX, 0, OPTAB_WIDEN);
4378 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4379 + shift_cost[speed][compute_mode][size - 1]
4380 + 2 * add_cost[speed][compute_mode]);
4381 t3 = expand_mult_highpart (compute_mode, t2, ml,
4382 NULL_RTX, 1,
4383 max_cost - extra_cost);
4384 if (t3 != 0)
4385 {
4386 t4 = expand_shift
4387 (RSHIFT_EXPR, compute_mode, t3,
4388 post_shift, NULL_RTX, 1);
4389 quotient = expand_binop (compute_mode, xor_optab,
4390 t4, t1, tquotient, 0,
4391 OPTAB_WIDEN);
4392 }
4393 }
4394 }
4395 }
4396 else
4397 {
4398 rtx nsign, t1, t2, t3, t4;
4399 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4400 op0, constm1_rtx), NULL_RTX);
4401 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4402 0, OPTAB_WIDEN);
4403 nsign = expand_shift
4404 (RSHIFT_EXPR, compute_mode, t2,
4405 size - 1, NULL_RTX, 0);
4406 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4407 NULL_RTX);
4408 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4409 NULL_RTX, 0);
4410 if (t4)
4411 {
4412 rtx t5;
4413 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4414 NULL_RTX, 0);
4415 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4416 t4, t5),
4417 tquotient);
4418 }
4419 }
4420 }
4421
4422 if (quotient != 0)
4423 break;
4424 delete_insns_since (last);
4425
4426 /* Try using an instruction that produces both the quotient and
4427 remainder, using truncation. We can easily compensate the quotient
4428 or remainder to get floor rounding, once we have the remainder.
4429 Notice that we compute also the final remainder value here,
4430 and return the result right away. */
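/* Illustrative note: with truncating quotient Q and remainder R for
   OP0 / OP1, floor rounding is obtained as Q - 1 and R + OP1 whenever
   R != 0 and OP0 and OP1 have opposite signs, which is the compensation
   emitted below.  */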
4431 if (target == 0 || GET_MODE (target) != compute_mode)
4432 target = gen_reg_rtx (compute_mode);
4433
4434 if (rem_flag)
4435 {
4436 remainder
4437 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4438 quotient = gen_reg_rtx (compute_mode);
4439 }
4440 else
4441 {
4442 quotient
4443 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4444 remainder = gen_reg_rtx (compute_mode);
4445 }
4446
4447 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4448 quotient, remainder, 0))
4449 {
4450 /* This could be computed with a branch-less sequence.
4451 Save that for later. */
4452 rtx tem;
4453 rtx label = gen_label_rtx ();
4454 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4455 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4456 NULL_RTX, 0, OPTAB_WIDEN);
4457 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4458 expand_dec (quotient, const1_rtx);
4459 expand_inc (remainder, op1);
4460 emit_label (label);
4461 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4462 }
4463
4464 /* No luck with division elimination or divmod. Have to do it
4465 by conditionally adjusting op0 *and* the result. */
4466 {
4467 rtx label1, label2, label3, label4, label5;
4468 rtx adjusted_op0;
4469 rtx tem;
4470
4471 quotient = gen_reg_rtx (compute_mode);
4472 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4473 label1 = gen_label_rtx ();
4474 label2 = gen_label_rtx ();
4475 label3 = gen_label_rtx ();
4476 label4 = gen_label_rtx ();
4477 label5 = gen_label_rtx ();
4478 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4479 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4480 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4481 quotient, 0, OPTAB_LIB_WIDEN);
4482 if (tem != quotient)
4483 emit_move_insn (quotient, tem);
4484 emit_jump_insn (gen_jump (label5));
4485 emit_barrier ();
4486 emit_label (label1);
4487 expand_inc (adjusted_op0, const1_rtx);
4488 emit_jump_insn (gen_jump (label4));
4489 emit_barrier ();
4490 emit_label (label2);
4491 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4492 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4493 quotient, 0, OPTAB_LIB_WIDEN);
4494 if (tem != quotient)
4495 emit_move_insn (quotient, tem);
4496 emit_jump_insn (gen_jump (label5));
4497 emit_barrier ();
4498 emit_label (label3);
4499 expand_dec (adjusted_op0, const1_rtx);
4500 emit_label (label4);
4501 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4502 quotient, 0, OPTAB_LIB_WIDEN);
4503 if (tem != quotient)
4504 emit_move_insn (quotient, tem);
4505 expand_dec (quotient, const1_rtx);
4506 emit_label (label5);
4507 }
4508 break;
4509
4510 case CEIL_DIV_EXPR:
4511 case CEIL_MOD_EXPR:
4512 if (unsignedp)
4513 {
4514 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4515 {
4516 rtx t1, t2, t3;
4517 unsigned HOST_WIDE_INT d = INTVAL (op1);
4518 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4519 floor_log2 (d), tquotient, 1);
4520 t2 = expand_binop (compute_mode, and_optab, op0,
4521 GEN_INT (d - 1),
4522 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4523 t3 = gen_reg_rtx (compute_mode);
4524 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4525 compute_mode, 1, 1);
4526 if (t3 == 0)
4527 {
4528 rtx lab;
4529 lab = gen_label_rtx ();
4530 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4531 expand_inc (t1, const1_rtx);
4532 emit_label (lab);
4533 quotient = t1;
4534 }
4535 else
4536 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4537 t1, t3),
4538 tquotient);
4539 break;
4540 }
4541
4542 /* Try using an instruction that produces both the quotient and
4543 remainder, using truncation. We can easily compensate the
4544 quotient or remainder to get ceiling rounding, once we have the
4545 remainder. Notice that we compute also the final remainder
4546 value here, and return the result right away. */
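/* Illustrative note: for unsigned operands, ceiling rounding is obtained
   from the truncating pair as Q + 1 and R - OP1 whenever R != 0, which is
   the compensation emitted below.  */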
4547 if (target == 0 || GET_MODE (target) != compute_mode)
4548 target = gen_reg_rtx (compute_mode);
4549
4550 if (rem_flag)
4551 {
4552 remainder = (REG_P (target)
4553 ? target : gen_reg_rtx (compute_mode));
4554 quotient = gen_reg_rtx (compute_mode);
4555 }
4556 else
4557 {
4558 quotient = (REG_P (target)
4559 ? target : gen_reg_rtx (compute_mode));
4560 remainder = gen_reg_rtx (compute_mode);
4561 }
4562
4563 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4564 remainder, 1))
4565 {
4566 /* This could be computed with a branch-less sequence.
4567 Save that for later. */
4568 rtx label = gen_label_rtx ();
4569 do_cmp_and_jump (remainder, const0_rtx, EQ,
4570 compute_mode, label);
4571 expand_inc (quotient, const1_rtx);
4572 expand_dec (remainder, op1);
4573 emit_label (label);
4574 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4575 }
4576
4577 /* No luck with division elimination or divmod. Have to do it
4578 by conditionally adjusting op0 *and* the result. */
4579 {
4580 rtx label1, label2;
4581 rtx adjusted_op0, tem;
4582
4583 quotient = gen_reg_rtx (compute_mode);
4584 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4585 label1 = gen_label_rtx ();
4586 label2 = gen_label_rtx ();
4587 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4588 compute_mode, label1);
4589 emit_move_insn (quotient, const0_rtx);
4590 emit_jump_insn (gen_jump (label2));
4591 emit_barrier ();
4592 emit_label (label1);
4593 expand_dec (adjusted_op0, const1_rtx);
4594 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4595 quotient, 1, OPTAB_LIB_WIDEN);
4596 if (tem != quotient)
4597 emit_move_insn (quotient, tem);
4598 expand_inc (quotient, const1_rtx);
4599 emit_label (label2);
4600 }
4601 }
4602 else /* signed */
4603 {
4604 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4605 && INTVAL (op1) >= 0)
4606 {
4607 /* This is extremely similar to the code for the unsigned case
4608 above. For 2.7 we should merge these variants, but for
4609 2.6.1 I don't want to touch the code for unsigned since that
4610 get used in C. The signed case will only be used by other
4611 languages (Ada). */
4612
4613 rtx t1, t2, t3;
4614 unsigned HOST_WIDE_INT d = INTVAL (op1);
4615 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4616 floor_log2 (d), tquotient, 0);
4617 t2 = expand_binop (compute_mode, and_optab, op0,
4618 GEN_INT (d - 1),
4619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4620 t3 = gen_reg_rtx (compute_mode);
4621 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4622 compute_mode, 1, 1);
4623 if (t3 == 0)
4624 {
4625 rtx lab;
4626 lab = gen_label_rtx ();
4627 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4628 expand_inc (t1, const1_rtx);
4629 emit_label (lab);
4630 quotient = t1;
4631 }
4632 else
4633 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4634 t1, t3),
4635 tquotient);
4636 break;
4637 }
4638
4639 /* Try using an instruction that produces both the quotient and
4640 remainder, using truncation. We can easily compensate the
4641 quotient or remainder to get ceiling rounding, once we have the
4642 remainder. Notice that we compute also the final remainder
4643 value here, and return the result right away. */
4644 if (target == 0 || GET_MODE (target) != compute_mode)
4645 target = gen_reg_rtx (compute_mode);
4646 if (rem_flag)
4647 {
4648 remainder = (REG_P (target)
4649 ? target : gen_reg_rtx (compute_mode));
4650 quotient = gen_reg_rtx (compute_mode);
4651 }
4652 else
4653 {
4654 quotient = (REG_P (target)
4655 ? target : gen_reg_rtx (compute_mode));
4656 remainder = gen_reg_rtx (compute_mode);
4657 }
4658
4659 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4660 remainder, 0))
4661 {
4662 /* This could be computed with a branch-less sequence.
4663 Save that for later. */
4664 rtx tem;
4665 rtx label = gen_label_rtx ();
4666 do_cmp_and_jump (remainder, const0_rtx, EQ,
4667 compute_mode, label);
4668 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4669 NULL_RTX, 0, OPTAB_WIDEN);
4670 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4671 expand_inc (quotient, const1_rtx);
4672 expand_dec (remainder, op1);
4673 emit_label (label);
4674 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4675 }
4676
4677 /* No luck with division elimination or divmod. Have to do it
4678 by conditionally adjusting op0 *and* the result. */
4679 {
4680 rtx label1, label2, label3, label4, label5;
4681 rtx adjusted_op0;
4682 rtx tem;
4683
4684 quotient = gen_reg_rtx (compute_mode);
4685 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4686 label1 = gen_label_rtx ();
4687 label2 = gen_label_rtx ();
4688 label3 = gen_label_rtx ();
4689 label4 = gen_label_rtx ();
4690 label5 = gen_label_rtx ();
4691 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4692 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4693 compute_mode, label1);
4694 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4695 quotient, 0, OPTAB_LIB_WIDEN);
4696 if (tem != quotient)
4697 emit_move_insn (quotient, tem);
4698 emit_jump_insn (gen_jump (label5));
4699 emit_barrier ();
4700 emit_label (label1);
4701 expand_dec (adjusted_op0, const1_rtx);
4702 emit_jump_insn (gen_jump (label4));
4703 emit_barrier ();
4704 emit_label (label2);
4705 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4706 compute_mode, label3);
4707 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 quotient, 0, OPTAB_LIB_WIDEN);
4709 if (tem != quotient)
4710 emit_move_insn (quotient, tem);
4711 emit_jump_insn (gen_jump (label5));
4712 emit_barrier ();
4713 emit_label (label3);
4714 expand_inc (adjusted_op0, const1_rtx);
4715 emit_label (label4);
4716 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4717 quotient, 0, OPTAB_LIB_WIDEN);
4718 if (tem != quotient)
4719 emit_move_insn (quotient, tem);
4720 expand_inc (quotient, const1_rtx);
4721 emit_label (label5);
4722 }
4723 }
4724 break;
4725
4726 case EXACT_DIV_EXPR:
4727 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4728 {
4729 HOST_WIDE_INT d = INTVAL (op1);
4730 unsigned HOST_WIDE_INT ml;
4731 int pre_shift;
4732 rtx t1;
4733
4734 pre_shift = floor_log2 (d & -d);
4735 ml = invert_mod2n (d >> pre_shift, size);
4736 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4737 pre_shift, NULL_RTX, unsignedp);
4738 quotient = expand_mult (compute_mode, t1,
4739 gen_int_mode (ml, compute_mode),
4740 NULL_RTX, 1);
4741
4742 insn = get_last_insn ();
4743 set_dst_reg_note (insn, REG_EQUAL,
4744 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4745 compute_mode, op0, op1),
4746 quotient);
4747 }
4748 break;
4749
4750 case ROUND_DIV_EXPR:
4751 case ROUND_MOD_EXPR:
4752 if (unsignedp)
4753 {
4754 rtx tem;
4755 rtx label;
4756 label = gen_label_rtx ();
4757 quotient = gen_reg_rtx (compute_mode);
4758 remainder = gen_reg_rtx (compute_mode);
4759 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4760 {
4761 rtx tem;
4762 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4763 quotient, 1, OPTAB_LIB_WIDEN);
4764 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4765 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4766 remainder, 1, OPTAB_LIB_WIDEN);
4767 }
4768 tem = plus_constant (op1, -1);
4769 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4770 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4771 expand_inc (quotient, const1_rtx);
4772 expand_dec (remainder, op1);
4773 emit_label (label);
4774 }
4775 else
4776 {
4777 rtx abs_rem, abs_op1, tem, mask;
4778 rtx label;
4779 label = gen_label_rtx ();
4780 quotient = gen_reg_rtx (compute_mode);
4781 remainder = gen_reg_rtx (compute_mode);
4782 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4783 {
4784 rtx tem;
4785 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4786 quotient, 0, OPTAB_LIB_WIDEN);
4787 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4788 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4789 remainder, 0, OPTAB_LIB_WIDEN);
4790 }
4791 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4792 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4793 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4794 1, NULL_RTX, 1);
4795 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4796 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4797 NULL_RTX, 0, OPTAB_WIDEN);
4798 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4799 size - 1, NULL_RTX, 0);
4800 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4801 NULL_RTX, 0, OPTAB_WIDEN);
4802 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4803 NULL_RTX, 0, OPTAB_WIDEN);
4804 expand_inc (quotient, tem);
4805 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4806 NULL_RTX, 0, OPTAB_WIDEN);
4807 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4808 NULL_RTX, 0, OPTAB_WIDEN);
4809 expand_dec (remainder, tem);
4810 emit_label (label);
4811 }
4812 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4813
4814 default:
4815 gcc_unreachable ();
4816 }
4817
4818 if (quotient == 0)
4819 {
4820 if (target && GET_MODE (target) != compute_mode)
4821 target = 0;
4822
4823 if (rem_flag)
4824 {
4825 /* Try to produce the remainder without producing the quotient.
4826 If we seem to have a divmod pattern that does not require widening,
4827 don't try widening here. We should really have a WIDEN argument
4828 to expand_twoval_binop, since what we'd really like to do here is
4829 1) try a mod insn in compute_mode
4830 2) try a divmod insn in compute_mode
4831 3) try a div insn in compute_mode and multiply-subtract to get
4832 remainder
4833 4) try the same things with widening allowed. */
4834 remainder
4835 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4836 op0, op1, target,
4837 unsignedp,
4838 ((optab_handler (optab2, compute_mode)
4839 != CODE_FOR_nothing)
4840 ? OPTAB_DIRECT : OPTAB_WIDEN));
4841 if (remainder == 0)
4842 {
4843 /* No luck there. Can we do remainder and divide at once
4844 without a library call? */
4845 remainder = gen_reg_rtx (compute_mode);
4846 if (! expand_twoval_binop ((unsignedp
4847 ? udivmod_optab
4848 : sdivmod_optab),
4849 op0, op1,
4850 NULL_RTX, remainder, unsignedp))
4851 remainder = 0;
4852 }
4853
4854 if (remainder)
4855 return gen_lowpart (mode, remainder);
4856 }
4857
4858 /* Produce the quotient. Try a quotient insn, but not a library call.
4859 If we have a divmod in this mode, use it in preference to widening
4860 the div (for this test we assume it will not fail). Note that optab2
4861 is set to one of the two optabs that the call below will use. */
4862 quotient
4863 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4864 op0, op1, rem_flag ? NULL_RTX : target,
4865 unsignedp,
4866 ((optab_handler (optab2, compute_mode)
4867 != CODE_FOR_nothing)
4868 ? OPTAB_DIRECT : OPTAB_WIDEN));
4869
4870 if (quotient == 0)
4871 {
4872 /* No luck there. Try a quotient-and-remainder insn,
4873 keeping the quotient alone. */
4874 quotient = gen_reg_rtx (compute_mode);
4875 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4876 op0, op1,
4877 quotient, NULL_RTX, unsignedp))
4878 {
4879 quotient = 0;
4880 if (! rem_flag)
4881 /* Still no luck. If we are not computing the remainder,
4882 use a library call for the quotient. */
4883 quotient = sign_expand_binop (compute_mode,
4884 udiv_optab, sdiv_optab,
4885 op0, op1, target,
4886 unsignedp, OPTAB_LIB_WIDEN);
4887 }
4888 }
4889 }
4890
4891 if (rem_flag)
4892 {
4893 if (target && GET_MODE (target) != compute_mode)
4894 target = 0;
4895
4896 if (quotient == 0)
4897 {
4898 /* No divide instruction either. Use library for remainder. */
4899 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4900 op0, op1, target,
4901 unsignedp, OPTAB_LIB_WIDEN);
4902 /* No remainder function. Try a quotient-and-remainder
4903 function, keeping the remainder. */
4904 if (!remainder)
4905 {
4906 remainder = gen_reg_rtx (compute_mode);
4907 if (!expand_twoval_binop_libfunc
4908 (unsignedp ? udivmod_optab : sdivmod_optab,
4909 op0, op1,
4910 NULL_RTX, remainder,
4911 unsignedp ? UMOD : MOD))
4912 remainder = NULL_RTX;
4913 }
4914 }
4915 else
4916 {
4917 /* We divided. Now finish doing X - Y * (X / Y). */
4918 remainder = expand_mult (compute_mode, quotient, op1,
4919 NULL_RTX, unsignedp);
4920 remainder = expand_binop (compute_mode, sub_optab, op0,
4921 remainder, target, unsignedp,
4922 OPTAB_LIB_WIDEN);
4923 }
4924 }
4925
4926 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4927 }
4928 \f
4929 /* Return a tree node with data type TYPE, describing the value of X.
4930 Usually this is a VAR_DECL, if there is no obvious better choice.
4931 X may be an expression; however, we only support those expressions
4932 generated by loop.c. */
4933
4934 tree
4935 make_tree (tree type, rtx x)
4936 {
4937 tree t;
4938
4939 switch (GET_CODE (x))
4940 {
4941 case CONST_INT:
4942 {
4943 HOST_WIDE_INT hi = 0;
4944
4945 if (INTVAL (x) < 0
4946 && !(TYPE_UNSIGNED (type)
4947 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4948 < HOST_BITS_PER_WIDE_INT)))
4949 hi = -1;
4950
4951 t = build_int_cst_wide (type, INTVAL (x), hi);
4952
4953 return t;
4954 }
4955
4956 case CONST_DOUBLE:
4957 if (GET_MODE (x) == VOIDmode)
4958 t = build_int_cst_wide (type,
4959 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4960 else
4961 {
4962 REAL_VALUE_TYPE d;
4963
4964 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4965 t = build_real (type, d);
4966 }
4967
4968 return t;
4969
4970 case CONST_VECTOR:
4971 {
4972 int units = CONST_VECTOR_NUNITS (x);
4973 tree itype = TREE_TYPE (type);
4974 tree *elts;
4975 int i;
4976
4977 /* Build a tree with vector elements. */
4978 elts = XALLOCAVEC (tree, units);
4979 for (i = units - 1; i >= 0; --i)
4980 {
4981 rtx elt = CONST_VECTOR_ELT (x, i);
4982 elts[i] = make_tree (itype, elt);
4983 }
4984
4985 return build_vector (type, elts);
4986 }
4987
4988 case PLUS:
4989 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4990 make_tree (type, XEXP (x, 1)));
4991
4992 case MINUS:
4993 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4994 make_tree (type, XEXP (x, 1)));
4995
4996 case NEG:
4997 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4998
4999 case MULT:
5000 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5001 make_tree (type, XEXP (x, 1)));
5002
5003 case ASHIFT:
5004 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5005 make_tree (type, XEXP (x, 1)));
5006
5007 case LSHIFTRT:
5008 t = unsigned_type_for (type);
5009 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5010 make_tree (t, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1))));
5012
5013 case ASHIFTRT:
5014 t = signed_type_for (type);
5015 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5016 make_tree (t, XEXP (x, 0)),
5017 make_tree (type, XEXP (x, 1))));
5018
5019 case DIV:
5020 if (TREE_CODE (type) != REAL_TYPE)
5021 t = signed_type_for (type);
5022 else
5023 t = type;
5024
5025 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5026 make_tree (t, XEXP (x, 0)),
5027 make_tree (t, XEXP (x, 1))));
5028 case UDIV:
5029 t = unsigned_type_for (type);
5030 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5031 make_tree (t, XEXP (x, 0)),
5032 make_tree (t, XEXP (x, 1))));
5033
5034 case SIGN_EXTEND:
5035 case ZERO_EXTEND:
5036 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5037 GET_CODE (x) == ZERO_EXTEND);
5038 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5039
5040 case CONST:
5041 return make_tree (type, XEXP (x, 0));
5042
5043 case SYMBOL_REF:
5044 t = SYMBOL_REF_DECL (x);
5045 if (t)
5046 return fold_convert (type, build_fold_addr_expr (t));
5047 /* else fall through. */
5048
5049 default:
5050 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5051
5052 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5053 address mode to pointer mode. */
5054 if (POINTER_TYPE_P (type))
5055 x = convert_memory_address_addr_space
5056 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5057
5058 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5059 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5060 t->decl_with_rtl.rtl = x;
5061
5062 return t;
5063 }
5064 }
5065 \f
5066 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5067 and returning TARGET.
5068
5069 If TARGET is 0, a pseudo-register or constant is returned. */
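/* A usage sketch (the operands here are hypothetical): expand_and (SImode,
   op, GEN_INT (0xff), NULL_RTX) returns the AND of OP with 0xff, i.e. the
   low byte of OP, in a pseudo-register or constant since TARGET is 0. */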
5070
5071 rtx
5072 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5073 {
5074 rtx tem = 0;
5075
5076 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5077 tem = simplify_binary_operation (AND, mode, op0, op1);
5078 if (tem == 0)
5079 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5080
5081 if (target == 0)
5082 target = tem;
5083 else if (tem != target)
5084 emit_move_insn (target, tem);
5085 return target;
5086 }
5087
5088 /* Helper function for emit_store_flag. */
5089 static rtx
5090 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5091 enum machine_mode mode, enum machine_mode compare_mode,
5092 int unsignedp, rtx x, rtx y, int normalizep,
5093 enum machine_mode target_mode)
5094 {
5095 struct expand_operand ops[4];
5096 rtx op0, last, comparison, subtarget;
5097 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5098
5099 last = get_last_insn ();
5100 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5101 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5102 if (!x || !y)
5103 {
5104 delete_insns_since (last);
5105 return NULL_RTX;
5106 }
5107
5108 if (target_mode == VOIDmode)
5109 target_mode = result_mode;
5110 if (!target)
5111 target = gen_reg_rtx (target_mode);
5112
5113 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5114
5115 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5116 create_fixed_operand (&ops[1], comparison);
5117 create_fixed_operand (&ops[2], x);
5118 create_fixed_operand (&ops[3], y);
5119 if (!maybe_expand_insn (icode, 4, ops))
5120 {
5121 delete_insns_since (last);
5122 return NULL_RTX;
5123 }
5124 subtarget = ops[0].value;
5125
5126 /* If we are converting to a wider mode, first convert to
5127 TARGET_MODE, then normalize. This produces better combining
5128 opportunities on machines that have a SIGN_EXTRACT when we are
5129 testing a single bit. This mostly benefits the 68k.
5130
5131 If STORE_FLAG_VALUE does not have the sign bit set when
5132 interpreted in MODE, we can do this conversion as unsigned, which
5133 is usually more efficient. */
5134 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5135 {
5136 convert_move (target, subtarget,
5137 val_signbit_known_clear_p (result_mode,
5138 STORE_FLAG_VALUE));
5139 op0 = target;
5140 result_mode = target_mode;
5141 }
5142 else
5143 op0 = subtarget;
5144
5145 /* If we want to keep subexpressions around, don't reuse our last
5146 target. */
5147 if (optimize)
5148 subtarget = 0;
5149
5150 /* Now normalize to the proper value in MODE. Sometimes we don't
5151 have to do anything. */
5152 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5153 ;
5154 /* STORE_FLAG_VALUE might be the most negative number, so write
5155 the comparison this way to avoid a compile-time warning. */
5156 else if (- normalizep == STORE_FLAG_VALUE)
5157 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5158
5159 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5160 it hard to use a value of just the sign bit due to ANSI integer
5161 constant typing rules. */
5162 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5163 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5164 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5165 normalizep == 1);
5166 else
5167 {
5168 gcc_assert (STORE_FLAG_VALUE & 1);
5169
5170 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5171 if (normalizep == -1)
5172 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5173 }
5174
5175 /* If we were converting to a smaller mode, do the conversion now. */
5176 if (target_mode != result_mode)
5177 {
5178 convert_move (target, op0, 0);
5179 return target;
5180 }
5181 else
5182 return op0;
5183 }
5184
5185
5186 /* A subroutine of emit_store_flag only including "tricks" that do not
5187 need a recursive call. These are kept separate to avoid infinite
5188 loops. */
5189
5190 static rtx
5191 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5192 enum machine_mode mode, int unsignedp, int normalizep,
5193 enum machine_mode target_mode)
5194 {
5195 rtx subtarget;
5196 enum insn_code icode;
5197 enum machine_mode compare_mode;
5198 enum mode_class mclass;
5199 enum rtx_code scode;
5200 rtx tem;
5201
5202 if (unsignedp)
5203 code = unsigned_condition (code);
5204 scode = swap_condition (code);
5205
5206 /* If one operand is constant, make it the second one. Only do this
5207 if the other operand is not constant as well. */
5208
5209 if (swap_commutative_operands_p (op0, op1))
5210 {
5211 tem = op0;
5212 op0 = op1;
5213 op1 = tem;
5214 code = swap_condition (code);
5215 }
5216
5217 if (mode == VOIDmode)
5218 mode = GET_MODE (op0);
5219
5220 /* For some comparisons with 1 and -1, we can convert this to
5221 comparisons with zero. This will often produce more opportunities for
5222 store-flag insns. */
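  /* E.g. the signed test X < 1 is equivalent to X <= 0 and X > -1 to
     X >= 0, while the unsigned tests X >= 1 and X < 1 are equivalent to
     X != 0 and X == 0. */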
5223
5224 switch (code)
5225 {
5226 case LT:
5227 if (op1 == const1_rtx)
5228 op1 = const0_rtx, code = LE;
5229 break;
5230 case LE:
5231 if (op1 == constm1_rtx)
5232 op1 = const0_rtx, code = LT;
5233 break;
5234 case GE:
5235 if (op1 == const1_rtx)
5236 op1 = const0_rtx, code = GT;
5237 break;
5238 case GT:
5239 if (op1 == constm1_rtx)
5240 op1 = const0_rtx, code = GE;
5241 break;
5242 case GEU:
5243 if (op1 == const1_rtx)
5244 op1 = const0_rtx, code = NE;
5245 break;
5246 case LTU:
5247 if (op1 == const1_rtx)
5248 op1 = const0_rtx, code = EQ;
5249 break;
5250 default:
5251 break;
5252 }
5253
5254 /* If we are comparing a double-word integer with zero or -1, we can
5255 convert the comparison into one involving a single word. */
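  /* This works because a double-word value is zero exactly when the IOR of
     its two words is zero, is -1 exactly when the AND of its two words is
     -1, and takes its sign from the high word. */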
5256 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5257 && GET_MODE_CLASS (mode) == MODE_INT
5258 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5259 {
5260 if ((code == EQ || code == NE)
5261 && (op1 == const0_rtx || op1 == constm1_rtx))
5262 {
5263 rtx op00, op01;
5264
5265 /* Do a logical OR or AND of the two words and compare the
5266 result. */
5267 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5268 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5269 tem = expand_binop (word_mode,
5270 op1 == const0_rtx ? ior_optab : and_optab,
5271 op00, op01, NULL_RTX, unsignedp,
5272 OPTAB_DIRECT);
5273
5274 if (tem != 0)
5275 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5276 unsignedp, normalizep);
5277 }
5278 else if ((code == LT || code == GE) && op1 == const0_rtx)
5279 {
5280 rtx op0h;
5281
5282 /* If testing the sign bit, can just test on high word. */
5283 op0h = simplify_gen_subreg (word_mode, op0, mode,
5284 subreg_highpart_offset (word_mode,
5285 mode));
5286 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5287 unsignedp, normalizep);
5288 }
5289 else
5290 tem = NULL_RTX;
5291
5292 if (tem)
5293 {
5294 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5295 return tem;
5296 if (!target)
5297 target = gen_reg_rtx (target_mode);
5298
5299 convert_move (target, tem,
5300 !val_signbit_known_set_p (word_mode,
5301 (normalizep ? normalizep
5302 : STORE_FLAG_VALUE)));
5303 return target;
5304 }
5305 }
5306
5307 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5308 complement of A (for GE) and shifting the sign bit to the low bit. */
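  /* For example, in a 32-bit mode the 0/1 form of A < 0 is
     (unsigned) A >> 31 and the 0/1 form of A >= 0 is (unsigned) ~A >> 31;
     the -1/0 forms use an arithmetic shift instead. */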
5309 if (op1 == const0_rtx && (code == LT || code == GE)
5310 && GET_MODE_CLASS (mode) == MODE_INT
5311 && (normalizep || STORE_FLAG_VALUE == 1
5312 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5313 {
5314 subtarget = target;
5315
5316 if (!target)
5317 target_mode = mode;
5318
5319 /* If the result is to be wider than OP0, it is best to convert it
5320 first. If it is to be narrower, it is *incorrect* to convert it
5321 first. */
5322 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5323 {
5324 op0 = convert_modes (target_mode, mode, op0, 0);
5325 mode = target_mode;
5326 }
5327
5328 if (target_mode != mode)
5329 subtarget = 0;
5330
5331 if (code == GE)
5332 op0 = expand_unop (mode, one_cmpl_optab, op0,
5333 ((STORE_FLAG_VALUE == 1 || normalizep)
5334 ? 0 : subtarget), 0);
5335
5336 if (STORE_FLAG_VALUE == 1 || normalizep)
5337 /* If we are supposed to produce a 0/1 value, we want to do
5338 a logical shift from the sign bit to the low-order bit; for
5339 a -1/0 value, we do an arithmetic shift. */
5340 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5341 GET_MODE_BITSIZE (mode) - 1,
5342 subtarget, normalizep != -1);
5343
5344 if (mode != target_mode)
5345 op0 = convert_modes (target_mode, mode, op0, 0);
5346
5347 return op0;
5348 }
5349
5350 mclass = GET_MODE_CLASS (mode);
5351 for (compare_mode = mode; compare_mode != VOIDmode;
5352 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5353 {
5354 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5355 icode = optab_handler (cstore_optab, optab_mode);
5356 if (icode != CODE_FOR_nothing)
5357 {
5358 do_pending_stack_adjust ();
5359 tem = emit_cstore (target, icode, code, mode, compare_mode,
5360 unsignedp, op0, op1, normalizep, target_mode);
5361 if (tem)
5362 return tem;
5363
5364 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5365 {
5366 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5367 unsignedp, op1, op0, normalizep, target_mode);
5368 if (tem)
5369 return tem;
5370 }
5371 break;
5372 }
5373 }
5374
5375 return 0;
5376 }
5377
5378 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5379 and storing the result in TARGET. Normally return TARGET.
5380 Return 0 if that cannot be done.
5381
5382 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5383 it is VOIDmode, they cannot both be CONST_INT.
5384
5385 UNSIGNEDP is for the case where we have to widen the operands
5386 to perform the operation. It says to use zero-extension.
5387
5388 NORMALIZEP is 1 if we should convert the result to be either zero
5389 or one. NORMALIZEP is -1 if we should convert the result to be
5390 either zero or -1. If NORMALIZEP is zero, the result will be left
5391 "raw" out of the scc insn. */
5392
5393 rtx
5394 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5395 enum machine_mode mode, int unsignedp, int normalizep)
5396 {
5397 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5398 enum rtx_code rcode;
5399 rtx subtarget;
5400 rtx tem, last, trueval;
5401
5402 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5403 target_mode);
5404 if (tem)
5405 return tem;
5406
5407 /* If we reached here, we can't do this with a scc insn; however, there
5408 are some comparisons that can be done in other ways. Don't do any
5409 of these cases if branches are very cheap. */
5410 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5411 return 0;
5412
5413 /* See what we need to return. We can only return a 1, -1, or the
5414 sign bit. */
5415
5416 if (normalizep == 0)
5417 {
5418 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5419 normalizep = STORE_FLAG_VALUE;
5420
5421 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5422 ;
5423 else
5424 return 0;
5425 }
5426
5427 last = get_last_insn ();
5428
5429 /* If optimizing, use different pseudo registers for each insn, instead
5430 of reusing the same pseudo. This leads to better CSE, but slows
5431 down the compiler, since there are more pseudos. */
5432 subtarget = (!optimize
5433 && (target_mode == mode)) ? target : NULL_RTX;
5434 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5435
5436 /* For floating-point comparisons, try the reverse comparison or try
5437 changing the "orderedness" of the comparison. */
5438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5439 {
5440 enum rtx_code first_code;
5441 bool and_them;
5442
5443 rcode = reverse_condition_maybe_unordered (code);
5444 if (can_compare_p (rcode, mode, ccp_store_flag)
5445 && (code == ORDERED || code == UNORDERED
5446 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5447 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5448 {
5449 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5450 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5451
5452 /* For the reverse comparison, use either an addition or a XOR. */
5453 if (want_add
5454 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5455 optimize_insn_for_speed_p ()) == 0)
5456 {
5457 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5458 STORE_FLAG_VALUE, target_mode);
5459 if (tem)
5460 return expand_binop (target_mode, add_optab, tem,
5461 GEN_INT (normalizep),
5462 target, 0, OPTAB_WIDEN);
5463 }
5464 else if (!want_add
5465 && rtx_cost (trueval, XOR, 1,
5466 optimize_insn_for_speed_p ()) == 0)
5467 {
5468 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5469 normalizep, target_mode);
5470 if (tem)
5471 return expand_binop (target_mode, xor_optab, tem, trueval,
5472 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5473 }
5474 }
5475
5476 delete_insns_since (last);
5477
5478 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5479 if (code == ORDERED || code == UNORDERED)
5480 return 0;
5481
5482 and_them = split_comparison (code, mode, &first_code, &code);
5483
5484 /* If there are no NaNs, the first comparison should always fall through.
5485 Effectively change the comparison to the other one. */
5486 if (!HONOR_NANS (mode))
5487 {
5488 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5489 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5490 target_mode);
5491 }
5492
5493 #ifdef HAVE_conditional_move
5494 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5495 conditional move. */
5496 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5497 normalizep, target_mode);
5498 if (tem == 0)
5499 return 0;
5500
5501 if (and_them)
5502 tem = emit_conditional_move (target, code, op0, op1, mode,
5503 tem, const0_rtx, GET_MODE (tem), 0);
5504 else
5505 tem = emit_conditional_move (target, code, op0, op1, mode,
5506 trueval, tem, GET_MODE (tem), 0);
5507
5508 if (tem == 0)
5509 delete_insns_since (last);
5510 return tem;
5511 #else
5512 return 0;
5513 #endif
5514 }
5515
5516 /* The remaining tricks only apply to integer comparisons. */
5517
5518 if (GET_MODE_CLASS (mode) != MODE_INT)
5519 return 0;
5520
5521 /* If this is an equality comparison of integers, we can try to exclusive-or
5522 (or subtract) the two operands and use a recursive call to try the
5523 comparison with zero. Don't do any of these cases if branches are
5524 very cheap. */
5525
5526 if ((code == EQ || code == NE) && op1 != const0_rtx)
5527 {
5528 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5529 OPTAB_WIDEN);
5530
5531 if (tem == 0)
5532 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5533 OPTAB_WIDEN);
5534 if (tem != 0)
5535 tem = emit_store_flag (target, code, tem, const0_rtx,
5536 mode, unsignedp, normalizep);
5537 if (tem != 0)
5538 return tem;
5539
5540 delete_insns_since (last);
5541 }
5542
5543 /* For integer comparisons, try the reverse comparison. However, for
5544 small X, and if we'd have to extend anyway, implementing "X != 0"
5545 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5546 rcode = reverse_condition (code);
5547 if (can_compare_p (rcode, mode, ccp_store_flag)
5548 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5549 && code == NE
5550 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5551 && op1 == const0_rtx))
5552 {
5553 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5554 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5555
5556 /* Again, for the reverse comparison, use either an addition or a XOR. */
5557 if (want_add
5558 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5559 optimize_insn_for_speed_p ()) == 0)
5560 {
5561 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5562 STORE_FLAG_VALUE, target_mode);
5563 if (tem != 0)
5564 tem = expand_binop (target_mode, add_optab, tem,
5565 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5566 }
5567 else if (!want_add
5568 && rtx_cost (trueval, XOR, 1,
5569 optimize_insn_for_speed_p ()) == 0)
5570 {
5571 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5572 normalizep, target_mode);
5573 if (tem != 0)
5574 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5575 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5576 }
5577
5578 if (tem != 0)
5579 return tem;
5580 delete_insns_since (last);
5581 }
5582
5583 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5584 the constant zero. Reject all other comparisons at this point. Only
5585 do LE and GT if branches are expensive since they are expensive on
5586 2-operand machines. */
5587
5588 if (op1 != const0_rtx
5589 || (code != EQ && code != NE
5590 && (BRANCH_COST (optimize_insn_for_speed_p (),
5591 false) <= 1 || (code != LE && code != GT))))
5592 return 0;
5593
5594 /* Try to put the result of the comparison in the sign bit. Assume we can't
5595 do the necessary operation below. */
5596
5597 tem = 0;
5598
5599 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5600 the sign bit set. */
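  /* E.g. A = 5: 5 | 4 = 5, sign bit clear; A = 0: 0 | -1 = -1, sign bit
     set; A = -3: -3 | -4 = -3, sign bit set. */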
5601
5602 if (code == LE)
5603 {
5604 /* This is destructive, so SUBTARGET can't be OP0. */
5605 if (rtx_equal_p (subtarget, op0))
5606 subtarget = 0;
5607
5608 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5609 OPTAB_WIDEN);
5610 if (tem)
5611 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5612 OPTAB_WIDEN);
5613 }
5614
5615 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5616 number of bits in the mode of OP0, minus one. */
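  /* E.g. in a 32-bit mode, A = 5: (5 >> 31) - 5 = -5, sign bit set;
     A = -3: (-3 >> 31) - (-3) = 2, sign bit clear; A = 0 gives 0, sign bit
     clear. So the sign bit is set exactly when A > 0. */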
5617
5618 if (code == GT)
5619 {
5620 if (rtx_equal_p (subtarget, op0))
5621 subtarget = 0;
5622
5623 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5624 GET_MODE_BITSIZE (mode) - 1,
5625 subtarget, 0);
5626 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5627 OPTAB_WIDEN);
5628 }
5629
5630 if (code == EQ || code == NE)
5631 {
5632 /* For EQ or NE, one way to do the comparison is to apply an operation
5633 that converts the operand into a positive number if it is nonzero
5634 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5635 for NE we negate. This puts the result in the sign bit. Then we
5636 normalize with a shift, if needed.
5637
5638 Two operations that can do the above actions are ABS and FFS, so try
5639 them. If that doesn't work, and MODE is smaller than a full word,
5640 we can use zero-extension to the wider mode (an unsigned conversion)
5641 as the operation. */
5642
5643 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5644 that is compensated by the subsequent overflow when subtracting
5645 one / negating. */
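      /* E.g. with ABS: -abs (X) is negative exactly when X is nonzero, so
         its sign bit is the NE result; abs (X) - 1 is negative exactly when
         X is zero, giving EQ. The INT_MIN wraparound noted above still
         produces the correct sign in both cases. */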
5646
5647 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5648 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5649 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5650 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5651 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5652 {
5653 tem = convert_modes (word_mode, mode, op0, 1);
5654 mode = word_mode;
5655 }
5656
5657 if (tem != 0)
5658 {
5659 if (code == EQ)
5660 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5661 0, OPTAB_WIDEN);
5662 else
5663 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5664 }
5665
5666 /* If we couldn't do it that way, for NE we can "or" the two's complement
5667 of the value with itself. For EQ, we take the one's complement of
5668 that "or", which is an extra insn, so we only handle EQ if branches
5669 are expensive. */
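      /* The "or" trick works because X | -X has the sign bit set exactly
         when X is nonzero: for nonzero X either X or -X is negative (and
         INT_MIN is its own negation), while 0 | -0 is 0. */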
5670
5671 if (tem == 0
5672 && (code == NE
5673 || BRANCH_COST (optimize_insn_for_speed_p (),
5674 false) > 1))
5675 {
5676 if (rtx_equal_p (subtarget, op0))
5677 subtarget = 0;
5678
5679 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5680 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5681 OPTAB_WIDEN);
5682
5683 if (tem && code == EQ)
5684 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5685 }
5686 }
5687
5688 if (tem && normalizep)
5689 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5690 GET_MODE_BITSIZE (mode) - 1,
5691 subtarget, normalizep == 1);
5692
5693 if (tem)
5694 {
5695 if (!target)
5696 ;
5697 else if (GET_MODE (tem) != target_mode)
5698 {
5699 convert_move (target, tem, 0);
5700 tem = target;
5701 }
5702 else if (!subtarget)
5703 {
5704 emit_move_insn (target, tem);
5705 tem = target;
5706 }
5707 }
5708 else
5709 delete_insns_since (last);
5710
5711 return tem;
5712 }
5713
5714 /* Like emit_store_flag, but always succeeds. */
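/* When emit_store_flag fails, this falls back to a branching sequence:
   store the "true" value into TARGET, conditionally jump over a store of
   the "false" value, and emit the label, as coded below. */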
5715
5716 rtx
5717 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5718 enum machine_mode mode, int unsignedp, int normalizep)
5719 {
5720 rtx tem, label;
5721 rtx trueval, falseval;
5722
5723 /* First see if emit_store_flag can do the job. */
5724 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5725 if (tem != 0)
5726 return tem;
5727
5728 if (!target)
5729 target = gen_reg_rtx (word_mode);
5730
5731 /* If this failed, we have to do this with set/compare/jump/set code.
5732 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5733 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5734 if (code == NE
5735 && GET_MODE_CLASS (mode) == MODE_INT
5736 && REG_P (target)
5737 && op0 == target
5738 && op1 == const0_rtx)
5739 {
5740 label = gen_label_rtx ();
5741 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5742 mode, NULL_RTX, NULL_RTX, label, -1);
5743 emit_move_insn (target, trueval);
5744 emit_label (label);
5745 return target;
5746 }
5747
5748 if (!REG_P (target)
5749 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5750 target = gen_reg_rtx (GET_MODE (target));
5751
5752 /* Jump in the right direction if the target cannot implement CODE
5753 but can jump on its reverse condition. */
5754 falseval = const0_rtx;
5755 if (! can_compare_p (code, mode, ccp_jump)
5756 && (! FLOAT_MODE_P (mode)
5757 || code == ORDERED || code == UNORDERED
5758 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5759 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5760 {
5761 enum rtx_code rcode;
5762 if (FLOAT_MODE_P (mode))
5763 rcode = reverse_condition_maybe_unordered (code);
5764 else
5765 rcode = reverse_condition (code);
5766
5767 /* Canonicalize to UNORDERED for the libcall. */
5768 if (can_compare_p (rcode, mode, ccp_jump)
5769 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5770 {
5771 falseval = trueval;
5772 trueval = const0_rtx;
5773 code = rcode;
5774 }
5775 }
5776
5777 emit_move_insn (target, trueval);
5778 label = gen_label_rtx ();
5779 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5780 NULL_RTX, label, -1);
5781
5782 emit_move_insn (target, falseval);
5783 emit_label (label);
5784
5785 return target;
5786 }
5787 \f
5788 /* Perform possibly multi-word comparison and conditional jump to LABEL
5789 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5790 now a thin wrapper around do_compare_rtx_and_jump. */
5791
5792 static void
5793 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5794 rtx label)
5795 {
5796 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5797 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5798 NULL_RTX, NULL_RTX, label, -1);
5799 }