1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two.  */
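/* x & (x - 1) clears the lowest set bit of x, so the result is zero
exactly when x has at most one bit set, e.g. 8 & 7 == 0 but 12 & 11 == 8.  */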
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 void
97 init_expmed (void)
98 {
99 struct
100 {
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
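/* Each rtx_def above is immediately followed by enough rtunion slots for
the operands its rtx code needs, so the PUT_CODE / XEXP assignments below
can treat these stack objects as ordinary rtl expressions when probing
rtx costs, without allocating real rtl.  */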
119
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
125
126
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
128 {
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
131 }
132 memset (&all, 0, sizeof all);
133
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
137
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
141
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
144
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
148
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
152
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
156
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
160
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
164
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
167
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
171
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
174
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
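/* Chained together, wide_trunc represents
(truncate (lshiftrt (mult (zero_extend x) (zero_extend y)) bits)),
the widening multiply-high pattern; its cost is recorded as
mul_highpart_cost in the loop below, where the shift count is
filled in per mode.  */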
177
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
180
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
183
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
187
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
191
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
195
196 for (speed = 0; speed < 2; speed++)
197 {
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
219
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
225
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
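/* These flags record whether the target's own division or modulus by a
power of two (probed here with 32) is cheap enough that the expanders
should prefer it over open-coded shift-and-mask sequences (see
expand_sdiv_pow2 and expand_smod_pow2).  */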
230
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
233 {
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
238
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
243 }
244
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
248
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
251 {
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
254
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
262 }
263 }
264 }
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
270 }
271
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
275
276 rtx
277 negate_rtx (enum machine_mode mode, rtx x)
278 {
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
280
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
283
284 return result;
285 }
286
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
293 {
294 const struct insn_data_d *data;
295
296 switch (pattern)
297 {
298 case EP_insv:
299 if (HAVE_insv)
300 {
301 data = &insn_data[CODE_FOR_insv];
302 break;
303 }
304 return MAX_MACHINE_MODE;
305
306 case EP_extv:
307 if (HAVE_extv)
308 {
309 data = &insn_data[CODE_FOR_extv];
310 break;
311 }
312 return MAX_MACHINE_MODE;
313
314 case EP_extzv:
315 if (HAVE_extzv)
316 {
317 data = &insn_data[CODE_FOR_extzv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 default:
323 gcc_unreachable ();
324 }
325
326 if (opno == -1)
327 return VOIDmode;
328
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
334 }
335 \f
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
338
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
342
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
350 {
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
357
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
359
360 while (GET_CODE (op0) == SUBREG)
361 {
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
369
370 byte_offset = 0;
371
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
374 {
375 int difference = inner_mode_size - outer_mode_size;
376
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
381 }
382 else
383 byte_offset = SUBREG_BYTE (op0);
384
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
387 }
388
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
394
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403 {
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
409
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
415 }
416
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
419
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory access, any naturally sized, unit-aligned field can be done directly. */
423
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
428
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
440 {
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
448 }
449
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
454 {
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
457 {
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
461 {
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
464 }
465 }
466 }
467
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ??? Not always true under the C++0x memory model. */
471 if (MEM_P (op0))
472 {
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
476 }
477
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
485
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
488
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
493 {
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
498
499 if (GET_CODE (arg0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
508 }
509
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
513 {
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
515
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
521 }
522 }
523
524 /* Handle fields bigger than a word. */
525
526 if (bitsize > BITS_PER_WORD)
527 {
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
533
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
538
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
547
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
550 {
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards
554 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
555 - i - 1
556 : i);
557 unsigned int bit_offset = (backwards
558 ? MAX ((int) bitsize - ((int) i + 1)
559 * BITS_PER_WORD,
560 0)
561 : (int) i * BITS_PER_WORD);
562 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
563 unsigned HOST_WIDE_INT new_bitsize =
564 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
565
566 /* If the remaining chunk doesn't have full wordsize we have
567 to make sure that for big endian machines the higher order
568 bits are used. */
569 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
570 value_word = simplify_expand_binop (word_mode, lshr_optab,
571 value_word,
572 GEN_INT (BITS_PER_WORD
573 - new_bitsize),
574 NULL_RTX, true,
575 OPTAB_LIB_WIDEN);
576
577 if (!store_bit_field_1 (op0, new_bitsize,
578 bitnum + bit_offset,
579 bitregion_start, bitregion_end,
580 word_mode,
581 value_word, fallback_p))
582 {
583 delete_insns_since (last);
584 return false;
585 }
586 }
587 return true;
588 }
589
590 /* From here on we can assume that the field to be stored in lies
591 within a single word, since it is shorter than a word. */
592
593 /* OFFSET is the number of words or bytes (UNIT says which)
594 from STR_RTX to the first word or byte containing part of the field. */
595
596 if (!MEM_P (op0))
597 {
598 if (offset != 0
599 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600 {
601 if (!REG_P (op0))
602 {
603 /* Since this is a destination (lvalue), we can't copy
604 it to a pseudo. We can remove a SUBREG that does not
605 change the size of the operand. Such a SUBREG may
606 have been added above. */
607 gcc_assert (GET_CODE (op0) == SUBREG
608 && (GET_MODE_SIZE (GET_MODE (op0))
609 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610 op0 = SUBREG_REG (op0);
611 }
612 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613 op0, (offset * UNITS_PER_WORD));
614 }
615 offset = 0;
616 }
617
618 /* If VALUE has a floating-point or complex mode, access it as an
619 integer of the corresponding size. This can occur on a machine
620 with 64 bit registers that uses SFmode for float. It can also
621 occur for unaligned float or complex fields. */
622 orig_value = value;
623 if (GET_MODE (value) != VOIDmode
624 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626 {
627 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629 }
630
631 /* Now OFFSET is nonzero only if OP0 is memory
632 and is therefore always measured in bytes. */
633
634 if (HAVE_insv
635 && GET_MODE (value) != BLKmode
636 && bitsize > 0
637 && GET_MODE_BITSIZE (op_mode) >= bitsize
638 /* Do not use insv for volatile bitfields when
639 -fstrict-volatile-bitfields is in effect. */
640 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
641 && flag_strict_volatile_bitfields > 0)
642 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
643 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
644 /* Do not use insv if the bit region is restricted and
645 op_mode integer at offset doesn't fit into the
646 restricted region. */
647 && !(MEM_P (op0) && bitregion_end
648 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
649 > bitregion_end + 1))
650 {
651 struct expand_operand ops[4];
652 int xbitpos = bitpos;
653 rtx value1;
654 rtx xop0 = op0;
655 rtx last = get_last_insn ();
656 bool copy_back = false;
657
658 /* Add OFFSET into OP0's address. */
659 if (MEM_P (xop0))
660 xop0 = adjust_address (xop0, byte_mode, offset);
661
662 /* If xop0 is a register, we need it in OP_MODE
663 to make it acceptable to the format of insv. */
664 if (GET_CODE (xop0) == SUBREG)
665 /* We can't just change the mode, because this might clobber op0,
666 and we will need the original value of op0 if insv fails. */
667 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
668 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
669 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
670
671 /* If the destination is a paradoxical subreg such that we need a
672 truncate to the inner mode, perform the insertion on a temporary and
673 truncate the result to the original destination. Note that we can't
674 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
675 X) 0)) is (reg:N X). */
676 if (GET_CODE (xop0) == SUBREG
677 && REG_P (SUBREG_REG (xop0))
678 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
679 op_mode)))
680 {
681 rtx tem = gen_reg_rtx (op_mode);
682 emit_move_insn (tem, xop0);
683 xop0 = tem;
684 copy_back = true;
685 }
686
687 /* We have been counting XBITPOS within UNIT.
688 Count instead within the size of the register. */
689 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
690 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
691
692 unit = GET_MODE_BITSIZE (op_mode);
693
694 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
695 "backwards" from the size of the unit we are inserting into.
696 Otherwise, we count bits from the most significant on a
697 BYTES/BITS_BIG_ENDIAN machine. */
698
699 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
700 xbitpos = unit - bitsize - xbitpos;
701
702 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
703 value1 = value;
704 if (GET_MODE (value) != op_mode)
705 {
706 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
707 {
708 /* Optimization: Don't bother really extending VALUE
709 if it has all the bits we will actually use. However,
710 if we must narrow it, be sure we do it correctly. */
711
712 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
713 {
714 rtx tmp;
715
716 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
717 if (! tmp)
718 tmp = simplify_gen_subreg (op_mode,
719 force_reg (GET_MODE (value),
720 value1),
721 GET_MODE (value), 0);
722 value1 = tmp;
723 }
724 else
725 value1 = gen_lowpart (op_mode, value1);
726 }
727 else if (CONST_INT_P (value))
728 value1 = gen_int_mode (INTVAL (value), op_mode);
729 else
730 /* Parse phase is supposed to make VALUE's data type
731 match that of the component reference, which is a type
732 at least as wide as the field; so VALUE should have
733 a mode that corresponds to that type. */
734 gcc_assert (CONSTANT_P (value));
735 }
736
737 create_fixed_operand (&ops[0], xop0);
738 create_integer_operand (&ops[1], bitsize);
739 create_integer_operand (&ops[2], xbitpos);
740 create_input_operand (&ops[3], value1, op_mode);
741 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
742 {
743 if (copy_back)
744 convert_move (op0, xop0, true);
745 return true;
746 }
747 delete_insns_since (last);
748 }
749
750 /* If OP0 is a memory, try copying it to a register and seeing if a
751 cheap register alternative is available. */
752 if (HAVE_insv && MEM_P (op0))
753 {
754 enum machine_mode bestmode;
755 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
756
757 if (bitregion_end)
758 maxbits = bitregion_end - bitregion_start + 1;
759
760 /* Get the mode to use for inserting into this field. If OP0 is
761 BLKmode, get the smallest mode consistent with the alignment. If
762 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
763 mode. Otherwise, use the smallest mode containing the field. */
764
765 if (GET_MODE (op0) == BLKmode
766 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
767 || (op_mode != MAX_MACHINE_MODE
768 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
769 bestmode = get_best_mode (bitsize, bitnum,
770 bitregion_start, bitregion_end,
771 MEM_ALIGN (op0),
772 (op_mode == MAX_MACHINE_MODE
773 ? VOIDmode : op_mode),
774 MEM_VOLATILE_P (op0));
775 else
776 bestmode = GET_MODE (op0);
777
778 if (bestmode != VOIDmode
779 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
780 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
781 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
782 {
783 rtx last, tempreg, xop0;
784 unsigned HOST_WIDE_INT xoffset, xbitpos;
785
786 last = get_last_insn ();
787
788 /* Adjust address to point to the containing unit of
789 that mode. Compute the offset as a multiple of this unit,
790 counting in bytes. */
791 unit = GET_MODE_BITSIZE (bestmode);
792 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
793 xbitpos = bitnum % unit;
794 xop0 = adjust_address (op0, bestmode, xoffset);
795
796 /* Fetch that unit, store the bitfield in it, then store
797 the unit. */
798 tempreg = copy_to_reg (xop0);
799 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
800 bitregion_start, bitregion_end,
801 fieldmode, orig_value, false))
802 {
803 emit_move_insn (xop0, tempreg);
804 return true;
805 }
806 delete_insns_since (last);
807 }
808 }
809
810 if (!fallback_p)
811 return false;
812
813 store_fixed_bit_field (op0, offset, bitsize, bitpos,
814 bitregion_start, bitregion_end, value);
815 return true;
816 }
817
818 /* Generate code to store value from rtx VALUE
819 into a bit-field within structure STR_RTX
820 containing BITSIZE bits starting at bit BITNUM.
821
822 BITREGION_START is bitpos of the first bitfield in this region.
823 BITREGION_END is the bitpos of the ending bitfield in this region.
824 These two fields are 0 if the C++ memory model does not apply,
825 or if we are not interested in keeping track of bitfield regions.
826
827 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
828
829 void
830 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
831 unsigned HOST_WIDE_INT bitnum,
832 unsigned HOST_WIDE_INT bitregion_start,
833 unsigned HOST_WIDE_INT bitregion_end,
834 enum machine_mode fieldmode,
835 rtx value)
836 {
837 /* Under the C++0x memory model, we must not touch bits outside the
838 bit region. Adjust the address to start at the beginning of the
839 bit region. */
840 if (MEM_P (str_rtx) && bitregion_start > 0)
841 {
842 enum machine_mode bestmode;
843 enum machine_mode op_mode;
844 unsigned HOST_WIDE_INT offset;
845
846 op_mode = mode_for_extraction (EP_insv, 3);
847 if (op_mode == MAX_MACHINE_MODE)
848 op_mode = VOIDmode;
849
850 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
851
852 offset = bitregion_start / BITS_PER_UNIT;
853 bitnum -= bitregion_start;
854 bitregion_end -= bitregion_start;
855 bitregion_start = 0;
856 bestmode = get_best_mode (bitsize, bitnum,
857 bitregion_start, bitregion_end,
858 MEM_ALIGN (str_rtx),
859 op_mode,
860 MEM_VOLATILE_P (str_rtx));
861 str_rtx = adjust_address (str_rtx, bestmode, offset);
862 }
863
864 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
865 bitregion_start, bitregion_end,
866 fieldmode, value, true))
867 gcc_unreachable ();
868 }
869 \f
870 /* Use shifts and boolean operations to store VALUE
871 into a bit field of width BITSIZE
872 in a memory location specified by OP0, offset by OFFSET bytes.
873 (OFFSET must be 0 if OP0 is a register.)
874 The field starts at position BITPOS within the byte.
875 (If OP0 is a register, it may be a full word or a narrower mode,
876 but BITPOS still counts within a full word,
877 which is significant on bigendian machines.) */
878
879 static void
880 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
881 unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitpos,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 rtx value)
886 {
887 enum machine_mode mode;
888 unsigned int total_bits = BITS_PER_WORD;
889 rtx temp;
890 int all_zero = 0;
891 int all_one = 0;
892
893 /* There is a case not handled here:
894 a structure with a known alignment of just a halfword
895 and a field split across two aligned halfwords within the structure.
896 Or likewise a structure with a known alignment of just a byte
897 and a field split across two bytes.
898 Such cases are not supposed to be able to occur. */
899
900 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
901 {
902 gcc_assert (!offset);
903 /* Special treatment for a bit field split across two registers. */
904 if (bitsize + bitpos > BITS_PER_WORD)
905 {
906 store_split_bit_field (op0, bitsize, bitpos,
907 bitregion_start, bitregion_end,
908 value);
909 return;
910 }
911 }
912 else
913 {
914 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
915
916 if (bitregion_end)
917 maxbits = bitregion_end - bitregion_start + 1;
918
919 /* Get the proper mode to use for this field. We want a mode that
920 includes the entire field. If such a mode would be larger than
921 a word, we won't be doing the extraction the normal way.
922 We don't want a mode bigger than the destination. */
923
924 mode = GET_MODE (op0);
925 if (GET_MODE_BITSIZE (mode) == 0
926 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
927 mode = word_mode;
928
929 if (MEM_VOLATILE_P (op0)
930 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
931 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
932 && flag_strict_volatile_bitfields > 0)
933 mode = GET_MODE (op0);
934 else
935 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
936 bitregion_start, bitregion_end,
937 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
938
939 if (mode == VOIDmode)
940 {
941 /* The only way this should occur is if the field spans word
942 boundaries. */
943 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
944 bitregion_start, bitregion_end, value);
945 return;
946 }
947
948 total_bits = GET_MODE_BITSIZE (mode);
949
950 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
951 be in the range 0 to total_bits-1, and put any excess bytes in
952 OFFSET. */
953 if (bitpos >= total_bits)
954 {
955 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
956 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
957 * BITS_PER_UNIT);
958 }
959
960 /* Get ref to an aligned byte, halfword, or word containing the field.
961 Adjust BITPOS to be position within a word,
962 and OFFSET to be the offset of that word.
963 Then alter OP0 to refer to that word. */
964 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
965 offset -= (offset % (total_bits / BITS_PER_UNIT));
966 op0 = adjust_address (op0, mode, offset);
967 }
968
969 mode = GET_MODE (op0);
970
971 /* Now MODE is either some integral mode for a MEM as OP0,
972 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
973 The bit field is contained entirely within OP0.
974 BITPOS is the starting bit number within OP0.
975 (OP0's mode may actually be narrower than MODE.) */
976
977 if (BYTES_BIG_ENDIAN)
978 /* BITPOS is the distance between our msb
979 and that of the containing datum.
980 Convert it to the distance from the lsb. */
981 bitpos = total_bits - bitsize - bitpos;
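/* For example, with total_bits == 32, bitsize == 8 and a big-endian
BITPOS of 4, the field occupies bits 4..11 counting from the msb,
i.e. bits 20..27 from the lsb, so BITPOS becomes 32 - 8 - 4 == 20.  */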
982
983 /* Now BITPOS is always the distance between our lsb
984 and that of OP0. */
985
986 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
987 we must first convert its mode to MODE. */
988
989 if (CONST_INT_P (value))
990 {
991 HOST_WIDE_INT v = INTVAL (value);
992
993 if (bitsize < HOST_BITS_PER_WIDE_INT)
994 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
995
996 if (v == 0)
997 all_zero = 1;
998 else if ((bitsize < HOST_BITS_PER_WIDE_INT
999 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1000 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1001 all_one = 1;
1002
1003 value = lshift_value (mode, value, bitpos, bitsize);
1004 }
1005 else
1006 {
1007 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1008 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1009
1010 if (GET_MODE (value) != mode)
1011 value = convert_to_mode (mode, value, 1);
1012
1013 if (must_and)
1014 value = expand_binop (mode, and_optab, value,
1015 mask_rtx (mode, 0, bitsize, 0),
1016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1017 if (bitpos > 0)
1018 value = expand_shift (LSHIFT_EXPR, mode, value,
1019 bitpos, NULL_RTX, 1);
1020 }
1021
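/* In other words, the store below computes
(OP0 & ~MASK) | (VALUE << BITPOS), where MASK has BITSIZE one bits
starting at BITPOS; the AND is skipped when VALUE is all ones and
the IOR when it is zero.  */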
1022 /* Now clear the chosen bits in OP0,
1023 except that if VALUE is -1 we need not bother. */
1024 /* We keep the intermediates in registers to allow CSE to combine
1025 consecutive bitfield assignments. */
1026
1027 temp = force_reg (mode, op0);
1028
1029 if (! all_one)
1030 {
1031 temp = expand_binop (mode, and_optab, temp,
1032 mask_rtx (mode, bitpos, bitsize, 1),
1033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034 temp = force_reg (mode, temp);
1035 }
1036
1037 /* Now logical-or VALUE into OP0, unless it is zero. */
1038
1039 if (! all_zero)
1040 {
1041 temp = expand_binop (mode, ior_optab, temp, value,
1042 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1043 temp = force_reg (mode, temp);
1044 }
1045
1046 if (op0 != temp)
1047 {
1048 op0 = copy_rtx (op0);
1049 emit_move_insn (op0, temp);
1050 }
1051 }
1052 \f
1053 /* Store a bit field that is split across multiple accessible memory objects.
1054
1055 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1056 BITSIZE is the field width; BITPOS the position of its first bit
1057 (within the word).
1058 VALUE is the value to store.
1059
1060 This does not yet handle fields wider than BITS_PER_WORD. */
1061
1062 static void
1063 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1064 unsigned HOST_WIDE_INT bitpos,
1065 unsigned HOST_WIDE_INT bitregion_start,
1066 unsigned HOST_WIDE_INT bitregion_end,
1067 rtx value)
1068 {
1069 unsigned int unit;
1070 unsigned int bitsdone = 0;
1071
1072 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1073 much at a time. */
1074 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1075 unit = BITS_PER_WORD;
1076 else
1077 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1078
1079 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1080 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1081 that VALUE might be a floating-point constant. */
1082 if (CONSTANT_P (value) && !CONST_INT_P (value))
1083 {
1084 rtx word = gen_lowpart_common (word_mode, value);
1085
1086 if (word && (value != word))
1087 value = word;
1088 else
1089 value = gen_lowpart_common (word_mode,
1090 force_reg (GET_MODE (value) != VOIDmode
1091 ? GET_MODE (value)
1092 : word_mode, value));
1093 }
1094
1095 while (bitsdone < bitsize)
1096 {
1097 unsigned HOST_WIDE_INT thissize;
1098 rtx part, word;
1099 unsigned HOST_WIDE_INT thispos;
1100 unsigned HOST_WIDE_INT offset;
1101
1102 offset = (bitpos + bitsdone) / unit;
1103 thispos = (bitpos + bitsdone) % unit;
1104
1105 /* When the region of bytes we can touch is restricted, decrease
1106 UNIT as needed near the end of the region. */
1107 if (bitregion_end
1108 && unit > BITS_PER_UNIT
1109 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1110 {
1111 unit = unit / 2;
1112 continue;
1113 }
1114
1115 /* THISSIZE must not overrun a word boundary. Otherwise,
1116 store_fixed_bit_field will call us again, and we will mutually
1117 recurse forever. */
1118 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1119 thissize = MIN (thissize, unit - thispos);
1120
1121 if (BYTES_BIG_ENDIAN)
1122 {
1123 int total_bits;
1124
1125 /* We must do an endian conversion exactly the same way as it is
1126 done in extract_bit_field, so that the two calls to
1127 extract_fixed_bit_field will have comparable arguments. */
1128 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1129 total_bits = BITS_PER_WORD;
1130 else
1131 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1132
1133 /* Fetch successively less significant portions. */
1134 if (CONST_INT_P (value))
1135 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1136 >> (bitsize - bitsdone - thissize))
1137 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1138 else
1139 /* The args are chosen so that the last part includes the
1140 lsb. Give extract_bit_field the value it needs (with
1141 endianness compensation) to fetch the piece we want. */
1142 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1143 total_bits - bitsize + bitsdone,
1144 NULL_RTX, 1, false);
1145 }
1146 else
1147 {
1148 /* Fetch successively more significant portions. */
1149 if (CONST_INT_P (value))
1150 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1151 >> bitsdone)
1152 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1153 else
1154 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1155 bitsdone, NULL_RTX, 1, false);
1156 }
1157
1158 /* If OP0 is a register, then handle OFFSET here.
1159
1160 When handling multiword bitfields, extract_bit_field may pass
1161 down a word_mode SUBREG of a larger REG for a bitfield that actually
1162 crosses a word boundary. Thus, for a SUBREG, we must find
1163 the current word starting from the base register. */
1164 if (GET_CODE (op0) == SUBREG)
1165 {
1166 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1167 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1168 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1169 word = word_offset ? const0_rtx : op0;
1170 else
1171 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1172 GET_MODE (SUBREG_REG (op0)));
1173 offset = 0;
1174 }
1175 else if (REG_P (op0))
1176 {
1177 enum machine_mode op0_mode = GET_MODE (op0);
1178 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1179 word = offset ? const0_rtx : op0;
1180 else
1181 word = operand_subword_force (op0, offset, GET_MODE (op0));
1182 offset = 0;
1183 }
1184 else
1185 word = op0;
1186
1187 /* OFFSET is in UNITs, and UNIT is in bits.
1188 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1189 it is just an out-of-bounds access. Ignore it. */
1190 if (word != const0_rtx)
1191 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1192 thispos, bitregion_start, bitregion_end, part);
1193 bitsdone += thissize;
1194 }
1195 }
1196 \f
1197 /* A subroutine of extract_bit_field_1 that converts return value X
1198 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1199 to extract_bit_field. */
1200
1201 static rtx
1202 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1203 enum machine_mode tmode, bool unsignedp)
1204 {
1205 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1206 return x;
1207
1208 /* If TMODE is not a scalar integral mode, first convert X to an
1209 integer mode of the same size and then access it in TMODE
1210 via a SUBREG. */
1211 if (!SCALAR_INT_MODE_P (tmode))
1212 {
1213 enum machine_mode smode;
1214
1215 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1216 x = convert_to_mode (smode, x, unsignedp);
1217 x = force_reg (smode, x);
1218 return gen_lowpart (tmode, x);
1219 }
1220
1221 return convert_to_mode (tmode, x, unsignedp);
1222 }
1223
1224 /* A subroutine of extract_bit_field, with the same arguments.
1225 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1226 if we can find no other means of implementing the operation.
1227 If FALLBACK_P is false, return NULL instead. */
1228
1229 static rtx
1230 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1231 unsigned HOST_WIDE_INT bitnum,
1232 int unsignedp, bool packedp, rtx target,
1233 enum machine_mode mode, enum machine_mode tmode,
1234 bool fallback_p)
1235 {
1236 unsigned int unit
1237 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1238 unsigned HOST_WIDE_INT offset, bitpos;
1239 rtx op0 = str_rtx;
1240 enum machine_mode int_mode;
1241 enum machine_mode ext_mode;
1242 enum machine_mode mode1;
1243 int byte_offset;
1244
1245 if (tmode == VOIDmode)
1246 tmode = mode;
1247
1248 while (GET_CODE (op0) == SUBREG)
1249 {
1250 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1251 op0 = SUBREG_REG (op0);
1252 }
1253
1254 /* If we have an out-of-bounds access to a register, just return an
1255 uninitialized register of the required mode. This can occur if the
1256 source code contains an out-of-bounds access to a small array. */
1257 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1258 return gen_reg_rtx (tmode);
1259
1260 if (REG_P (op0)
1261 && mode == GET_MODE (op0)
1262 && bitnum == 0
1263 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1264 {
1265 /* We're trying to extract a full register from itself. */
1266 return op0;
1267 }
1268
1269 /* See if we can get a better vector mode before extracting. */
1270 if (VECTOR_MODE_P (GET_MODE (op0))
1271 && !MEM_P (op0)
1272 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1273 {
1274 enum machine_mode new_mode;
1275
1276 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1277 new_mode = MIN_MODE_VECTOR_FLOAT;
1278 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1279 new_mode = MIN_MODE_VECTOR_FRACT;
1280 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1281 new_mode = MIN_MODE_VECTOR_UFRACT;
1282 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1283 new_mode = MIN_MODE_VECTOR_ACCUM;
1284 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1285 new_mode = MIN_MODE_VECTOR_UACCUM;
1286 else
1287 new_mode = MIN_MODE_VECTOR_INT;
1288
1289 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1290 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1291 && targetm.vector_mode_supported_p (new_mode))
1292 break;
1293 if (new_mode != VOIDmode)
1294 op0 = gen_lowpart (new_mode, op0);
1295 }
1296
1297 /* Use vec_extract patterns for extracting parts of vectors whenever
1298 available. */
1299 if (VECTOR_MODE_P (GET_MODE (op0))
1300 && !MEM_P (op0)
1301 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1302 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1303 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1304 {
1305 struct expand_operand ops[3];
1306 enum machine_mode outermode = GET_MODE (op0);
1307 enum machine_mode innermode = GET_MODE_INNER (outermode);
1308 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1309 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1310
1311 create_output_operand (&ops[0], target, innermode);
1312 create_input_operand (&ops[1], op0, outermode);
1313 create_integer_operand (&ops[2], pos);
1314 if (maybe_expand_insn (icode, 3, ops))
1315 {
1316 target = ops[0].value;
1317 if (GET_MODE (target) != mode)
1318 return gen_lowpart (tmode, target);
1319 return target;
1320 }
1321 }
1322
1323 /* Make sure we are playing with integral modes. Pun with subregs
1324 if we aren't. */
1325 {
1326 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1327 if (imode != GET_MODE (op0))
1328 {
1329 if (MEM_P (op0))
1330 op0 = adjust_address (op0, imode, 0);
1331 else if (imode != BLKmode)
1332 {
1333 op0 = gen_lowpart (imode, op0);
1334
1335 /* If we got a SUBREG, force it into a register since we
1336 aren't going to be able to do another SUBREG on it. */
1337 if (GET_CODE (op0) == SUBREG)
1338 op0 = force_reg (imode, op0);
1339 }
1340 else if (REG_P (op0))
1341 {
1342 rtx reg, subreg;
1343 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1344 MODE_INT);
1345 reg = gen_reg_rtx (imode);
1346 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1347 emit_move_insn (subreg, op0);
1348 op0 = reg;
1349 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1350 }
1351 else
1352 {
1353 rtx mem = assign_stack_temp (GET_MODE (op0),
1354 GET_MODE_SIZE (GET_MODE (op0)));
1355 emit_move_insn (mem, op0);
1356 op0 = adjust_address (mem, BLKmode, 0);
1357 }
1358 }
1359 }
1360
1361 /* We may be accessing data outside the field, which means
1362 we can alias adjacent data. */
1363 if (MEM_P (op0))
1364 {
1365 op0 = shallow_copy_rtx (op0);
1366 set_mem_alias_set (op0, 0);
1367 set_mem_expr (op0, 0);
1368 }
1369
1370 /* Extraction of a full-word or multi-word value from a structure
1371 in a register or aligned memory can be done with just a SUBREG.
1372 A subword value in the least significant part of a register
1373 can also be extracted with a SUBREG. For this, we need the
1374 byte offset of the value in op0. */
1375
1376 bitpos = bitnum % unit;
1377 offset = bitnum / unit;
1378 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1379
1380 /* If OP0 is a register, BITPOS must count within a word.
1381 But as we have it, it counts within whatever size OP0 now has.
1382 On a bigendian machine, these are not the same, so convert. */
1383 if (BYTES_BIG_ENDIAN
1384 && !MEM_P (op0)
1385 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1386 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1387
1388 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1389 If that's wrong, the solution is to test for it and set TARGET to 0
1390 if needed. */
1391
1392 /* Only scalar integer modes can be converted via subregs. There is an
1393 additional problem for FP modes here in that they can have a precision
1394 which is different from the size. mode_for_size uses precision, but
1395 we want a mode based on the size, so we must avoid calling it for FP
1396 modes. */
1397 mode1 = (SCALAR_INT_MODE_P (tmode)
1398 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1399 : mode);
1400
1401 /* If the bitfield is volatile, we need to make sure the access
1402 remains on a type-aligned boundary. */
1403 if (GET_CODE (op0) == MEM
1404 && MEM_VOLATILE_P (op0)
1405 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1406 && flag_strict_volatile_bitfields > 0)
1407 goto no_subreg_mode_swap;
1408
1409 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1410 && bitpos % BITS_PER_WORD == 0)
1411 || (mode1 != BLKmode
1412 /* ??? The big endian test here is wrong. This is correct
1413 if the value is in a register, and if mode_for_size is not
1414 the same mode as op0. This causes us to get unnecessarily
1415 inefficient code from the Thumb port when -mbig-endian. */
1416 && (BYTES_BIG_ENDIAN
1417 ? bitpos + bitsize == BITS_PER_WORD
1418 : bitpos == 0)))
1419 && ((!MEM_P (op0)
1420 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1421 && GET_MODE_SIZE (mode1) != 0
1422 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1423 || (MEM_P (op0)
1424 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1425 || (offset * BITS_PER_UNIT % bitsize == 0
1426 && MEM_ALIGN (op0) % bitsize == 0)))))
1427 {
1428 if (MEM_P (op0))
1429 op0 = adjust_address (op0, mode1, offset);
1430 else if (mode1 != GET_MODE (op0))
1431 {
1432 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1433 byte_offset);
1434 if (sub == NULL)
1435 goto no_subreg_mode_swap;
1436 op0 = sub;
1437 }
1438 if (mode1 != mode)
1439 return convert_to_mode (tmode, op0, unsignedp);
1440 return op0;
1441 }
1442 no_subreg_mode_swap:
1443
1444 /* Handle fields bigger than a word. */
1445
1446 if (bitsize > BITS_PER_WORD)
1447 {
1448 /* Here we transfer the words of the field
1449 in the order least significant first.
1450 This is because the most significant word is the one which may
1451 be less than full. */
1452
1453 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1454 unsigned int i;
1455
1456 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1457 target = gen_reg_rtx (mode);
1458
1459 /* Indicate for flow that the entire target reg is being set. */
1460 emit_clobber (target);
1461
1462 for (i = 0; i < nwords; i++)
1463 {
1464 /* If I is 0, use the low-order word in both field and target;
1465 if I is 1, use the next to lowest word; and so on. */
1466 /* Word number in TARGET to use. */
1467 unsigned int wordnum
1468 = (WORDS_BIG_ENDIAN
1469 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1470 : i);
1471 /* Offset from start of field in OP0. */
1472 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1473 ? MAX (0, ((int) bitsize - ((int) i + 1)
1474 * (int) BITS_PER_WORD))
1475 : (int) i * BITS_PER_WORD);
1476 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1477 rtx result_part
1478 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1479 bitsize - i * BITS_PER_WORD),
1480 bitnum + bit_offset, 1, false, target_part, mode,
1481 word_mode);
1482
1483 gcc_assert (target_part);
1484
1485 if (result_part != target_part)
1486 emit_move_insn (target_part, result_part);
1487 }
1488
1489 if (unsignedp)
1490 {
1491 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1492 need to be zero'd out. */
1493 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1494 {
1495 unsigned int i, total_words;
1496
1497 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1498 for (i = nwords; i < total_words; i++)
1499 emit_move_insn
1500 (operand_subword (target,
1501 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1502 1, VOIDmode),
1503 const0_rtx);
1504 }
1505 return target;
1506 }
1507
1508 /* Signed bit field: sign-extend with two arithmetic shifts. */
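/* E.g. for a 40-bit field held in a 64-bit MODE, shift left by 24 and
then arithmetic-shift right by 24.  */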
1509 target = expand_shift (LSHIFT_EXPR, mode, target,
1510 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1511 return expand_shift (RSHIFT_EXPR, mode, target,
1512 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1513 }
1514
1515 /* From here on we know the desired field is smaller than a word. */
1516
1517 /* Check if there is a correspondingly-sized integer field, so we can
1518 safely extract it as one size of integer, if necessary; then
1519 truncate or extend to the size that is wanted; then use SUBREGs or
1520 convert_to_mode to get one of the modes we really wanted. */
1521
1522 int_mode = int_mode_for_mode (tmode);
1523 if (int_mode == BLKmode)
1524 int_mode = int_mode_for_mode (mode);
1525 /* Should probably push op0 out to memory and then do a load. */
1526 gcc_assert (int_mode != BLKmode);
1527
1528 /* OFFSET is the number of words or bytes (UNIT says which)
1529 from STR_RTX to the first word or byte containing part of the field. */
1530 if (!MEM_P (op0))
1531 {
1532 if (offset != 0
1533 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1534 {
1535 if (!REG_P (op0))
1536 op0 = copy_to_reg (op0);
1537 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1538 op0, (offset * UNITS_PER_WORD));
1539 }
1540 offset = 0;
1541 }
1542
1543 /* Now OFFSET is nonzero only for memory operands. */
1544 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1545 if (ext_mode != MAX_MACHINE_MODE
1546 && bitsize > 0
1547 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1548 /* Do not use extv/extzv for volatile bitfields when
1549 -fstrict-volatile-bitfields is in effect. */
1550 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1551 && flag_strict_volatile_bitfields > 0)
1552 /* If op0 is a register, we need it in EXT_MODE to make it
1553 acceptable to the format of ext(z)v. */
1554 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1555 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1556 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1557 {
1558 struct expand_operand ops[4];
1559 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1560 rtx xop0 = op0;
1561 rtx xtarget = target;
1562 rtx xspec_target = target;
1563 rtx xspec_target_subreg = 0;
1564
1565 /* If op0 is a register, we need it in EXT_MODE to make it
1566 acceptable to the format of ext(z)v. */
1567 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1568 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1569 if (MEM_P (xop0))
1570 /* Get ref to first byte containing part of the field. */
1571 xop0 = adjust_address (xop0, byte_mode, xoffset);
1572
1573 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1574 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1575 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1576
1577 unit = GET_MODE_BITSIZE (ext_mode);
1578
1579 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1580 "backwards" from the size of the unit we are extracting from.
1581 Otherwise, we count bits from the most significant on a
1582 BYTES/BITS_BIG_ENDIAN machine. */
1583
1584 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1585 xbitpos = unit - bitsize - xbitpos;
1586
1587 if (xtarget == 0)
1588 xtarget = xspec_target = gen_reg_rtx (tmode);
1589
1590 if (GET_MODE (xtarget) != ext_mode)
1591 {
1592 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1593 between the mode of the extraction (word_mode) and the target
1594 mode. Instead, create a temporary and use convert_move to set
1595 the target. */
1596 if (REG_P (xtarget)
1597 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1598 {
1599 xtarget = gen_lowpart (ext_mode, xtarget);
1600 if (GET_MODE_PRECISION (ext_mode)
1601 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1602 xspec_target_subreg = xtarget;
1603 }
1604 else
1605 xtarget = gen_reg_rtx (ext_mode);
1606 }
1607
1608 create_output_operand (&ops[0], xtarget, ext_mode);
1609 create_fixed_operand (&ops[1], xop0);
1610 create_integer_operand (&ops[2], bitsize);
1611 create_integer_operand (&ops[3], xbitpos);
1612 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1613 4, ops))
1614 {
1615 xtarget = ops[0].value;
1616 if (xtarget == xspec_target)
1617 return xtarget;
1618 if (xtarget == xspec_target_subreg)
1619 return xspec_target;
1620 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1621 }
1622 }
1623
1624 /* If OP0 is a memory, try copying it to a register and seeing if a
1625 cheap register alternative is available. */
1626 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1627 {
1628 enum machine_mode bestmode;
1629
1630 /* Get the mode to use for inserting into this field. If
1631 OP0 is BLKmode, get the smallest mode consistent with the
1632 alignment. If OP0 is a non-BLKmode object that is no
1633 wider than EXT_MODE, use its mode. Otherwise, use the
1634 smallest mode containing the field. */
1635
1636 if (GET_MODE (op0) == BLKmode
1637 || (ext_mode != MAX_MACHINE_MODE
1638 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1639 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1640 (ext_mode == MAX_MACHINE_MODE
1641 ? VOIDmode : ext_mode),
1642 MEM_VOLATILE_P (op0));
1643 else
1644 bestmode = GET_MODE (op0);
1645
1646 if (bestmode != VOIDmode
1647 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1648 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1649 {
1650 unsigned HOST_WIDE_INT xoffset, xbitpos;
1651
1652 /* Compute the offset as a multiple of this unit,
1653 counting in bytes. */
1654 unit = GET_MODE_BITSIZE (bestmode);
1655 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1656 xbitpos = bitnum % unit;
1657
1658 /* Make sure the register is big enough for the whole field. */
1659 if (xoffset * BITS_PER_UNIT + unit
1660 >= offset * BITS_PER_UNIT + bitsize)
1661 {
1662 rtx last, result, xop0;
1663
1664 last = get_last_insn ();
1665
1666 /* Fetch it to a register in that size. */
1667 xop0 = adjust_address (op0, bestmode, xoffset);
1668 xop0 = force_reg (bestmode, xop0);
1669 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1670 unsignedp, packedp, target,
1671 mode, tmode, false);
1672 if (result)
1673 return result;
1674
1675 delete_insns_since (last);
1676 }
1677 }
1678 }
1679
1680 if (!fallback_p)
1681 return NULL;
1682
1683 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1684 bitpos, target, unsignedp, packedp);
1685 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687
1688 /* Generate code to extract a bit-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1699
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1704
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1712 }
1713 \f
1714 /* Extract a bit field using shifts and boolean operations,
1715    and return an rtx to represent the value.
1716 OP0 addresses a register (word) or memory (byte).
1717 BITPOS says which bit within the word or byte the bit field starts in.
1718 OFFSET says how many bytes farther the bit field starts;
1719 it is 0 if OP0 is a register.
1720 BITSIZE says how many bits long the bit field is.
1721 (If OP0 is a register, it may be narrower than a full word,
1722 but BITPOS still counts within a full word,
1723 which is significant on bigendian machines.)
1724
1725 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1726 PACKEDP is true if the field has the packed attribute.
1727
1728 If TARGET is nonzero, attempts to store the value there
1729 and return TARGET, but this is not guaranteed.
1730 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1731
1732 static rtx
1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1734 unsigned HOST_WIDE_INT offset,
1735 unsigned HOST_WIDE_INT bitsize,
1736 unsigned HOST_WIDE_INT bitpos, rtx target,
1737 int unsignedp, bool packedp)
1738 {
1739 unsigned int total_bits = BITS_PER_WORD;
1740 enum machine_mode mode;
1741
1742 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1743 {
1744 /* Special treatment for a bit field split across two registers. */
1745 if (bitsize + bitpos > BITS_PER_WORD)
1746 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1747 }
1748 else
1749 {
1750 /* Get the proper mode to use for this field. We want a mode that
1751 includes the entire field. If such a mode would be larger than
1752 a word, we won't be doing the extraction the normal way. */
1753
1754 if (MEM_VOLATILE_P (op0)
1755 && flag_strict_volatile_bitfields > 0)
1756 {
1757 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1758 mode = GET_MODE (op0);
1759 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1760 mode = GET_MODE (target);
1761 else
1762 mode = tmode;
1763 }
1764 else
1765 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1766 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1767
1768 if (mode == VOIDmode)
1769 /* The only way this should occur is if the field spans word
1770 boundaries. */
1771 return extract_split_bit_field (op0, bitsize,
1772 bitpos + offset * BITS_PER_UNIT,
1773 unsignedp);
1774
1775 total_bits = GET_MODE_BITSIZE (mode);
1776
1777 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1778 be in the range 0 to total_bits-1, and put any excess bytes in
1779 OFFSET. */
1780 if (bitpos >= total_bits)
1781 {
1782 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1783 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1784 * BITS_PER_UNIT);
1785 }
1786
1787 /* If we're accessing a volatile MEM, we can't do the next
1788 alignment step if it results in a multi-word access where we
1789 otherwise wouldn't have one. So, check for that case
1790 here. */
1791 if (MEM_P (op0)
1792 && MEM_VOLATILE_P (op0)
1793 && flag_strict_volatile_bitfields > 0
1794 && bitpos + bitsize <= total_bits
1795 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1796 {
1797 if (STRICT_ALIGNMENT)
1798 {
1799 static bool informed_about_misalignment = false;
1800 bool warned;
1801
1802 if (packedp)
1803 {
1804 if (bitsize == total_bits)
1805 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1806 "multiple accesses to volatile structure member"
1807 " because of packed attribute");
1808 else
1809 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810 "multiple accesses to volatile structure bitfield"
1811 " because of packed attribute");
1812
1813 return extract_split_bit_field (op0, bitsize,
1814 bitpos + offset * BITS_PER_UNIT,
1815 unsignedp);
1816 }
1817
1818 if (bitsize == total_bits)
1819 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1820 "mis-aligned access used for structure member");
1821 else
1822 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1823 "mis-aligned access used for structure bitfield");
1824
1825 if (! informed_about_misalignment && warned)
1826 {
1827 informed_about_misalignment = true;
1828 inform (input_location,
1829 "when a volatile object spans multiple type-sized locations,"
1830 " the compiler must choose between using a single mis-aligned access to"
1831 " preserve the volatility, or using multiple aligned accesses to avoid"
1832 " runtime faults; this code may fail at runtime if the hardware does"
1833 " not allow this access");
1834 }
1835 }
1836 }
1837 else
1838 {
1839
1840 /* Get ref to an aligned byte, halfword, or word containing the field.
1841 Adjust BITPOS to be position within a word,
1842 and OFFSET to be the offset of that word.
1843 Then alter OP0 to refer to that word. */
1844 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1845 offset -= (offset % (total_bits / BITS_PER_UNIT));
1846 }
1847
1848 op0 = adjust_address (op0, mode, offset);
1849 }
1850
1851 mode = GET_MODE (op0);
1852
1853 if (BYTES_BIG_ENDIAN)
1854 /* BITPOS is the distance between our msb and that of OP0.
1855 Convert it to the distance from the lsb. */
1856 bitpos = total_bits - bitsize - bitpos;
1857
1858 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1859 We have reduced the big-endian case to the little-endian case. */
1860
1861 if (unsignedp)
1862 {
1863 if (bitpos)
1864 {
1865 /* If the field does not already start at the lsb,
1866 shift it so it does. */
1867 /* Maybe propagate the target for the shift. */
1868 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1869 if (tmode != mode)
1870 subtarget = 0;
1871 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1872 }
1873 /* Convert the value to the desired mode. */
1874 if (mode != tmode)
1875 op0 = convert_to_mode (tmode, op0, 1);
1876
1877 /* Unless the msb of the field used to be the msb when we shifted,
1878 mask out the upper bits. */
1879
1880 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1881 return expand_binop (GET_MODE (op0), and_optab, op0,
1882 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1883 target, 1, OPTAB_LIB_WIDEN);
1884 return op0;
1885 }
1886
1887 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888 then arithmetic-shift its lsb to the lsb of the word. */
1889 op0 = force_reg (mode, op0);
1890
1891 /* Find the narrowest integer mode that contains the field. */
1892
1893 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894 mode = GET_MODE_WIDER_MODE (mode))
1895 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1896 {
1897 op0 = convert_to_mode (mode, op0, 0);
1898 break;
1899 }
1900
1901 if (mode != tmode)
1902 target = 0;
1903
1904 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1905 {
1906 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907 /* Maybe propagate the target for the shift. */
1908 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1910 }
1911
1912 return expand_shift (RSHIFT_EXPR, mode, op0,
1913 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1914 }
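/* Editor's illustration (not part of expmed.c): a minimal sketch of the two
   extraction patterns used above, specialized to a 32-bit word X that has
   already been fetched.  The unsigned form shifts the field down to the lsb
   and masks off the upper bits; the signed form shifts the field's msb up to
   bit 31 and arithmetic-shifts it back down.  The function names are
   hypothetical, and the signed version assumes the usual arithmetic
   behaviour of ">>" on negative values.  */
#include <stdint.h>

static uint32_t
extract_unsigned_example (uint32_t x, unsigned bitpos, unsigned bitsize)
{
  uint32_t field = x >> bitpos;              /* move the field's lsb to bit 0 */
  if (bitsize < 32)
    field &= ((uint32_t) 1 << bitsize) - 1;  /* mask out the bits above it */
  return field;
}

static int32_t
extract_signed_example (uint32_t x, unsigned bitpos, unsigned bitsize)
{
  /* Assumes 1 <= bitsize and bitpos + bitsize <= 32.  */
  return (int32_t) (x << (32 - bitsize - bitpos)) >> (32 - bitsize);
}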
1915 \f
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918 complement of that if COMPLEMENT. The mask is truncated if
1919 necessary to the width of mode MODE. The mask is zero-extended if
1920 BITSIZE+BITPOS is too small for MODE. */
1921
1922 static rtx
1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1924 {
1925 double_int mask;
1926
1927 mask = double_int_mask (bitsize);
1928 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1929
1930 if (complement)
1931 mask = double_int_not (mask);
1932
1933 return immed_double_int_const (mask, mode);
1934 }
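/* Editor's illustration (not part of expmed.c): the shape of the mask built
   above, restricted to a single 64-bit word -- BITSIZE one bits followed by
   BITPOS zero bits; e.g. bitsize 4 and bitpos 3 give 0x78.  The function
   name is hypothetical.  */
#include <stdint.h>

static uint64_t
mask_example (unsigned bitpos, unsigned bitsize)
{
  uint64_t ones = (bitsize >= 64
                   ? ~(uint64_t) 0
                   : ((uint64_t) 1 << bitsize) - 1);
  return ones << bitpos;    /* invert this for the COMPLEMENT case */
}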
1935
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1938
1939 static rtx
1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1941 {
1942 double_int val;
1943
1944 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946
1947 return immed_double_int_const (val, mode);
1948 }
1949 \f
1950 /* Extract a bit field that is split across two words
1951 and return an RTX for the result.
1952
1953 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1954 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1955 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1956
1957 static rtx
1958 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1959 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1960 {
1961 unsigned int unit;
1962 unsigned int bitsdone = 0;
1963 rtx result = NULL_RTX;
1964 int first = 1;
1965
1966   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1967      much at a time.  */
1968 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1969 unit = BITS_PER_WORD;
1970 else
1971 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1972
1973 while (bitsdone < bitsize)
1974 {
1975 unsigned HOST_WIDE_INT thissize;
1976 rtx part, word;
1977 unsigned HOST_WIDE_INT thispos;
1978 unsigned HOST_WIDE_INT offset;
1979
1980 offset = (bitpos + bitsdone) / unit;
1981 thispos = (bitpos + bitsdone) % unit;
1982
1983 /* THISSIZE must not overrun a word boundary. Otherwise,
1984 extract_fixed_bit_field will call us again, and we will mutually
1985 recurse forever. */
1986 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1987 thissize = MIN (thissize, unit - thispos);
1988
1989 /* If OP0 is a register, then handle OFFSET here.
1990
1991 When handling multiword bitfields, extract_bit_field may pass
1992 down a word_mode SUBREG of a larger REG for a bitfield that actually
1993 crosses a word boundary. Thus, for a SUBREG, we must find
1994 the current word starting from the base register. */
1995 if (GET_CODE (op0) == SUBREG)
1996 {
1997 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1998 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1999 GET_MODE (SUBREG_REG (op0)));
2000 offset = 0;
2001 }
2002 else if (REG_P (op0))
2003 {
2004 word = operand_subword_force (op0, offset, GET_MODE (op0));
2005 offset = 0;
2006 }
2007 else
2008 word = op0;
2009
2010 /* Extract the parts in bit-counting order,
2011 	 whose meaning is determined by BYTES_BIG_ENDIAN.
2012 OFFSET is in UNITs, and UNIT is in bits.
2013 extract_fixed_bit_field wants offset in bytes. */
2014 part = extract_fixed_bit_field (word_mode, word,
2015 offset * unit / BITS_PER_UNIT,
2016 thissize, thispos, 0, 1, false);
2017 bitsdone += thissize;
2018
2019 /* Shift this part into place for the result. */
2020 if (BYTES_BIG_ENDIAN)
2021 {
2022 if (bitsize != bitsdone)
2023 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2024 bitsize - bitsdone, 0, 1);
2025 }
2026 else
2027 {
2028 if (bitsdone != thissize)
2029 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2030 bitsdone - thissize, 0, 1);
2031 }
2032
2033 if (first)
2034 result = part;
2035 else
2036 /* Combine the parts with bitwise or. This works
2037 because we extracted each part as an unsigned bit field. */
2038 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2039 OPTAB_LIB_WIDEN);
2040
2041 first = 0;
2042 }
2043
2044 /* Unsigned bit field: we are done. */
2045 if (unsignedp)
2046 return result;
2047 /* Signed bit field: sign-extend with two arithmetic shifts. */
2048 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2049 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2050 return expand_shift (RSHIFT_EXPR, word_mode, result,
2051 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2052 }
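/* Editor's illustration (not part of expmed.c): how the pieces of a field
   straddling two little-endian 32-bit words are recombined, as in the loop
   above -- each piece is extracted as an unsigned field and the pieces are
   OR-ed together after shifting them into place.  Names are hypothetical;
   assumes 0 < bitpos < 32, bitsize <= 32 and bitpos + bitsize > 32.  */
#include <stdint.h>

static uint32_t
extract_split_example (const uint32_t word[2], unsigned bitpos, unsigned bitsize)
{
  unsigned lo_bits = 32 - bitpos;         /* bits taken from word[0] */
  unsigned hi_bits = bitsize - lo_bits;   /* bits taken from word[1] */
  uint32_t lo = word[0] >> bitpos;        /* already sits at the lsb */
  uint32_t hi = word[1] & (((uint32_t) 1 << hi_bits) - 1);
  /* A signed field would then be sign-extended with two shifts, as above.  */
  return (hi << lo_bits) | lo;
}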
2053 \f
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2056 MODE, fill the upper bits with zeros. Fail if the layout of either
2057 mode is unknown (as for CC modes) or if the extraction would involve
2058 unprofitable mode punning. Return the value on success, otherwise
2059 return null.
2060
2061 This is different from gen_lowpart* in these respects:
2062
2063 - the returned value must always be considered an rvalue
2064
2065 - when MODE is wider than SRC_MODE, the extraction involves
2066 a zero extension
2067
2068 - when MODE is smaller than SRC_MODE, the extraction involves
2069 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2070
2071 In other words, this routine performs a computation, whereas the
2072 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2073 operations. */
2074
2075 rtx
2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2077 {
2078 enum machine_mode int_mode, src_int_mode;
2079
2080 if (mode == src_mode)
2081 return src;
2082
2083 if (CONSTANT_P (src))
2084 {
2085 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086 fails, it will happily create (subreg (symbol_ref)) or similar
2087 invalid SUBREGs. */
2088 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2090 if (ret)
2091 return ret;
2092
2093 if (GET_MODE (src) == VOIDmode
2094 || !validate_subreg (mode, src_mode, src, byte))
2095 return NULL_RTX;
2096
2097 src = force_reg (GET_MODE (src), src);
2098 return gen_rtx_SUBREG (mode, src, byte);
2099 }
2100
2101 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2102 return NULL_RTX;
2103
2104 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105 && MODES_TIEABLE_P (mode, src_mode))
2106 {
2107 rtx x = gen_lowpart_common (mode, src);
2108 if (x)
2109 return x;
2110 }
2111
2112 src_int_mode = int_mode_for_mode (src_mode);
2113 int_mode = int_mode_for_mode (mode);
2114 if (src_int_mode == BLKmode || int_mode == BLKmode)
2115 return NULL_RTX;
2116
2117 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2118 return NULL_RTX;
2119 if (!MODES_TIEABLE_P (int_mode, mode))
2120 return NULL_RTX;
2121
2122 src = gen_lowpart (src_int_mode, src);
2123 src = convert_modes (int_mode, src_int_mode, src, true);
2124 src = gen_lowpart (mode, src);
2125 return src;
2126 }
2127 \f
2128 /* Add INC into TARGET. */
2129
2130 void
2131 expand_inc (rtx target, rtx inc)
2132 {
2133 rtx value = expand_binop (GET_MODE (target), add_optab,
2134 target, inc,
2135 target, 0, OPTAB_LIB_WIDEN);
2136 if (value != target)
2137 emit_move_insn (target, value);
2138 }
2139
2140 /* Subtract DEC from TARGET. */
2141
2142 void
2143 expand_dec (rtx target, rtx dec)
2144 {
2145 rtx value = expand_binop (GET_MODE (target), sub_optab,
2146 target, dec,
2147 target, 0, OPTAB_LIB_WIDEN);
2148 if (value != target)
2149 emit_move_insn (target, value);
2150 }
2151 \f
2152 /* Output a shift instruction for expression code CODE,
2153 with SHIFTED being the rtx for the value to shift,
2154 and AMOUNT the rtx for the amount to shift by.
2155 Store the result in the rtx TARGET, if that is convenient.
2156 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157 Return the rtx for where the value is. */
2158
2159 static rtx
2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161 rtx amount, rtx target, int unsignedp)
2162 {
2163 rtx op1, temp = 0;
2164 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166 optab lshift_optab = ashl_optab;
2167 optab rshift_arith_optab = ashr_optab;
2168 optab rshift_uns_optab = lshr_optab;
2169 optab lrotate_optab = rotl_optab;
2170 optab rrotate_optab = rotr_optab;
2171 enum machine_mode op1_mode;
2172 int attempt;
2173 bool speed = optimize_insn_for_speed_p ();
2174
2175 op1 = amount;
2176 op1_mode = GET_MODE (op1);
2177
2178   /* Determine whether the shift/rotate amount is a vector or scalar.  If the
2179 shift amount is a vector, use the vector/vector shift patterns. */
2180 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2181 {
2182 lshift_optab = vashl_optab;
2183 rshift_arith_optab = vashr_optab;
2184 rshift_uns_optab = vlshr_optab;
2185 lrotate_optab = vrotl_optab;
2186 rrotate_optab = vrotr_optab;
2187 }
2188
2189   /* We previously detected shift counts computed by NEGATE_EXPR
2190      and shifted in the other direction, but that does not work
2191 on all machines. */
2192
2193 if (SHIFT_COUNT_TRUNCATED)
2194 {
2195 if (CONST_INT_P (op1)
2196 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199 % GET_MODE_BITSIZE (mode));
2200 else if (GET_CODE (op1) == SUBREG
2201 && subreg_lowpart_p (op1)
2202 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2203 op1 = SUBREG_REG (op1);
2204 }
2205
2206 if (op1 == const0_rtx)
2207 return shifted;
2208
2209   /* Check whether it's cheaper to implement a left shift by a constant
2210      bit count as a sequence of additions.  */
2211 if (code == LSHIFT_EXPR
2212 && CONST_INT_P (op1)
2213 && INTVAL (op1) > 0
2214 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2215 && INTVAL (op1) < MAX_BITS_PER_WORD
2216 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2217 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2218 {
2219 int i;
2220 for (i = 0; i < INTVAL (op1); i++)
2221 {
2222 temp = force_reg (mode, shifted);
2223 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2224 unsignedp, OPTAB_LIB_WIDEN);
2225 }
2226 return shifted;
2227 }
2228
2229 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2230 {
2231 enum optab_methods methods;
2232
2233 if (attempt == 0)
2234 methods = OPTAB_DIRECT;
2235 else if (attempt == 1)
2236 methods = OPTAB_WIDEN;
2237 else
2238 methods = OPTAB_LIB_WIDEN;
2239
2240 if (rotate)
2241 {
2242 /* Widening does not work for rotation. */
2243 if (methods == OPTAB_WIDEN)
2244 continue;
2245 else if (methods == OPTAB_LIB_WIDEN)
2246 {
2247 /* If we have been unable to open-code this by a rotation,
2248 do it as the IOR of two shifts. I.e., to rotate A
2249 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2250 where C is the bitsize of A.
2251
2252 It is theoretically possible that the target machine might
2253 not be able to perform either shift and hence we would
2254 be making two libcalls rather than just the one for the
2255 shift (similarly if IOR could not be done). We will allow
2256 this extremely unlikely lossage to avoid complicating the
2257 code below. */
2258
2259 rtx subtarget = target == shifted ? 0 : target;
2260 rtx new_amount, other_amount;
2261 rtx temp1;
2262
2263 new_amount = op1;
2264 if (CONST_INT_P (op1))
2265 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2266 - INTVAL (op1));
2267 else
2268 other_amount
2269 = simplify_gen_binary (MINUS, GET_MODE (op1),
2270 GEN_INT (GET_MODE_PRECISION (mode)),
2271 op1);
2272
2273 shifted = force_reg (mode, shifted);
2274
2275 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2276 mode, shifted, new_amount, 0, 1);
2277 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2278 mode, shifted, other_amount,
2279 subtarget, 1);
2280 return expand_binop (mode, ior_optab, temp, temp1, target,
2281 unsignedp, methods);
2282 }
2283
2284 temp = expand_binop (mode,
2285 left ? lrotate_optab : rrotate_optab,
2286 shifted, op1, target, unsignedp, methods);
2287 }
2288 else if (unsignedp)
2289 temp = expand_binop (mode,
2290 left ? lshift_optab : rshift_uns_optab,
2291 shifted, op1, target, unsignedp, methods);
2292
2293 /* Do arithmetic shifts.
2294 Also, if we are going to widen the operand, we can just as well
2295 use an arithmetic right-shift instead of a logical one. */
2296 if (temp == 0 && ! rotate
2297 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2298 {
2299 enum optab_methods methods1 = methods;
2300
2301 /* If trying to widen a log shift to an arithmetic shift,
2302 don't accept an arithmetic shift of the same size. */
2303 if (unsignedp)
2304 methods1 = OPTAB_MUST_WIDEN;
2305
2306 /* Arithmetic shift */
2307
2308 temp = expand_binop (mode,
2309 left ? lshift_optab : rshift_arith_optab,
2310 shifted, op1, target, unsignedp, methods1);
2311 }
2312
2313 /* We used to try extzv here for logical right shifts, but that was
2314 only useful for one machine, the VAX, and caused poor code
2315 generation there for lshrdi3, so the code was deleted and a
2316 define_expand for lshrsi3 was added to vax.md. */
2317 }
2318
2319 gcc_assert (temp);
2320 return temp;
2321 }
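/* Editor's illustration (not part of expmed.c): the rotate-as-two-shifts
   fallback described in the comment above, written out for a 32-bit value.
   The name is hypothetical; assumes 0 < n < 32 so neither shift count
   reaches the word width.  */
#include <stdint.h>

static uint32_t
rotl32_example (uint32_t a, unsigned n)
{
  return (a << n) | (a >> (32 - n));
}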
2322
2323 /* Output a shift instruction for expression code CODE,
2324 with SHIFTED being the rtx for the value to shift,
2325 and AMOUNT the amount to shift by.
2326 Store the result in the rtx TARGET, if that is convenient.
2327 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2328 Return the rtx for where the value is. */
2329
2330 rtx
2331 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2332 int amount, rtx target, int unsignedp)
2333 {
2334 return expand_shift_1 (code, mode,
2335 shifted, GEN_INT (amount), target, unsignedp);
2336 }
2337
2338 /* Output a shift instruction for expression code CODE,
2339 with SHIFTED being the rtx for the value to shift,
2340 and AMOUNT the tree for the amount to shift by.
2341 Store the result in the rtx TARGET, if that is convenient.
2342 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2343 Return the rtx for where the value is. */
2344
2345 rtx
2346 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2347 tree amount, rtx target, int unsignedp)
2348 {
2349 return expand_shift_1 (code, mode,
2350 shifted, expand_normal (amount), target, unsignedp);
2351 }
2352
2353 \f
2354 /* Indicates the type of fixup needed after a constant multiplication.
2355 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2356 the result should be negated, and ADD_VARIANT means that the
2357 multiplicand should be added to the result. */
2358 enum mult_variant {basic_variant, negate_variant, add_variant};
2359
2360 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2361 const struct mult_cost *, enum machine_mode mode);
2362 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2363 struct algorithm *, enum mult_variant *, int);
2364 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2365 const struct algorithm *, enum mult_variant);
2366 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2367 static rtx extract_high_half (enum machine_mode, rtx);
2368 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2369 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2370 int, int);
2371 /* Compute and return the best algorithm for multiplying by T.
2372    The algorithm must cost less than COST_LIMIT.
2373    If retval.cost >= COST_LIMIT, no algorithm was found and all
2374    other fields of the returned struct are undefined.
2375 MODE is the machine mode of the multiplication. */
2376
2377 static void
2378 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2379 const struct mult_cost *cost_limit, enum machine_mode mode)
2380 {
2381 int m;
2382 struct algorithm *alg_in, *best_alg;
2383 struct mult_cost best_cost;
2384 struct mult_cost new_limit;
2385 int op_cost, op_latency;
2386 unsigned HOST_WIDE_INT orig_t = t;
2387 unsigned HOST_WIDE_INT q;
2388 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2389 int hash_index;
2390 bool cache_hit = false;
2391 enum alg_code cache_alg = alg_zero;
2392 bool speed = optimize_insn_for_speed_p ();
2393
2394 /* Indicate that no algorithm is yet found. If no algorithm
2395 is found, this value will be returned and indicate failure. */
2396 alg_out->cost.cost = cost_limit->cost + 1;
2397 alg_out->cost.latency = cost_limit->latency + 1;
2398
2399 if (cost_limit->cost < 0
2400 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2401 return;
2402
2403 /* Restrict the bits of "t" to the multiplication's mode. */
2404 t &= GET_MODE_MASK (mode);
2405
2406   /* t == 1 can be done at zero cost.  */
2407 if (t == 1)
2408 {
2409 alg_out->ops = 1;
2410 alg_out->cost.cost = 0;
2411 alg_out->cost.latency = 0;
2412 alg_out->op[0] = alg_m;
2413 return;
2414 }
2415
2416 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2417 fail now. */
2418 if (t == 0)
2419 {
2420 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2421 return;
2422 else
2423 {
2424 alg_out->ops = 1;
2425 alg_out->cost.cost = zero_cost[speed];
2426 alg_out->cost.latency = zero_cost[speed];
2427 alg_out->op[0] = alg_zero;
2428 return;
2429 }
2430 }
2431
2432 /* We'll be needing a couple extra algorithm structures now. */
2433
2434 alg_in = XALLOCA (struct algorithm);
2435 best_alg = XALLOCA (struct algorithm);
2436 best_cost = *cost_limit;
2437
2438 /* Compute the hash index. */
2439 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2440
2441 /* See if we already know what to do for T. */
2442 if (alg_hash[hash_index].t == t
2443       && alg_hash[hash_index].mode == mode
2445 && alg_hash[hash_index].speed == speed
2446 && alg_hash[hash_index].alg != alg_unknown)
2447 {
2448 cache_alg = alg_hash[hash_index].alg;
2449
2450 if (cache_alg == alg_impossible)
2451 {
2452 /* The cache tells us that it's impossible to synthesize
2453 multiplication by T within alg_hash[hash_index].cost. */
2454 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2455 /* COST_LIMIT is at least as restrictive as the one
2456 recorded in the hash table, in which case we have no
2457 hope of synthesizing a multiplication. Just
2458 return. */
2459 return;
2460
2461 /* If we get here, COST_LIMIT is less restrictive than the
2462 one recorded in the hash table, so we may be able to
2463 synthesize a multiplication. Proceed as if we didn't
2464 have the cache entry. */
2465 }
2466 else
2467 {
2468 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2469 /* The cached algorithm shows that this multiplication
2470 requires more cost than COST_LIMIT. Just return. This
2471 way, we don't clobber this cache entry with
2472 alg_impossible but retain useful information. */
2473 return;
2474
2475 cache_hit = true;
2476
2477 switch (cache_alg)
2478 {
2479 case alg_shift:
2480 goto do_alg_shift;
2481
2482 case alg_add_t_m2:
2483 case alg_sub_t_m2:
2484 goto do_alg_addsub_t_m2;
2485
2486 case alg_add_factor:
2487 case alg_sub_factor:
2488 goto do_alg_addsub_factor;
2489
2490 case alg_add_t2_m:
2491 goto do_alg_add_t2_m;
2492
2493 case alg_sub_t2_m:
2494 goto do_alg_sub_t2_m;
2495
2496 default:
2497 gcc_unreachable ();
2498 }
2499 }
2500 }
2501
2502 /* If we have a group of zero bits at the low-order part of T, try
2503 multiplying by the remaining bits and then doing a shift. */
2504
2505 if ((t & 1) == 0)
2506 {
2507 do_alg_shift:
2508 m = floor_log2 (t & -t); /* m = number of low zero bits */
2509 if (m < maxm)
2510 {
2511 q = t >> m;
2512 /* The function expand_shift will choose between a shift and
2513 a sequence of additions, so the observed cost is given as
2514 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2515 op_cost = m * add_cost[speed][mode];
2516 if (shift_cost[speed][mode][m] < op_cost)
2517 op_cost = shift_cost[speed][mode][m];
2518 new_limit.cost = best_cost.cost - op_cost;
2519 new_limit.latency = best_cost.latency - op_cost;
2520 synth_mult (alg_in, q, &new_limit, mode);
2521
2522 alg_in->cost.cost += op_cost;
2523 alg_in->cost.latency += op_cost;
2524 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2525 {
2526 struct algorithm *x;
2527 best_cost = alg_in->cost;
2528 x = alg_in, alg_in = best_alg, best_alg = x;
2529 best_alg->log[best_alg->ops] = m;
2530 best_alg->op[best_alg->ops] = alg_shift;
2531 }
2532
2533 /* See if treating ORIG_T as a signed number yields a better
2534 sequence. Try this sequence only for a negative ORIG_T
2535 as it would be useless for a non-negative ORIG_T. */
2536 if ((HOST_WIDE_INT) orig_t < 0)
2537 {
2538 /* Shift ORIG_T as follows because a right shift of a
2539 		 negative-valued signed type is
2540 		 implementation-defined.  */
2541 q = ~(~orig_t >> m);
2542 /* The function expand_shift will choose between a shift
2543 and a sequence of additions, so the observed cost is
2544 given as MIN (m * add_cost[speed][mode],
2545 shift_cost[speed][mode][m]). */
2546 op_cost = m * add_cost[speed][mode];
2547 if (shift_cost[speed][mode][m] < op_cost)
2548 op_cost = shift_cost[speed][mode][m];
2549 new_limit.cost = best_cost.cost - op_cost;
2550 new_limit.latency = best_cost.latency - op_cost;
2551 synth_mult (alg_in, q, &new_limit, mode);
2552
2553 alg_in->cost.cost += op_cost;
2554 alg_in->cost.latency += op_cost;
2555 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2556 {
2557 struct algorithm *x;
2558 best_cost = alg_in->cost;
2559 x = alg_in, alg_in = best_alg, best_alg = x;
2560 best_alg->log[best_alg->ops] = m;
2561 best_alg->op[best_alg->ops] = alg_shift;
2562 }
2563 }
2564 }
2565 if (cache_hit)
2566 goto done;
2567 }
2568
2569 /* If we have an odd number, add or subtract one. */
2570 if ((t & 1) != 0)
2571 {
2572 unsigned HOST_WIDE_INT w;
2573
2574 do_alg_addsub_t_m2:
2575 for (w = 1; (w & t) != 0; w <<= 1)
2576 ;
2577 /* If T was -1, then W will be zero after the loop. This is another
2578 	 case where T ends with ...111.  Handling it as (T + 1) and
2579 	 subtracting 1 produces slightly better code and makes algorithm
2580 selection much faster than treating it like the ...0111 case
2581 below. */
2582 if (w == 0
2583 || (w > 2
2584 /* Reject the case where t is 3.
2585 Thus we prefer addition in that case. */
2586 && t != 3))
2587 {
2588 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2589
2590 op_cost = add_cost[speed][mode];
2591 new_limit.cost = best_cost.cost - op_cost;
2592 new_limit.latency = best_cost.latency - op_cost;
2593 synth_mult (alg_in, t + 1, &new_limit, mode);
2594
2595 alg_in->cost.cost += op_cost;
2596 alg_in->cost.latency += op_cost;
2597 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2598 {
2599 struct algorithm *x;
2600 best_cost = alg_in->cost;
2601 x = alg_in, alg_in = best_alg, best_alg = x;
2602 best_alg->log[best_alg->ops] = 0;
2603 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2604 }
2605 }
2606 else
2607 {
2608 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2609
2610 op_cost = add_cost[speed][mode];
2611 new_limit.cost = best_cost.cost - op_cost;
2612 new_limit.latency = best_cost.latency - op_cost;
2613 synth_mult (alg_in, t - 1, &new_limit, mode);
2614
2615 alg_in->cost.cost += op_cost;
2616 alg_in->cost.latency += op_cost;
2617 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2618 {
2619 struct algorithm *x;
2620 best_cost = alg_in->cost;
2621 x = alg_in, alg_in = best_alg, best_alg = x;
2622 best_alg->log[best_alg->ops] = 0;
2623 best_alg->op[best_alg->ops] = alg_add_t_m2;
2624 }
2625 }
2626
2627       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2628 quickly with a - a * n for some appropriate constant n. */
2629 m = exact_log2 (-orig_t + 1);
2630 if (m >= 0 && m < maxm)
2631 {
2632 op_cost = shiftsub1_cost[speed][mode][m];
2633 new_limit.cost = best_cost.cost - op_cost;
2634 new_limit.latency = best_cost.latency - op_cost;
2635 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2636
2637 alg_in->cost.cost += op_cost;
2638 alg_in->cost.latency += op_cost;
2639 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2640 {
2641 struct algorithm *x;
2642 best_cost = alg_in->cost;
2643 x = alg_in, alg_in = best_alg, best_alg = x;
2644 best_alg->log[best_alg->ops] = m;
2645 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2646 }
2647 }
2648
2649 if (cache_hit)
2650 goto done;
2651 }
2652
2653 /* Look for factors of t of the form
2654 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2655 If we find such a factor, we can multiply by t using an algorithm that
2656      multiplies by q, shifts the result left by m, and adds or subtracts the unshifted q-multiple.
2657
2658 We search for large factors first and loop down, even if large factors
2659 are less probable than small; if we find a large factor we will find a
2660 good sequence quickly, and therefore be able to prune (by decreasing
2661 COST_LIMIT) the search. */
2662
2663 do_alg_addsub_factor:
2664 for (m = floor_log2 (t - 1); m >= 2; m--)
2665 {
2666 unsigned HOST_WIDE_INT d;
2667
2668 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2669 if (t % d == 0 && t > d && m < maxm
2670 && (!cache_hit || cache_alg == alg_add_factor))
2671 {
2672 	  /* If the target has a cheap shift-and-add instruction, use
2673 that in preference to a shift insn followed by an add insn.
2674 Assume that the shift-and-add is "atomic" with a latency
2675 equal to its cost, otherwise assume that on superscalar
2676 hardware the shift may be executed concurrently with the
2677 earlier steps in the algorithm. */
2678 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2679 if (shiftadd_cost[speed][mode][m] < op_cost)
2680 {
2681 op_cost = shiftadd_cost[speed][mode][m];
2682 op_latency = op_cost;
2683 }
2684 else
2685 op_latency = add_cost[speed][mode];
2686
2687 new_limit.cost = best_cost.cost - op_cost;
2688 new_limit.latency = best_cost.latency - op_latency;
2689 synth_mult (alg_in, t / d, &new_limit, mode);
2690
2691 alg_in->cost.cost += op_cost;
2692 alg_in->cost.latency += op_latency;
2693 if (alg_in->cost.latency < op_cost)
2694 alg_in->cost.latency = op_cost;
2695 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2696 {
2697 struct algorithm *x;
2698 best_cost = alg_in->cost;
2699 x = alg_in, alg_in = best_alg, best_alg = x;
2700 best_alg->log[best_alg->ops] = m;
2701 best_alg->op[best_alg->ops] = alg_add_factor;
2702 }
2703 /* Other factors will have been taken care of in the recursion. */
2704 break;
2705 }
2706
2707 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2708 if (t % d == 0 && t > d && m < maxm
2709 && (!cache_hit || cache_alg == alg_sub_factor))
2710 {
2711 	  /* If the target has a cheap shift-and-subtract insn, use
2712 that in preference to a shift insn followed by a sub insn.
2713 Assume that the shift-and-sub is "atomic" with a latency
2714 	     equal to its cost, otherwise assume that on superscalar
2715 hardware the shift may be executed concurrently with the
2716 earlier steps in the algorithm. */
2717 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2718 if (shiftsub0_cost[speed][mode][m] < op_cost)
2719 {
2720 op_cost = shiftsub0_cost[speed][mode][m];
2721 op_latency = op_cost;
2722 }
2723 else
2724 op_latency = add_cost[speed][mode];
2725
2726 new_limit.cost = best_cost.cost - op_cost;
2727 new_limit.latency = best_cost.latency - op_latency;
2728 synth_mult (alg_in, t / d, &new_limit, mode);
2729
2730 alg_in->cost.cost += op_cost;
2731 alg_in->cost.latency += op_latency;
2732 if (alg_in->cost.latency < op_cost)
2733 alg_in->cost.latency = op_cost;
2734 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2735 {
2736 struct algorithm *x;
2737 best_cost = alg_in->cost;
2738 x = alg_in, alg_in = best_alg, best_alg = x;
2739 best_alg->log[best_alg->ops] = m;
2740 best_alg->op[best_alg->ops] = alg_sub_factor;
2741 }
2742 break;
2743 }
2744 }
2745 if (cache_hit)
2746 goto done;
2747
2748 /* Try shift-and-add (load effective address) instructions,
2749 i.e. do a*3, a*5, a*9. */
2750 if ((t & 1) != 0)
2751 {
2752 do_alg_add_t2_m:
2753 q = t - 1;
2754 q = q & -q;
2755 m = exact_log2 (q);
2756 if (m >= 0 && m < maxm)
2757 {
2758 op_cost = shiftadd_cost[speed][mode][m];
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_cost;
2761 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2762
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_cost;
2765 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766 {
2767 struct algorithm *x;
2768 best_cost = alg_in->cost;
2769 x = alg_in, alg_in = best_alg, best_alg = x;
2770 best_alg->log[best_alg->ops] = m;
2771 best_alg->op[best_alg->ops] = alg_add_t2_m;
2772 }
2773 }
2774 if (cache_hit)
2775 goto done;
2776
2777 do_alg_sub_t2_m:
2778 q = t + 1;
2779 q = q & -q;
2780 m = exact_log2 (q);
2781 if (m >= 0 && m < maxm)
2782 {
2783 op_cost = shiftsub0_cost[speed][mode][m];
2784 new_limit.cost = best_cost.cost - op_cost;
2785 new_limit.latency = best_cost.latency - op_cost;
2786 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2787
2788 alg_in->cost.cost += op_cost;
2789 alg_in->cost.latency += op_cost;
2790 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2791 {
2792 struct algorithm *x;
2793 best_cost = alg_in->cost;
2794 x = alg_in, alg_in = best_alg, best_alg = x;
2795 best_alg->log[best_alg->ops] = m;
2796 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2797 }
2798 }
2799 if (cache_hit)
2800 goto done;
2801 }
2802
2803 done:
2804 /* If best_cost has not decreased, we have not found any algorithm. */
2805 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2806 {
2807 /* We failed to find an algorithm. Record alg_impossible for
2808 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2809 we are asked to find an algorithm for T within the same or
2810 lower COST_LIMIT, we can immediately return to the
2811 caller. */
2812 alg_hash[hash_index].t = t;
2813 alg_hash[hash_index].mode = mode;
2814 alg_hash[hash_index].speed = speed;
2815 alg_hash[hash_index].alg = alg_impossible;
2816 alg_hash[hash_index].cost = *cost_limit;
2817 return;
2818 }
2819
2820 /* Cache the result. */
2821 if (!cache_hit)
2822 {
2823 alg_hash[hash_index].t = t;
2824 alg_hash[hash_index].mode = mode;
2825 alg_hash[hash_index].speed = speed;
2826 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2827 alg_hash[hash_index].cost.cost = best_cost.cost;
2828 alg_hash[hash_index].cost.latency = best_cost.latency;
2829 }
2830
2831   /* If we are getting too long a sequence for `struct algorithm'
2832 to record, make this search fail. */
2833 if (best_alg->ops == MAX_BITS_PER_WORD)
2834 return;
2835
2836 /* Copy the algorithm from temporary space to the space at alg_out.
2837 We avoid using structure assignment because the majority of
2838 best_alg is normally undefined, and this is a critical function. */
2839 alg_out->ops = best_alg->ops + 1;
2840 alg_out->cost = best_cost;
2841 memcpy (alg_out->op, best_alg->op,
2842 alg_out->ops * sizeof *alg_out->op);
2843 memcpy (alg_out->log, best_alg->log,
2844 alg_out->ops * sizeof *alg_out->log);
2845 }
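/* Editor's illustration (not part of expmed.c): the kind of shift/add
   sequences synth_mult searches for.  Multiplying by 10 can use a shift-add
   followed by a shift (alg_add_t2_m, then alg_shift); multiplying by 7 can
   use a shift followed by a subtract (alg_sub_t_m2).  Function names are
   hypothetical.  */
#include <stdint.h>

static uint32_t
mult_by_10_example (uint32_t x)
{
  return ((x << 2) + x) << 1;    /* 5*x, then doubled */
}

static uint32_t
mult_by_7_example (uint32_t x)
{
  return (x << 3) - x;           /* 8*x minus x */
}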
2846 \f
2847 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2848 Try three variations:
2849
2850 - a shift/add sequence based on VAL itself
2851 - a shift/add sequence based on -VAL, followed by a negation
2852 - a shift/add sequence based on VAL - 1, followed by an addition.
2853
2854    Return true if the cheapest of these costs less than MULT_COST,
2855 describing the algorithm in *ALG and final fixup in *VARIANT. */
2856
2857 static bool
2858 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2859 struct algorithm *alg, enum mult_variant *variant,
2860 int mult_cost)
2861 {
2862 struct algorithm alg2;
2863 struct mult_cost limit;
2864 int op_cost;
2865 bool speed = optimize_insn_for_speed_p ();
2866
2867 /* Fail quickly for impossible bounds. */
2868 if (mult_cost < 0)
2869 return false;
2870
2871 /* Ensure that mult_cost provides a reasonable upper bound.
2872 Any constant multiplication can be performed with less
2873 than 2 * bits additions. */
2874 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2875 if (mult_cost > op_cost)
2876 mult_cost = op_cost;
2877
2878 *variant = basic_variant;
2879 limit.cost = mult_cost;
2880 limit.latency = mult_cost;
2881 synth_mult (alg, val, &limit, mode);
2882
2883   /* This works only if the negated value actually fits in an
2884      `unsigned int'.  */
2885 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2886 {
2887 op_cost = neg_cost[speed][mode];
2888 if (MULT_COST_LESS (&alg->cost, mult_cost))
2889 {
2890 limit.cost = alg->cost.cost - op_cost;
2891 limit.latency = alg->cost.latency - op_cost;
2892 }
2893 else
2894 {
2895 limit.cost = mult_cost - op_cost;
2896 limit.latency = mult_cost - op_cost;
2897 }
2898
2899 synth_mult (&alg2, -val, &limit, mode);
2900 alg2.cost.cost += op_cost;
2901 alg2.cost.latency += op_cost;
2902 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2903 *alg = alg2, *variant = negate_variant;
2904 }
2905
2906 /* This proves very useful for division-by-constant. */
2907 op_cost = add_cost[speed][mode];
2908 if (MULT_COST_LESS (&alg->cost, mult_cost))
2909 {
2910 limit.cost = alg->cost.cost - op_cost;
2911 limit.latency = alg->cost.latency - op_cost;
2912 }
2913 else
2914 {
2915 limit.cost = mult_cost - op_cost;
2916 limit.latency = mult_cost - op_cost;
2917 }
2918
2919 synth_mult (&alg2, val - 1, &limit, mode);
2920 alg2.cost.cost += op_cost;
2921 alg2.cost.latency += op_cost;
2922 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2923 *alg = alg2, *variant = add_variant;
2924
2925 return MULT_COST_LESS (&alg->cost, mult_cost);
2926 }
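/* Editor's illustration (not part of expmed.c): the two fixup variants
   described above.  For x * -7, negate_variant synthesizes 7*x and negates
   the result; for x * 9, add_variant synthesizes (9 - 1) * x and adds the
   multiplicand once at the end.  Names are hypothetical; the arithmetic is
   done in unsigned form to keep the sketch free of signed-overflow issues.  */
#include <stdint.h>

static int32_t
mult_by_minus_7_example (int32_t x)
{
  uint32_t ux = (uint32_t) x;
  return (int32_t) -((ux << 3) - ux);   /* negate_variant: -(7*x) */
}

static int32_t
mult_by_9_example (int32_t x)
{
  uint32_t ux = (uint32_t) x;
  return (int32_t) ((ux << 3) + ux);    /* add_variant: 8*x + x */
}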
2927
2928 /* A subroutine of expand_mult, used for constant multiplications.
2929 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2930 convenient. Use the shift/add sequence described by ALG and apply
2931 the final fixup specified by VARIANT. */
2932
2933 static rtx
2934 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2935 rtx target, const struct algorithm *alg,
2936 enum mult_variant variant)
2937 {
2938 HOST_WIDE_INT val_so_far;
2939 rtx insn, accum, tem;
2940 int opno;
2941 enum machine_mode nmode;
2942
2943   /* Avoid referencing memory over and over, and avoid invalid sharing
2944      on SUBREGs.  */
2945 op0 = force_reg (mode, op0);
2946
2947 /* ACCUM starts out either as OP0 or as a zero, depending on
2948 the first operation. */
2949
2950 if (alg->op[0] == alg_zero)
2951 {
2952 accum = copy_to_mode_reg (mode, const0_rtx);
2953 val_so_far = 0;
2954 }
2955 else if (alg->op[0] == alg_m)
2956 {
2957 accum = copy_to_mode_reg (mode, op0);
2958 val_so_far = 1;
2959 }
2960 else
2961 gcc_unreachable ();
2962
2963 for (opno = 1; opno < alg->ops; opno++)
2964 {
2965 int log = alg->log[opno];
2966 rtx shift_subtarget = optimize ? 0 : accum;
2967 rtx add_target
2968 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2969 && !optimize)
2970 ? target : 0;
2971 rtx accum_target = optimize ? 0 : accum;
2972 rtx accum_inner;
2973
2974 switch (alg->op[opno])
2975 {
2976 case alg_shift:
2977 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2978 /* REG_EQUAL note will be attached to the following insn. */
2979 emit_move_insn (accum, tem);
2980 val_so_far <<= log;
2981 break;
2982
2983 case alg_add_t_m2:
2984 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2985 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2986 add_target ? add_target : accum_target);
2987 val_so_far += (HOST_WIDE_INT) 1 << log;
2988 break;
2989
2990 case alg_sub_t_m2:
2991 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2992 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2993 add_target ? add_target : accum_target);
2994 val_so_far -= (HOST_WIDE_INT) 1 << log;
2995 break;
2996
2997 case alg_add_t2_m:
2998 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2999 log, shift_subtarget, 0);
3000 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3001 add_target ? add_target : accum_target);
3002 val_so_far = (val_so_far << log) + 1;
3003 break;
3004
3005 case alg_sub_t2_m:
3006 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3007 log, shift_subtarget, 0);
3008 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3009 add_target ? add_target : accum_target);
3010 val_so_far = (val_so_far << log) - 1;
3011 break;
3012
3013 case alg_add_factor:
3014 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3015 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3016 add_target ? add_target : accum_target);
3017 val_so_far += val_so_far << log;
3018 break;
3019
3020 case alg_sub_factor:
3021 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3022 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3023 (add_target
3024 ? add_target : (optimize ? 0 : tem)));
3025 val_so_far = (val_so_far << log) - val_so_far;
3026 break;
3027
3028 default:
3029 gcc_unreachable ();
3030 }
3031
3032 /* Write a REG_EQUAL note on the last insn so that we can cse
3033 multiplication sequences. Note that if ACCUM is a SUBREG,
3034 we've set the inner register and must properly indicate
3035 that. */
3036
3037 tem = op0, nmode = mode;
3038 accum_inner = accum;
3039 if (GET_CODE (accum) == SUBREG)
3040 {
3041 accum_inner = SUBREG_REG (accum);
3042 nmode = GET_MODE (accum_inner);
3043 tem = gen_lowpart (nmode, op0);
3044 }
3045
3046 insn = get_last_insn ();
3047 set_dst_reg_note (insn, REG_EQUAL,
3048 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3049 accum_inner);
3050 }
3051
3052 if (variant == negate_variant)
3053 {
3054 val_so_far = -val_so_far;
3055 accum = expand_unop (mode, neg_optab, accum, target, 0);
3056 }
3057 else if (variant == add_variant)
3058 {
3059 val_so_far = val_so_far + 1;
3060 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3061 }
3062
3063 /* Compare only the bits of val and val_so_far that are significant
3064 in the result mode, to avoid sign-/zero-extension confusion. */
3065 val &= GET_MODE_MASK (mode);
3066 val_so_far &= GET_MODE_MASK (mode);
3067 gcc_assert (val == val_so_far);
3068
3069 return accum;
3070 }
3071
3072 /* Perform a multiplication and return an rtx for the result.
3073 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3074 TARGET is a suggestion for where to store the result (an rtx).
3075
3076 We check specially for a constant integer as OP1.
3077 If you want this check for OP0 as well, then before calling
3078 you should swap the two operands if OP0 would be constant. */
3079
3080 rtx
3081 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3082 int unsignedp)
3083 {
3084 enum mult_variant variant;
3085 struct algorithm algorithm;
3086 int max_cost;
3087 bool speed = optimize_insn_for_speed_p ();
3088
3089 /* Handling const0_rtx here allows us to use zero as a rogue value for
3090 coeff below. */
3091 if (op1 == const0_rtx)
3092 return const0_rtx;
3093 if (op1 == const1_rtx)
3094 return op0;
3095 if (op1 == constm1_rtx)
3096 return expand_unop (mode,
3097 GET_MODE_CLASS (mode) == MODE_INT
3098 && !unsignedp && flag_trapv
3099 ? negv_optab : neg_optab,
3100 op0, target, 0);
3101
3102 /* These are the operations that are potentially turned into a sequence
3103 of shifts and additions. */
3104 if (SCALAR_INT_MODE_P (mode)
3105 && (unsignedp || !flag_trapv))
3106 {
3107 HOST_WIDE_INT coeff = 0;
3108 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3109
3110 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3111 	 less than or equal in size to `unsigned int', this doesn't matter.
3112 If the mode is larger than `unsigned int', then synth_mult works
3113 only if the constant value exactly fits in an `unsigned int' without
3114 any truncation. This means that multiplying by negative values does
3115 	 not work; results are off by 2^32 on a 32-bit machine.
3116
3117 if (CONST_INT_P (op1))
3118 {
3119 /* Attempt to handle multiplication of DImode values by negative
3120 coefficients, by performing the multiplication by a positive
3121 	     multiplier and then negating the result.  */
3122 if (INTVAL (op1) < 0
3123 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3124 {
3125 	      /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3126 result is interpreted as an unsigned coefficient.
3127 Exclude cost of op0 from max_cost to match the cost
3128 calculation of the synth_mult. */
3129 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3130 speed)
3131 - neg_cost[speed][mode]);
3132 if (max_cost > 0
3133 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3134 &variant, max_cost))
3135 {
3136 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3137 NULL_RTX, &algorithm,
3138 variant);
3139 return expand_unop (mode, neg_optab, temp, target, 0);
3140 }
3141 }
3142 else coeff = INTVAL (op1);
3143 }
3144 else if (GET_CODE (op1) == CONST_DOUBLE)
3145 {
3146 /* If we are multiplying in DImode, it may still be a win
3147 to try to work with shifts and adds. */
3148 if (CONST_DOUBLE_HIGH (op1) == 0
3149 && CONST_DOUBLE_LOW (op1) > 0)
3150 coeff = CONST_DOUBLE_LOW (op1);
3151 else if (CONST_DOUBLE_LOW (op1) == 0
3152 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3153 {
3154 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3155 + HOST_BITS_PER_WIDE_INT;
3156 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3157 || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_DOUBLE_INT)
3158 return expand_shift (LSHIFT_EXPR, mode, op0,
3159 shift, target, unsignedp);
3160 }
3161 }
3162
3163 /* We used to test optimize here, on the grounds that it's better to
3164 produce a smaller program when -O is not used. But this causes
3165 such a terrible slowdown sometimes that it seems better to always
3166 use synth_mult. */
3167 if (coeff != 0)
3168 {
3169 /* Special case powers of two. */
3170 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3171 return expand_shift (LSHIFT_EXPR, mode, op0,
3172 floor_log2 (coeff), target, unsignedp);
3173
3174 /* Exclude cost of op0 from max_cost to match the cost
3175 calculation of the synth_mult. */
3176 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3177 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3178 max_cost))
3179 return expand_mult_const (mode, op0, coeff, target,
3180 &algorithm, variant);
3181 }
3182 }
3183
3184 if (GET_CODE (op0) == CONST_DOUBLE)
3185 {
3186 rtx temp = op0;
3187 op0 = op1;
3188 op1 = temp;
3189 }
3190
3191 /* Expand x*2.0 as x+x. */
3192 if (GET_CODE (op1) == CONST_DOUBLE
3193 && SCALAR_FLOAT_MODE_P (mode))
3194 {
3195 REAL_VALUE_TYPE d;
3196 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3197
3198 if (REAL_VALUES_EQUAL (d, dconst2))
3199 {
3200 op0 = force_reg (GET_MODE (op0), op0);
3201 return expand_binop (mode, add_optab, op0, op0,
3202 target, unsignedp, OPTAB_LIB_WIDEN);
3203 }
3204 }
3205
3206 /* This used to use umul_optab if unsigned, but for non-widening multiply
3207 there is no difference between signed and unsigned. */
3208 op0 = expand_binop (mode,
3209 ! unsignedp
3210 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3211 ? smulv_optab : smul_optab,
3212 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3213 gcc_assert (op0);
3214 return op0;
3215 }
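/* Editor's illustration (not part of expmed.c): the power-of-two special
   case above -- a multiplication by 8 is emitted as a single left shift
   (and x * 2.0 in a float mode becomes x + x).  The name is hypothetical.  */
#include <stdint.h>

static uint32_t
mult_by_8_example (uint32_t x)
{
  return x << 3;
}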
3216
3217 /* Perform a widening multiplication and return an rtx for the result.
3218 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3219 TARGET is a suggestion for where to store the result (an rtx).
3220 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3221 or smul_widen_optab.
3222
3223 We check specially for a constant integer as OP1, comparing the
3224 cost of a widening multiply against the cost of a sequence of shifts
3225 and adds. */
3226
3227 rtx
3228 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3229 int unsignedp, optab this_optab)
3230 {
3231 bool speed = optimize_insn_for_speed_p ();
3232 rtx cop1;
3233
3234 if (CONST_INT_P (op1)
3235 && GET_MODE (op0) != VOIDmode
3236 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3237 this_optab == umul_widen_optab))
3238 && CONST_INT_P (cop1)
3239 && (INTVAL (cop1) >= 0
3240 || HWI_COMPUTABLE_MODE_P (mode)))
3241 {
3242 HOST_WIDE_INT coeff = INTVAL (cop1);
3243 int max_cost;
3244 enum mult_variant variant;
3245 struct algorithm algorithm;
3246
3247 /* Special case powers of two. */
3248 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3249 {
3250 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3251 return expand_shift (LSHIFT_EXPR, mode, op0,
3252 floor_log2 (coeff), target, unsignedp);
3253 }
3254
3255 /* Exclude cost of op0 from max_cost to match the cost
3256 calculation of the synth_mult. */
3257 max_cost = mul_widen_cost[speed][mode];
3258 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3259 max_cost))
3260 {
3261 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3262 return expand_mult_const (mode, op0, coeff, target,
3263 &algorithm, variant);
3264 }
3265 }
3266 return expand_binop (mode, this_optab, op0, op1, target,
3267 unsignedp, OPTAB_LIB_WIDEN);
3268 }
3269 \f
3270 /* Return the smallest n such that 2**n >= X. */
3271
3272 int
3273 ceil_log2 (unsigned HOST_WIDE_INT x)
3274 {
3275 return floor_log2 (x - 1) + 1;
3276 }
3277
3278 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3279 replace division by D, and put the least significant N bits of the result
3280 in *MULTIPLIER_PTR and return the most significant bit.
3281
3282 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3283 needed precision is in PRECISION (should be <= N).
3284
3285 PRECISION should be as small as possible so this function can choose
3286    the multiplier more freely.
3287
3288    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3289 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3290
3291 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3292 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3293
3294 unsigned HOST_WIDE_INT
3295 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3296 unsigned HOST_WIDE_INT *multiplier_ptr,
3297 int *post_shift_ptr, int *lgup_ptr)
3298 {
3299 HOST_WIDE_INT mhigh_hi, mlow_hi;
3300 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3301 int lgup, post_shift;
3302 int pow, pow2;
3303 unsigned HOST_WIDE_INT nl, dummy1;
3304 HOST_WIDE_INT nh, dummy2;
3305
3306 /* lgup = ceil(log2(divisor)); */
3307 lgup = ceil_log2 (d);
3308
3309 gcc_assert (lgup <= n);
3310
3311 pow = n + lgup;
3312 pow2 = n + lgup - precision;
3313
3314 /* We could handle this with some effort, but this case is much
3315      better handled directly with a scc insn, so rely on the caller using
3316 that. */
3317 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3318
3319 /* mlow = 2^(N + lgup)/d */
3320 if (pow >= HOST_BITS_PER_WIDE_INT)
3321 {
3322 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3323 nl = 0;
3324 }
3325 else
3326 {
3327 nh = 0;
3328 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3329 }
3330 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3331 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3332
3333   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3334 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3335 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3336 else
3337 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3338 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3339 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3340
3341 gcc_assert (!mhigh_hi || nh - d < d);
3342 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3343 /* Assert that mlow < mhigh. */
3344 gcc_assert (mlow_hi < mhigh_hi
3345 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3346
3347 /* If precision == N, then mlow, mhigh exceed 2^N
3348 (but they do not exceed 2^(N+1)). */
3349
3350 /* Reduce to lowest terms. */
3351 for (post_shift = lgup; post_shift > 0; post_shift--)
3352 {
3353 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3354 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3355 if (ml_lo >= mh_lo)
3356 break;
3357
3358 mlow_hi = 0;
3359 mlow_lo = ml_lo;
3360 mhigh_hi = 0;
3361 mhigh_lo = mh_lo;
3362 }
3363
3364 *post_shift_ptr = post_shift;
3365 *lgup_ptr = lgup;
3366 if (n < HOST_BITS_PER_WIDE_INT)
3367 {
3368 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3369 *multiplier_ptr = mhigh_lo & mask;
3370 return mhigh_lo >= mask;
3371 }
3372 else
3373 {
3374 *multiplier_ptr = mhigh_lo;
3375 return mhigh_hi;
3376 }
3377 }
3378
3379 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3380 congruent to 1 (mod 2**N). */
3381
3382 static unsigned HOST_WIDE_INT
3383 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3384 {
3385 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3386
3387 /* The algorithm notes that the choice y = x satisfies
3388 x*y == 1 mod 2^3, since x is assumed odd.
3389 Each iteration doubles the number of bits of significance in y. */
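/* For example, with x = 7 and n = 8 the iteration produces y = 183
   (0xb7), and indeed 7 * 183 = 1281 == 1 (mod 256).  */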
3390
3391 unsigned HOST_WIDE_INT mask;
3392 unsigned HOST_WIDE_INT y = x;
3393 int nbit = 3;
3394
3395 mask = (n == HOST_BITS_PER_WIDE_INT
3396 ? ~(unsigned HOST_WIDE_INT) 0
3397 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3398
3399 while (nbit < n)
3400 {
3401 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3402 nbit *= 2;
3403 }
3404 return y;
3405 }
3406
3407 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1 done
3408 in the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3409 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3410 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3411 become signed.
3412
3413 The result is put in TARGET if that is convenient.
3414
3415 MODE is the mode of operation. */
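/* Writing uhigh and shigh for the unsigned and signed high-part products
   (notation used only in this comment), this relies on the usual
   identity, taken modulo 2**N:
     uhigh (OP0, OP1) = shigh (OP0, OP1)
                        + (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0)
   with the two correction terms subtracted instead when going from
   unsigned to signed.  */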
3416
3417 rtx
3418 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3419 rtx op1, rtx target, int unsignedp)
3420 {
3421 rtx tem;
3422 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3423
3424 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3425 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3426 tem = expand_and (mode, tem, op1, NULL_RTX);
3427 adj_operand
3428 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3429 adj_operand);
3430
3431 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3432 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3433 tem = expand_and (mode, tem, op0, NULL_RTX);
3434 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3435 target);
3436
3437 return target;
3438 }
3439
3440 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3441
3442 static rtx
3443 extract_high_half (enum machine_mode mode, rtx op)
3444 {
3445 enum machine_mode wider_mode;
3446
3447 if (mode == word_mode)
3448 return gen_highpart (mode, op);
3449
3450 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3451
3452 wider_mode = GET_MODE_WIDER_MODE (mode);
3453 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3454 GET_MODE_BITSIZE (mode), 0, 1);
3455 return convert_modes (mode, wider_mode, op, 0);
3456 }
3457
3458 /* Like expand_mult_highpart, but only consider using a multiplication
3459 optab. OP1 is an rtx for the constant operand. */
3460
3461 static rtx
3462 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3463 rtx target, int unsignedp, int max_cost)
3464 {
3465 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3466 enum machine_mode wider_mode;
3467 optab moptab;
3468 rtx tem;
3469 int size;
3470 bool speed = optimize_insn_for_speed_p ();
3471
3472 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3473
3474 wider_mode = GET_MODE_WIDER_MODE (mode);
3475 size = GET_MODE_BITSIZE (mode);
3476
3477 /* Firstly, try using a multiplication insn that only generates the needed
3478 high part of the product, and in the sign flavor of unsignedp. */
3479 if (mul_highpart_cost[speed][mode] < max_cost)
3480 {
3481 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3482 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3483 unsignedp, OPTAB_DIRECT);
3484 if (tem)
3485 return tem;
3486 }
3487
3488 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3489 Need to adjust the result after the multiplication. */
3490 if (size - 1 < BITS_PER_WORD
3491 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3492 + 4 * add_cost[speed][mode] < max_cost))
3493 {
3494 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3495 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3496 unsignedp, OPTAB_DIRECT);
3497 if (tem)
3498 /* We used the wrong signedness. Adjust the result. */
3499 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3500 tem, unsignedp);
3501 }
3502
3503 /* Try widening multiplication. */
3504 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3505 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3506 && mul_widen_cost[speed][wider_mode] < max_cost)
3507 {
3508 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3509 unsignedp, OPTAB_WIDEN);
3510 if (tem)
3511 return extract_high_half (mode, tem);
3512 }
3513
3514 /* Try widening the mode and performing a non-widening multiplication. */
3515 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3516 && size - 1 < BITS_PER_WORD
3517 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3518 {
3519 rtx insns, wop0, wop1;
3520
3521 /* We need to widen the operands, for example to ensure the
3522 constant multiplier is correctly sign or zero extended.
3523 Use a sequence to clean up any instructions emitted by
3524 the conversions if things don't work out. */
3525 start_sequence ();
3526 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3527 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3528 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3529 unsignedp, OPTAB_WIDEN);
3530 insns = get_insns ();
3531 end_sequence ();
3532
3533 if (tem)
3534 {
3535 emit_insn (insns);
3536 return extract_high_half (mode, tem);
3537 }
3538 }
3539
3540 /* Try widening multiplication of opposite signedness, and adjust. */
3541 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3542 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3543 && size - 1 < BITS_PER_WORD
3544 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3545 + 4 * add_cost[speed][mode] < max_cost))
3546 {
3547 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3548 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3549 if (tem != 0)
3550 {
3551 tem = extract_high_half (mode, tem);
3552 /* We used the wrong signedness. Adjust the result. */
3553 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3554 target, unsignedp);
3555 }
3556 }
3557
3558 return 0;
3559 }
3560
3561 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3562 putting the high half of the result in TARGET if that is convenient,
3563 and return where the result is. If the operation cannot be performed,
3564 0 is returned.
3565
3566 MODE is the mode of operation and result.
3567
3568 UNSIGNEDP nonzero means unsigned multiply.
3569
3570 MAX_COST is the total allowed cost for the expanded RTL. */
3571
3572 static rtx
3573 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3574 rtx target, int unsignedp, int max_cost)
3575 {
3576 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3577 unsigned HOST_WIDE_INT cnst1;
3578 int extra_cost;
3579 bool sign_adjust = false;
3580 enum mult_variant variant;
3581 struct algorithm alg;
3582 rtx tem;
3583 bool speed = optimize_insn_for_speed_p ();
3584
3585 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3586 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3587 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3588
3589 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3590
3591 /* We can't optimize modes wider than BITS_PER_WORD.
3592 ??? We might be able to perform double-word arithmetic if
3593 mode == word_mode, however all the cost calculations in
3594 synth_mult etc. assume single-word operations. */
3595 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3596 return expand_mult_highpart_optab (mode, op0, op1, target,
3597 unsignedp, max_cost);
3598
3599 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3600
3601 /* Check whether we try to multiply by a negative constant. */
3602 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3603 {
3604 sign_adjust = true;
3605 extra_cost += add_cost[speed][mode];
3606 }
3607
3608 /* See whether shift/add multiplication is cheap enough. */
3609 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3610 max_cost - extra_cost))
3611 {
3612 /* See whether the specialized multiplication optabs are
3613 cheaper than the shift/add version. */
3614 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3615 alg.cost.cost + extra_cost);
3616 if (tem)
3617 return tem;
3618
3619 tem = convert_to_mode (wider_mode, op0, unsignedp);
3620 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3621 tem = extract_high_half (mode, tem);
3622
3623 /* Adjust result for signedness. */
3624 if (sign_adjust)
3625 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3626
3627 return tem;
3628 }
3629 return expand_mult_highpart_optab (mode, op0, op1, target,
3630 unsignedp, max_cost);
3631 }
3632
3633
3634 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3635
3636 static rtx
3637 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3638 {
3639 unsigned HOST_WIDE_INT masklow, maskhigh;
3640 rtx result, temp, shift, label;
3641 int logd;
3642
3643 logd = floor_log2 (d);
3644 result = gen_reg_rtx (mode);
3645
3646 /* Avoid conditional branches when they're expensive. */
3647 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3648 && optimize_insn_for_speed_p ())
3649 {
3650 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3651 mode, 0, -1);
3652 if (signmask)
3653 {
3654 signmask = force_reg (mode, signmask);
3655 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3656 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3657
3658 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3659 which instruction sequence to use. If logical right shifts
3660 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3661 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
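/* As a hand-checked example, with d = 4 and op0 = -13 the signmask is -1,
   so the XOR/SUB variant computes ((-13 ^ -1) - -1) & 3 = 1 and then
   (1 ^ -1) - -1 = -1, which is the truncating remainder of -13 / 4.  */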
3662
3663 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3664 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3665 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3666 > COSTS_N_INSNS (2)))
3667 {
3668 temp = expand_binop (mode, xor_optab, op0, signmask,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, xor_optab, temp, signmask,
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 temp = expand_binop (mode, sub_optab, temp, signmask,
3677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3678 }
3679 else
3680 {
3681 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3682 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3683 signmask = force_reg (mode, signmask);
3684
3685 temp = expand_binop (mode, add_optab, op0, signmask,
3686 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3687 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 temp = expand_binop (mode, sub_optab, temp, signmask,
3690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691 }
3692 return temp;
3693 }
3694 }
3695
3696 /* Mask contains the mode's signbit and the significant bits of the
3697 modulus. By including the signbit in the operation, many targets
3698 can avoid an explicit compare operation in the following comparison
3699 against zero. */
3700
3701 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3702 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3703 {
3704 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3705 maskhigh = -1;
3706 }
3707 else
3708 maskhigh = (HOST_WIDE_INT) -1
3709 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3710
3711 temp = expand_binop (mode, and_optab, op0,
3712 immed_double_const (masklow, maskhigh, mode),
3713 result, 1, OPTAB_LIB_WIDEN);
3714 if (temp != result)
3715 emit_move_insn (result, temp);
3716
3717 label = gen_label_rtx ();
3718 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3719
3720 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3721 0, OPTAB_LIB_WIDEN);
3722 masklow = (HOST_WIDE_INT) -1 << logd;
3723 maskhigh = -1;
3724 temp = expand_binop (mode, ior_optab, temp,
3725 immed_double_const (masklow, maskhigh, mode),
3726 result, 1, OPTAB_LIB_WIDEN);
3727 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3728 0, OPTAB_LIB_WIDEN);
3729 if (temp != result)
3730 emit_move_insn (result, temp);
3731 emit_label (label);
3732 return result;
3733 }
3734
3735 /* Expand signed division of OP0 by a power of two D in mode MODE.
3736 This routine is only called for positive values of D. */
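/* All of the branch-free variants below amount to adding D - 1 to
   negative operands before the arithmetic right shift; for example
   -13 / 4 is computed as (-13 + 3) >> 2 = -3.  */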
3737
3738 static rtx
3739 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3740 {
3741 rtx temp, label;
3742 int logd;
3743
3744 logd = floor_log2 (d);
3745
3746 if (d == 2
3747 && BRANCH_COST (optimize_insn_for_speed_p (),
3748 false) >= 1)
3749 {
3750 temp = gen_reg_rtx (mode);
3751 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3752 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3753 0, OPTAB_LIB_WIDEN);
3754 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3755 }
3756
3757 #ifdef HAVE_conditional_move
3758 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3759 >= 2)
3760 {
3761 rtx temp2;
3762
3763 /* ??? emit_conditional_move forces a stack adjustment via
3764 compare_from_rtx so, if the sequence is discarded, it will
3765 be lost. Do it now instead. */
3766 do_pending_stack_adjust ();
3767
3768 start_sequence ();
3769 temp2 = copy_to_mode_reg (mode, op0);
3770 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3771 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3772 temp = force_reg (mode, temp);
3773
3774 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3775 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3776 mode, temp, temp2, mode, 0);
3777 if (temp2)
3778 {
3779 rtx seq = get_insns ();
3780 end_sequence ();
3781 emit_insn (seq);
3782 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3783 }
3784 end_sequence ();
3785 }
3786 #endif
3787
3788 if (BRANCH_COST (optimize_insn_for_speed_p (),
3789 false) >= 2)
3790 {
3791 int ushift = GET_MODE_BITSIZE (mode) - logd;
3792
3793 temp = gen_reg_rtx (mode);
3794 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3795 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3796 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3797 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3798 else
3799 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3800 ushift, NULL_RTX, 1);
3801 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3802 0, OPTAB_LIB_WIDEN);
3803 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3804 }
3805
3806 label = gen_label_rtx ();
3807 temp = copy_to_mode_reg (mode, op0);
3808 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3809 expand_inc (temp, GEN_INT (d - 1));
3810 emit_label (label);
3811 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3812 }
3813 \f
3814 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3815 if that is convenient, and returning where the result is.
3816 You may request either the quotient or the remainder as the result;
3817 specify REM_FLAG nonzero to get the remainder.
3818
3819 CODE is the expression code for which kind of division this is;
3820 it controls how rounding is done. MODE is the machine mode to use.
3821 UNSIGNEDP nonzero means do unsigned division. */
3822
3823 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3824 and then correct it by or'ing in missing high bits
3825 if result of ANDI is nonzero.
3826 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3827 This could optimize to a bfexts instruction.
3828 But C doesn't use these operations, so their optimizations are
3829 left for later. */
3830 /* ??? For modulo, we don't actually need the highpart of the first product;
3831 the low part will do nicely. And for small divisors, the second multiply
3832 can also be a low-part only multiply or even be completely left out.
3833 E.g. to calculate the remainder of a division by 3 with a 32 bit
3834 multiply, multiply with 0x55555556 and extract the upper two bits;
3835 the result is exact for inputs up to 0x1fffffff.
3836 The input range can be reduced by using cross-sum rules.
3837 For odd divisors >= 3, the following table gives right shift counts
3838 so that if a number is shifted by an integer multiple of the given
3839 amount, the remainder stays the same:
3840 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3841 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3842 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3843 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3844 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3845
3846 Cross-sum rules for even numbers can be derived by leaving as many bits
3847 to the right alone as the divisor has zeros to the right.
3848 E.g. if x is an unsigned 32 bit number:
3849 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3850 */
3851
3852 rtx
3853 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3854 rtx op0, rtx op1, rtx target, int unsignedp)
3855 {
3856 enum machine_mode compute_mode;
3857 rtx tquotient;
3858 rtx quotient = 0, remainder = 0;
3859 rtx last;
3860 int size;
3861 rtx insn;
3862 optab optab1, optab2;
3863 int op1_is_constant, op1_is_pow2 = 0;
3864 int max_cost, extra_cost;
3865 static HOST_WIDE_INT last_div_const = 0;
3866 static HOST_WIDE_INT ext_op1;
3867 bool speed = optimize_insn_for_speed_p ();
3868
3869 op1_is_constant = CONST_INT_P (op1);
3870 if (op1_is_constant)
3871 {
3872 ext_op1 = INTVAL (op1);
3873 if (unsignedp)
3874 ext_op1 &= GET_MODE_MASK (mode);
3875 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3876 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3877 }
3878
3879 /*
3880 This is the structure of expand_divmod:
3881
3882 First comes code to fix up the operands so we can perform the operations
3883 correctly and efficiently.
3884
3885 Second comes a switch statement with code specific for each rounding mode.
3886 For some special operands this code emits all RTL for the desired
3887 operation; for other cases, it generates only a quotient and stores it in
3888 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3889 to indicate that it has not done anything.
3890
3891 Last comes code that finishes the operation. If QUOTIENT is set and
3892 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3893 QUOTIENT is not set, it is computed using trunc rounding.
3894
3895 We try to generate special code for division and remainder when OP1 is a
3896 constant. If |OP1| = 2**n we can use shifts and some other fast
3897 operations. For other values of OP1, we compute a carefully selected
3898 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3899 by m.
3900
3901 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3902 half of the product. Different strategies for generating the product are
3903 implemented in expand_mult_highpart.
3904
3905 If what we actually want is the remainder, we generate that by another
3906 by-constant multiplication and a subtraction. */
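/* As a concrete, hand-worked instance of the multiplication strategy:
   unsigned 32-bit division by 10 uses the multiplier 0xcccccccd with a
   post shift of 3, i.e. x / 10 = (high 32 bits of (x * 0xcccccccd)) >> 3,
   and x % 10 is then obtained as x - 10 * (x / 10).  */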
3907
3908 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3909 code below will malfunction if we are, so check here and handle
3910 the special case if so. */
3911 if (op1 == const1_rtx)
3912 return rem_flag ? const0_rtx : op0;
3913
3914 /* When dividing by -1, we could get an overflow.
3915 negv_optab can handle overflows. */
3916 if (! unsignedp && op1 == constm1_rtx)
3917 {
3918 if (rem_flag)
3919 return const0_rtx;
3920 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3921 ? negv_optab : neg_optab, op0, target, 0);
3922 }
3923
3924 if (target
3925 /* Don't use the function value register as a target
3926 since we have to read it as well as write it,
3927 and function-inlining gets confused by this. */
3928 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3929 /* Don't clobber an operand while doing a multi-step calculation. */
3930 || ((rem_flag || op1_is_constant)
3931 && (reg_mentioned_p (target, op0)
3932 || (MEM_P (op0) && MEM_P (target))))
3933 || reg_mentioned_p (target, op1)
3934 || (MEM_P (op1) && MEM_P (target))))
3935 target = 0;
3936
3937 /* Get the mode in which to perform this computation. Normally it will
3938 be MODE, but sometimes we can't do the desired operation in MODE.
3939 If so, pick a wider mode in which we can do the operation. Convert
3940 to that mode at the start to avoid repeated conversions.
3941
3942 First see what operations we need. These depend on the expression
3943 we are evaluating. (We assume that divxx3 insns exist under the
3944 same conditions as modxx3 insns do, and that these insns don't normally
3945 fail. If these assumptions are not correct, we may generate less
3946 efficient code in some cases.)
3947
3948 Then see if we find a mode in which we can open-code that operation
3949 (either a division, modulus, or shift). Finally, check for the smallest
3950 mode for which we can do the operation with a library call. */
3951
3952 /* We might want to refine this now that we have division-by-constant
3953 optimization. Since expand_mult_highpart tries so many variants, it is
3954 not straightforward to generalize this. Maybe we should make an array
3955 of possible modes in init_expmed? Save this for GCC 2.7. */
3956
3957 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3958 ? (unsignedp ? lshr_optab : ashr_optab)
3959 : (unsignedp ? udiv_optab : sdiv_optab));
3960 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3961 ? optab1
3962 : (unsignedp ? udivmod_optab : sdivmod_optab));
3963
3964 for (compute_mode = mode; compute_mode != VOIDmode;
3965 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3966 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3967 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3968 break;
3969
3970 if (compute_mode == VOIDmode)
3971 for (compute_mode = mode; compute_mode != VOIDmode;
3972 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3973 if (optab_libfunc (optab1, compute_mode)
3974 || optab_libfunc (optab2, compute_mode))
3975 break;
3976
3977 /* If we still couldn't find a mode, use MODE, but expand_binop will
3978 probably die. */
3979 if (compute_mode == VOIDmode)
3980 compute_mode = mode;
3981
3982 if (target && GET_MODE (target) == compute_mode)
3983 tquotient = target;
3984 else
3985 tquotient = gen_reg_rtx (compute_mode);
3986
3987 size = GET_MODE_BITSIZE (compute_mode);
3988 #if 0
3989 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3990 (mode), and thereby get better code when OP1 is a constant. Do that
3991 later. It will require going over all usages of SIZE below. */
3992 size = GET_MODE_BITSIZE (mode);
3993 #endif
3994
3995 /* Only deduct something for a REM if the last divide done was
3996 for a different constant. Then set the constant of the last
3997 divide. */
3998 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3999 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4000 && INTVAL (op1) == last_div_const))
4001 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4002
4003 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4004
4005 /* Now convert to the best mode to use. */
4006 if (compute_mode != mode)
4007 {
4008 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4009 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4010
4011 /* convert_modes may have placed op1 into a register, so we
4012 must recompute the following. */
4013 op1_is_constant = CONST_INT_P (op1);
4014 op1_is_pow2 = (op1_is_constant
4015 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4016 || (! unsignedp
4017 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4018 }
4019
4020 /* If one of the operands is a volatile MEM, copy it into a register. */
4021
4022 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4023 op0 = force_reg (compute_mode, op0);
4024 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4025 op1 = force_reg (compute_mode, op1);
4026
4027 /* If we need the remainder or if OP1 is constant, we need to
4028 put OP0 in a register in case it has any queued subexpressions. */
4029 if (rem_flag || op1_is_constant)
4030 op0 = force_reg (compute_mode, op0);
4031
4032 last = get_last_insn ();
4033
4034 /* Promote floor rounding to trunc rounding for unsigned operations. */
4035 if (unsignedp)
4036 {
4037 if (code == FLOOR_DIV_EXPR)
4038 code = TRUNC_DIV_EXPR;
4039 if (code == FLOOR_MOD_EXPR)
4040 code = TRUNC_MOD_EXPR;
4041 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4042 code = TRUNC_DIV_EXPR;
4043 }
4044
4045 if (op1 != const0_rtx)
4046 switch (code)
4047 {
4048 case TRUNC_MOD_EXPR:
4049 case TRUNC_DIV_EXPR:
4050 if (op1_is_constant)
4051 {
4052 if (unsignedp)
4053 {
4054 unsigned HOST_WIDE_INT mh, ml;
4055 int pre_shift, post_shift;
4056 int dummy;
4057 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4058 & GET_MODE_MASK (compute_mode));
4059
4060 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4061 {
4062 pre_shift = floor_log2 (d);
4063 if (rem_flag)
4064 {
4065 remainder
4066 = expand_binop (compute_mode, and_optab, op0,
4067 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4068 remainder, 1,
4069 OPTAB_LIB_WIDEN);
4070 if (remainder)
4071 return gen_lowpart (mode, remainder);
4072 }
4073 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4074 pre_shift, tquotient, 1);
4075 }
4076 else if (size <= HOST_BITS_PER_WIDE_INT)
4077 {
4078 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4079 {
4080 /* Most significant bit of divisor is set; emit an scc
4081 insn. */
4082 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4083 compute_mode, 1, 1);
4084 }
4085 else
4086 {
4087 /* Find a suitable multiplier and right shift count
4088 instead of multiplying with D. */
4089
4090 mh = choose_multiplier (d, size, size,
4091 &ml, &post_shift, &dummy);
4092
4093 /* If the suggested multiplier is more than SIZE bits,
4094 we can do better for even divisors, using an
4095 initial right shift. */
4096 if (mh != 0 && (d & 1) == 0)
4097 {
4098 pre_shift = floor_log2 (d & -d);
4099 mh = choose_multiplier (d >> pre_shift, size,
4100 size - pre_shift,
4101 &ml, &post_shift, &dummy);
4102 gcc_assert (!mh);
4103 }
4104 else
4105 pre_shift = 0;
4106
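/* A hand-worked example of the mh != 0 path below: for a 32-bit
   division by 7, ml is 0x24924925 and post_shift is 3, so the code
   emitted amounts to
     t1 = high 32 bits of (op0 * 0x24924925);
     quotient = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1);
   e.g. op0 = 100 gives t1 = 14 and quotient = 14.  */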
4107 if (mh != 0)
4108 {
4109 rtx t1, t2, t3, t4;
4110
4111 if (post_shift - 1 >= BITS_PER_WORD)
4112 goto fail1;
4113
4114 extra_cost
4115 = (shift_cost[speed][compute_mode][post_shift - 1]
4116 + shift_cost[speed][compute_mode][1]
4117 + 2 * add_cost[speed][compute_mode]);
4118 t1 = expand_mult_highpart (compute_mode, op0,
4119 GEN_INT (ml),
4120 NULL_RTX, 1,
4121 max_cost - extra_cost);
4122 if (t1 == 0)
4123 goto fail1;
4124 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4125 op0, t1),
4126 NULL_RTX);
4127 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4128 t2, 1, NULL_RTX, 1);
4129 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4130 t1, t3),
4131 NULL_RTX);
4132 quotient = expand_shift
4133 (RSHIFT_EXPR, compute_mode, t4,
4134 post_shift - 1, tquotient, 1);
4135 }
4136 else
4137 {
4138 rtx t1, t2;
4139
4140 if (pre_shift >= BITS_PER_WORD
4141 || post_shift >= BITS_PER_WORD)
4142 goto fail1;
4143
4144 t1 = expand_shift
4145 (RSHIFT_EXPR, compute_mode, op0,
4146 pre_shift, NULL_RTX, 1);
4147 extra_cost
4148 = (shift_cost[speed][compute_mode][pre_shift]
4149 + shift_cost[speed][compute_mode][post_shift]);
4150 t2 = expand_mult_highpart (compute_mode, t1,
4151 GEN_INT (ml),
4152 NULL_RTX, 1,
4153 max_cost - extra_cost);
4154 if (t2 == 0)
4155 goto fail1;
4156 quotient = expand_shift
4157 (RSHIFT_EXPR, compute_mode, t2,
4158 post_shift, tquotient, 1);
4159 }
4160 }
4161 }
4162 else /* Too wide mode to use tricky code */
4163 break;
4164
4165 insn = get_last_insn ();
4166 if (insn != last)
4167 set_dst_reg_note (insn, REG_EQUAL,
4168 gen_rtx_UDIV (compute_mode, op0, op1),
4169 quotient);
4170 }
4171 else /* TRUNC_DIV, signed */
4172 {
4173 unsigned HOST_WIDE_INT ml;
4174 int lgup, post_shift;
4175 rtx mlr;
4176 HOST_WIDE_INT d = INTVAL (op1);
4177 unsigned HOST_WIDE_INT abs_d;
4178
4179 /* Since d might be INT_MIN, we have to cast to
4180 unsigned HOST_WIDE_INT before negating to avoid
4181 undefined signed overflow. */
4182 abs_d = (d >= 0
4183 ? (unsigned HOST_WIDE_INT) d
4184 : - (unsigned HOST_WIDE_INT) d);
4185
4186 /* n rem d = n rem -d */
4187 if (rem_flag && d < 0)
4188 {
4189 d = abs_d;
4190 op1 = gen_int_mode (abs_d, compute_mode);
4191 }
4192
4193 if (d == 1)
4194 quotient = op0;
4195 else if (d == -1)
4196 quotient = expand_unop (compute_mode, neg_optab, op0,
4197 tquotient, 0);
4198 else if (HOST_BITS_PER_WIDE_INT >= size
4199 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200 {
4201 /* This case is not handled correctly below. */
4202 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4203 compute_mode, 1, 1);
4204 if (quotient == 0)
4205 goto fail1;
4206 }
4207 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4208 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4209 : sdiv_pow2_cheap[speed][compute_mode])
4210 /* We assume that the cheap metric is true if the
4211 optab has an expander for this mode. */
4212 && ((optab_handler ((rem_flag ? smod_optab
4213 : sdiv_optab),
4214 compute_mode)
4215 != CODE_FOR_nothing)
4216 || (optab_handler (sdivmod_optab,
4217 compute_mode)
4218 != CODE_FOR_nothing)))
4219 ;
4220 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4221 {
4222 if (rem_flag)
4223 {
4224 remainder = expand_smod_pow2 (compute_mode, op0, d);
4225 if (remainder)
4226 return gen_lowpart (mode, remainder);
4227 }
4228
4229 if (sdiv_pow2_cheap[speed][compute_mode]
4230 && ((optab_handler (sdiv_optab, compute_mode)
4231 != CODE_FOR_nothing)
4232 || (optab_handler (sdivmod_optab, compute_mode)
4233 != CODE_FOR_nothing)))
4234 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4235 compute_mode, op0,
4236 gen_int_mode (abs_d,
4237 compute_mode),
4238 NULL_RTX, 0);
4239 else
4240 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4241
4242 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4243 negate the quotient. */
4244 if (d < 0)
4245 {
4246 insn = get_last_insn ();
4247 if (insn != last
4248 && abs_d < ((unsigned HOST_WIDE_INT) 1
4249 << (HOST_BITS_PER_WIDE_INT - 1)))
4250 set_dst_reg_note (insn, REG_EQUAL,
4251 gen_rtx_DIV (compute_mode, op0,
4252 gen_int_mode
4253 (abs_d,
4254 compute_mode)),
4255 quotient);
4256
4257 quotient = expand_unop (compute_mode, neg_optab,
4258 quotient, quotient, 0);
4259 }
4260 }
4261 else if (size <= HOST_BITS_PER_WIDE_INT)
4262 {
4263 choose_multiplier (abs_d, size, size - 1,
4264 &ml, &post_shift, &lgup);
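/* In outline: when ml fits in size - 1 bits the quotient is computed,
   roughly, as
     ((high part of op0 * ml) >> post_shift) - (op0 >> (size - 1));
   otherwise the bits from size - 1 upward are forced on in ml and op0 is
   added back to the high-part product before the shifts.  When d < 0 the
   operands of the final MINUS are swapped, which negates the result.  */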
4265 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4266 {
4267 rtx t1, t2, t3;
4268
4269 if (post_shift >= BITS_PER_WORD
4270 || size - 1 >= BITS_PER_WORD)
4271 goto fail1;
4272
4273 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4274 + shift_cost[speed][compute_mode][size - 1]
4275 + add_cost[speed][compute_mode]);
4276 t1 = expand_mult_highpart (compute_mode, op0,
4277 GEN_INT (ml), NULL_RTX, 0,
4278 max_cost - extra_cost);
4279 if (t1 == 0)
4280 goto fail1;
4281 t2 = expand_shift
4282 (RSHIFT_EXPR, compute_mode, t1,
4283 post_shift, NULL_RTX, 0);
4284 t3 = expand_shift
4285 (RSHIFT_EXPR, compute_mode, op0,
4286 size - 1, NULL_RTX, 0);
4287 if (d < 0)
4288 quotient
4289 = force_operand (gen_rtx_MINUS (compute_mode,
4290 t3, t2),
4291 tquotient);
4292 else
4293 quotient
4294 = force_operand (gen_rtx_MINUS (compute_mode,
4295 t2, t3),
4296 tquotient);
4297 }
4298 else
4299 {
4300 rtx t1, t2, t3, t4;
4301
4302 if (post_shift >= BITS_PER_WORD
4303 || size - 1 >= BITS_PER_WORD)
4304 goto fail1;
4305
4306 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4307 mlr = gen_int_mode (ml, compute_mode);
4308 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4309 + shift_cost[speed][compute_mode][size - 1]
4310 + 2 * add_cost[speed][compute_mode]);
4311 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4312 NULL_RTX, 0,
4313 max_cost - extra_cost);
4314 if (t1 == 0)
4315 goto fail1;
4316 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4317 t1, op0),
4318 NULL_RTX);
4319 t3 = expand_shift
4320 (RSHIFT_EXPR, compute_mode, t2,
4321 post_shift, NULL_RTX, 0);
4322 t4 = expand_shift
4323 (RSHIFT_EXPR, compute_mode, op0,
4324 size - 1, NULL_RTX, 0);
4325 if (d < 0)
4326 quotient
4327 = force_operand (gen_rtx_MINUS (compute_mode,
4328 t4, t3),
4329 tquotient);
4330 else
4331 quotient
4332 = force_operand (gen_rtx_MINUS (compute_mode,
4333 t3, t4),
4334 tquotient);
4335 }
4336 }
4337 else /* Too wide mode to use tricky code */
4338 break;
4339
4340 insn = get_last_insn ();
4341 if (insn != last)
4342 set_dst_reg_note (insn, REG_EQUAL,
4343 gen_rtx_DIV (compute_mode, op0, op1),
4344 quotient);
4345 }
4346 break;
4347 }
4348 fail1:
4349 delete_insns_since (last);
4350 break;
4351
4352 case FLOOR_DIV_EXPR:
4353 case FLOOR_MOD_EXPR:
4354 /* We will come here only for signed operations. */
4355 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4356 {
4357 unsigned HOST_WIDE_INT mh, ml;
4358 int pre_shift, lgup, post_shift;
4359 HOST_WIDE_INT d = INTVAL (op1);
4360
4361 if (d > 0)
4362 {
4363 /* We could just as easily deal with negative constants here,
4364 but it does not seem worth the trouble for GCC 2.6. */
4365 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4366 {
4367 pre_shift = floor_log2 (d);
4368 if (rem_flag)
4369 {
4370 remainder = expand_binop (compute_mode, and_optab, op0,
4371 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4372 remainder, 0, OPTAB_LIB_WIDEN);
4373 if (remainder)
4374 return gen_lowpart (mode, remainder);
4375 }
4376 quotient = expand_shift
4377 (RSHIFT_EXPR, compute_mode, op0,
4378 pre_shift, tquotient, 0);
4379 }
4380 else
4381 {
4382 rtx t1, t2, t3, t4;
4383
4384 mh = choose_multiplier (d, size, size - 1,
4385 &ml, &post_shift, &lgup);
4386 gcc_assert (!mh);
4387
4388 if (post_shift < BITS_PER_WORD
4389 && size - 1 < BITS_PER_WORD)
4390 {
4391 t1 = expand_shift
4392 (RSHIFT_EXPR, compute_mode, op0,
4393 size - 1, NULL_RTX, 0);
4394 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4395 NULL_RTX, 0, OPTAB_WIDEN);
4396 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4397 + shift_cost[speed][compute_mode][size - 1]
4398 + 2 * add_cost[speed][compute_mode]);
4399 t3 = expand_mult_highpart (compute_mode, t2,
4400 GEN_INT (ml), NULL_RTX, 1,
4401 max_cost - extra_cost);
4402 if (t3 != 0)
4403 {
4404 t4 = expand_shift
4405 (RSHIFT_EXPR, compute_mode, t3,
4406 post_shift, NULL_RTX, 1);
4407 quotient = expand_binop (compute_mode, xor_optab,
4408 t4, t1, tquotient, 0,
4409 OPTAB_WIDEN);
4410 }
4411 }
4412 }
4413 }
4414 else
4415 {
4416 rtx nsign, t1, t2, t3, t4;
4417 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4418 op0, constm1_rtx), NULL_RTX);
4419 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4420 0, OPTAB_WIDEN);
4421 nsign = expand_shift
4422 (RSHIFT_EXPR, compute_mode, t2,
4423 size - 1, NULL_RTX, 0);
4424 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4425 NULL_RTX);
4426 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4427 NULL_RTX, 0);
4428 if (t4)
4429 {
4430 rtx t5;
4431 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4432 NULL_RTX, 0);
4433 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4434 t4, t5),
4435 tquotient);
4436 }
4437 }
4438 }
4439
4440 if (quotient != 0)
4441 break;
4442 delete_insns_since (last);
4443
4444 /* Try using an instruction that produces both the quotient and
4445 remainder, using truncation. We can easily compensate the quotient
4446 or remainder to get floor rounding, once we have the remainder.
4447 Notice that we compute also the final remainder value here,
4448 and return the result right away. */
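/* The compensation is the usual one: when the truncating remainder is
   nonzero and the operands have opposite signs, decrement the quotient
   and add OP1 to the remainder.  E.g. -7 / 2 truncates to -3 rem -1 and
   floors to -4 rem 1.  */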
4449 if (target == 0 || GET_MODE (target) != compute_mode)
4450 target = gen_reg_rtx (compute_mode);
4451
4452 if (rem_flag)
4453 {
4454 remainder
4455 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4456 quotient = gen_reg_rtx (compute_mode);
4457 }
4458 else
4459 {
4460 quotient
4461 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4462 remainder = gen_reg_rtx (compute_mode);
4463 }
4464
4465 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4466 quotient, remainder, 0))
4467 {
4468 /* This could be computed with a branch-less sequence.
4469 Save that for later. */
4470 rtx tem;
4471 rtx label = gen_label_rtx ();
4472 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4473 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4474 NULL_RTX, 0, OPTAB_WIDEN);
4475 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4476 expand_dec (quotient, const1_rtx);
4477 expand_inc (remainder, op1);
4478 emit_label (label);
4479 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4480 }
4481
4482 /* No luck with division elimination or divmod. Have to do it
4483 by conditionally adjusting op0 *and* the result. */
4484 {
4485 rtx label1, label2, label3, label4, label5;
4486 rtx adjusted_op0;
4487 rtx tem;
4488
4489 quotient = gen_reg_rtx (compute_mode);
4490 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4491 label1 = gen_label_rtx ();
4492 label2 = gen_label_rtx ();
4493 label3 = gen_label_rtx ();
4494 label4 = gen_label_rtx ();
4495 label5 = gen_label_rtx ();
4496 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4497 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4498 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4499 quotient, 0, OPTAB_LIB_WIDEN);
4500 if (tem != quotient)
4501 emit_move_insn (quotient, tem);
4502 emit_jump_insn (gen_jump (label5));
4503 emit_barrier ();
4504 emit_label (label1);
4505 expand_inc (adjusted_op0, const1_rtx);
4506 emit_jump_insn (gen_jump (label4));
4507 emit_barrier ();
4508 emit_label (label2);
4509 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4510 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4511 quotient, 0, OPTAB_LIB_WIDEN);
4512 if (tem != quotient)
4513 emit_move_insn (quotient, tem);
4514 emit_jump_insn (gen_jump (label5));
4515 emit_barrier ();
4516 emit_label (label3);
4517 expand_dec (adjusted_op0, const1_rtx);
4518 emit_label (label4);
4519 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4520 quotient, 0, OPTAB_LIB_WIDEN);
4521 if (tem != quotient)
4522 emit_move_insn (quotient, tem);
4523 expand_dec (quotient, const1_rtx);
4524 emit_label (label5);
4525 }
4526 break;
4527
4528 case CEIL_DIV_EXPR:
4529 case CEIL_MOD_EXPR:
4530 if (unsignedp)
4531 {
4532 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4533 {
4534 rtx t1, t2, t3;
4535 unsigned HOST_WIDE_INT d = INTVAL (op1);
4536 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4537 floor_log2 (d), tquotient, 1);
4538 t2 = expand_binop (compute_mode, and_optab, op0,
4539 GEN_INT (d - 1),
4540 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4541 t3 = gen_reg_rtx (compute_mode);
4542 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4543 compute_mode, 1, 1);
4544 if (t3 == 0)
4545 {
4546 rtx lab;
4547 lab = gen_label_rtx ();
4548 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4549 expand_inc (t1, const1_rtx);
4550 emit_label (lab);
4551 quotient = t1;
4552 }
4553 else
4554 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4555 t1, t3),
4556 tquotient);
4557 break;
4558 }
4559
4560 /* Try using an instruction that produces both the quotient and
4561 remainder, using truncation. We can easily compensate the
4562 quotient or remainder to get ceiling rounding, once we have the
4563 remainder. Notice that we compute also the final remainder
4564 value here, and return the result right away. */
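/* Here the compensation is simpler: for unsigned operands, a nonzero
   truncating remainder means the quotient must be incremented and OP1
   subtracted from the remainder, e.g. 13 / 4 truncates to 3 and rounds
   up to 4.  */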
4565 if (target == 0 || GET_MODE (target) != compute_mode)
4566 target = gen_reg_rtx (compute_mode);
4567
4568 if (rem_flag)
4569 {
4570 remainder = (REG_P (target)
4571 ? target : gen_reg_rtx (compute_mode));
4572 quotient = gen_reg_rtx (compute_mode);
4573 }
4574 else
4575 {
4576 quotient = (REG_P (target)
4577 ? target : gen_reg_rtx (compute_mode));
4578 remainder = gen_reg_rtx (compute_mode);
4579 }
4580
4581 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4582 remainder, 1))
4583 {
4584 /* This could be computed with a branch-less sequence.
4585 Save that for later. */
4586 rtx label = gen_label_rtx ();
4587 do_cmp_and_jump (remainder, const0_rtx, EQ,
4588 compute_mode, label);
4589 expand_inc (quotient, const1_rtx);
4590 expand_dec (remainder, op1);
4591 emit_label (label);
4592 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4593 }
4594
4595 /* No luck with division elimination or divmod. Have to do it
4596 by conditionally adjusting op0 *and* the result. */
4597 {
4598 rtx label1, label2;
4599 rtx adjusted_op0, tem;
4600
4601 quotient = gen_reg_rtx (compute_mode);
4602 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4603 label1 = gen_label_rtx ();
4604 label2 = gen_label_rtx ();
4605 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4606 compute_mode, label1);
4607 emit_move_insn (quotient, const0_rtx);
4608 emit_jump_insn (gen_jump (label2));
4609 emit_barrier ();
4610 emit_label (label1);
4611 expand_dec (adjusted_op0, const1_rtx);
4612 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4613 quotient, 1, OPTAB_LIB_WIDEN);
4614 if (tem != quotient)
4615 emit_move_insn (quotient, tem);
4616 expand_inc (quotient, const1_rtx);
4617 emit_label (label2);
4618 }
4619 }
4620 else /* signed */
4621 {
4622 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4623 && INTVAL (op1) >= 0)
4624 {
4625 /* This is extremely similar to the code for the unsigned case
4626 above. For 2.7 we should merge these variants, but for
4627 2.6.1 I don't want to touch the code for unsigned since that
4628 gets used in C. The signed case will only be used by other
4629 languages (Ada). */
4630
4631 rtx t1, t2, t3;
4632 unsigned HOST_WIDE_INT d = INTVAL (op1);
4633 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4634 floor_log2 (d), tquotient, 0);
4635 t2 = expand_binop (compute_mode, and_optab, op0,
4636 GEN_INT (d - 1),
4637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4638 t3 = gen_reg_rtx (compute_mode);
4639 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4640 compute_mode, 1, 1);
4641 if (t3 == 0)
4642 {
4643 rtx lab;
4644 lab = gen_label_rtx ();
4645 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4646 expand_inc (t1, const1_rtx);
4647 emit_label (lab);
4648 quotient = t1;
4649 }
4650 else
4651 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4652 t1, t3),
4653 tquotient);
4654 break;
4655 }
4656
4657 /* Try using an instruction that produces both the quotient and
4658 remainder, using truncation. We can easily compensate the
4659 quotient or remainder to get ceiling rounding, once we have the
4660 remainder. Notice that we compute also the final remainder
4661 value here, and return the result right away. */
4662 if (target == 0 || GET_MODE (target) != compute_mode)
4663 target = gen_reg_rtx (compute_mode);
4664 if (rem_flag)
4665 {
4666 remainder= (REG_P (target)
4667 ? target : gen_reg_rtx (compute_mode));
4668 quotient = gen_reg_rtx (compute_mode);
4669 }
4670 else
4671 {
4672 quotient = (REG_P (target)
4673 ? target : gen_reg_rtx (compute_mode));
4674 remainder = gen_reg_rtx (compute_mode);
4675 }
4676
4677 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4678 remainder, 0))
4679 {
4680 /* This could be computed with a branch-less sequence.
4681 Save that for later. */
4682 rtx tem;
4683 rtx label = gen_label_rtx ();
4684 do_cmp_and_jump (remainder, const0_rtx, EQ,
4685 compute_mode, label);
4686 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4687 NULL_RTX, 0, OPTAB_WIDEN);
4688 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4689 expand_inc (quotient, const1_rtx);
4690 expand_dec (remainder, op1);
4691 emit_label (label);
4692 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4693 }
4694
4695 /* No luck with division elimination or divmod. Have to do it
4696 by conditionally adjusting op0 *and* the result. */
4697 {
4698 rtx label1, label2, label3, label4, label5;
4699 rtx adjusted_op0;
4700 rtx tem;
4701
4702 quotient = gen_reg_rtx (compute_mode);
4703 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4704 label1 = gen_label_rtx ();
4705 label2 = gen_label_rtx ();
4706 label3 = gen_label_rtx ();
4707 label4 = gen_label_rtx ();
4708 label5 = gen_label_rtx ();
4709 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4710 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4711 compute_mode, label1);
4712 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4713 quotient, 0, OPTAB_LIB_WIDEN);
4714 if (tem != quotient)
4715 emit_move_insn (quotient, tem);
4716 emit_jump_insn (gen_jump (label5));
4717 emit_barrier ();
4718 emit_label (label1);
4719 expand_dec (adjusted_op0, const1_rtx);
4720 emit_jump_insn (gen_jump (label4));
4721 emit_barrier ();
4722 emit_label (label2);
4723 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4724 compute_mode, label3);
4725 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726 quotient, 0, OPTAB_LIB_WIDEN);
4727 if (tem != quotient)
4728 emit_move_insn (quotient, tem);
4729 emit_jump_insn (gen_jump (label5));
4730 emit_barrier ();
4731 emit_label (label3);
4732 expand_inc (adjusted_op0, const1_rtx);
4733 emit_label (label4);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 expand_inc (quotient, const1_rtx);
4739 emit_label (label5);
4740 }
4741 }
4742 break;
4743
4744 case EXACT_DIV_EXPR:
4745 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4746 {
4747 HOST_WIDE_INT d = INTVAL (op1);
4748 unsigned HOST_WIDE_INT ml;
4749 int pre_shift;
4750 rtx t1;
4751
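/* Shift out the known-zero low bits of the divisor, then multiply by the
   multiplicative inverse of its odd part modulo 2**SIZE.  For example,
   an exact 32-bit division by 12 becomes (op0 >> 2) * 0xaaaaaaab, since
   3 * 0xaaaaaaab == 1 (mod 2**32).  */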
4752 pre_shift = floor_log2 (d & -d);
4753 ml = invert_mod2n (d >> pre_shift, size);
4754 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4755 pre_shift, NULL_RTX, unsignedp);
4756 quotient = expand_mult (compute_mode, t1,
4757 gen_int_mode (ml, compute_mode),
4758 NULL_RTX, 1);
4759
4760 insn = get_last_insn ();
4761 set_dst_reg_note (insn, REG_EQUAL,
4762 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4763 compute_mode, op0, op1),
4764 quotient);
4765 }
4766 break;
4767
4768 case ROUND_DIV_EXPR:
4769 case ROUND_MOD_EXPR:
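/* Round to nearest: compute the truncating quotient and remainder, then
   bump the quotient away from zero (and adjust the remainder by OP1)
   when twice the absolute remainder is at least |OP1|; the unsigned
   branch below tests the remainder against (OP1 - 1) / 2 instead.  */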
4770 if (unsignedp)
4771 {
4772 rtx tem;
4773 rtx label;
4774 label = gen_label_rtx ();
4775 quotient = gen_reg_rtx (compute_mode);
4776 remainder = gen_reg_rtx (compute_mode);
4777 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4778 {
4779 rtx tem;
4780 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4781 quotient, 1, OPTAB_LIB_WIDEN);
4782 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4783 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4784 remainder, 1, OPTAB_LIB_WIDEN);
4785 }
4786 tem = plus_constant (compute_mode, op1, -1);
4787 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4788 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4789 expand_inc (quotient, const1_rtx);
4790 expand_dec (remainder, op1);
4791 emit_label (label);
4792 }
4793 else
4794 {
4795 rtx abs_rem, abs_op1, tem, mask;
4796 rtx label;
4797 label = gen_label_rtx ();
4798 quotient = gen_reg_rtx (compute_mode);
4799 remainder = gen_reg_rtx (compute_mode);
4800 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4801 {
4802 rtx tem;
4803 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4804 quotient, 0, OPTAB_LIB_WIDEN);
4805 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4806 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4807 remainder, 0, OPTAB_LIB_WIDEN);
4808 }
4809 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4810 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4811 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4812 1, NULL_RTX, 1);
4813 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4814 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4815 NULL_RTX, 0, OPTAB_WIDEN);
4816 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4817 size - 1, NULL_RTX, 0);
4818 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 expand_inc (quotient, tem);
4823 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4826 NULL_RTX, 0, OPTAB_WIDEN);
4827 expand_dec (remainder, tem);
4828 emit_label (label);
4829 }
4830 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4831
4832 default:
4833 gcc_unreachable ();
4834 }
4835
4836 if (quotient == 0)
4837 {
4838 if (target && GET_MODE (target) != compute_mode)
4839 target = 0;
4840
4841 if (rem_flag)
4842 {
4843 /* Try to produce the remainder without producing the quotient.
4844 If we seem to have a divmod pattern that does not require widening,
4845 don't try widening here. We should really have a WIDEN argument
4846 to expand_twoval_binop, since what we'd really like to do here is
4847 1) try a mod insn in compute_mode
4848 2) try a divmod insn in compute_mode
4849 3) try a div insn in compute_mode and multiply-subtract to get
4850 remainder
4851 4) try the same things with widening allowed. */
4852 remainder
4853 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4854 op0, op1, target,
4855 unsignedp,
4856 ((optab_handler (optab2, compute_mode)
4857 != CODE_FOR_nothing)
4858 ? OPTAB_DIRECT : OPTAB_WIDEN));
4859 if (remainder == 0)
4860 {
4861 /* No luck there. Can we do remainder and divide at once
4862 without a library call? */
4863 remainder = gen_reg_rtx (compute_mode);
4864 if (! expand_twoval_binop ((unsignedp
4865 ? udivmod_optab
4866 : sdivmod_optab),
4867 op0, op1,
4868 NULL_RTX, remainder, unsignedp))
4869 remainder = 0;
4870 }
4871
4872 if (remainder)
4873 return gen_lowpart (mode, remainder);
4874 }
4875
4876 /* Produce the quotient. Try a quotient insn, but not a library call.
4877 If we have a divmod in this mode, use it in preference to widening
4878 the div (for this test we assume it will not fail). Note that optab2
4879 is set to whichever of the two optabs the call below will use. */
4880 quotient
4881 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4882 op0, op1, rem_flag ? NULL_RTX : target,
4883 unsignedp,
4884 ((optab_handler (optab2, compute_mode)
4885 != CODE_FOR_nothing)
4886 ? OPTAB_DIRECT : OPTAB_WIDEN));
4887
4888 if (quotient == 0)
4889 {
4890 /* No luck there. Try a quotient-and-remainder insn,
4891 keeping the quotient alone. */
4892 quotient = gen_reg_rtx (compute_mode);
4893 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4894 op0, op1,
4895 quotient, NULL_RTX, unsignedp))
4896 {
4897 quotient = 0;
4898 if (! rem_flag)
4899 /* Still no luck. If we are not computing the remainder,
4900 use a library call for the quotient. */
4901 quotient = sign_expand_binop (compute_mode,
4902 udiv_optab, sdiv_optab,
4903 op0, op1, target,
4904 unsignedp, OPTAB_LIB_WIDEN);
4905 }
4906 }
4907 }
4908
4909 if (rem_flag)
4910 {
4911 if (target && GET_MODE (target) != compute_mode)
4912 target = 0;
4913
4914 if (quotient == 0)
4915 {
4916 /* No divide instruction either. Use library for remainder. */
4917 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4918 op0, op1, target,
4919 unsignedp, OPTAB_LIB_WIDEN);
4920 /* No remainder function. Try a quotient-and-remainder
4921 function, keeping the remainder. */
4922 if (!remainder)
4923 {
4924 remainder = gen_reg_rtx (compute_mode);
4925 if (!expand_twoval_binop_libfunc
4926 (unsignedp ? udivmod_optab : sdivmod_optab,
4927 op0, op1,
4928 NULL_RTX, remainder,
4929 unsignedp ? UMOD : MOD))
4930 remainder = NULL_RTX;
4931 }
4932 }
4933 else
4934 {
4935 /* We divided. Now finish doing X - Y * (X / Y). */
4936 remainder = expand_mult (compute_mode, quotient, op1,
4937 NULL_RTX, unsignedp);
4938 remainder = expand_binop (compute_mode, sub_optab, op0,
4939 remainder, target, unsignedp,
4940 OPTAB_LIB_WIDEN);
4941 }
4942 }
4943
4944 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4945 }
4946 \f
4947 /* Return a tree node with data type TYPE, describing the value of X.
4948 Usually this is a VAR_DECL, if there is no obvious better choice.
4949 X may be an expression; however, we only support those expressions
4950 generated by loop.c. */
4951
4952 tree
4953 make_tree (tree type, rtx x)
4954 {
4955 tree t;
4956
4957 switch (GET_CODE (x))
4958 {
4959 case CONST_INT:
4960 {
4961 HOST_WIDE_INT hi = 0;
4962
4963 if (INTVAL (x) < 0
4964 && !(TYPE_UNSIGNED (type)
4965 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4966 < HOST_BITS_PER_WIDE_INT)))
4967 hi = -1;
4968
4969 t = build_int_cst_wide (type, INTVAL (x), hi);
4970
4971 return t;
4972 }
4973
4974 case CONST_DOUBLE:
4975 if (GET_MODE (x) == VOIDmode)
4976 t = build_int_cst_wide (type,
4977 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4978 else
4979 {
4980 REAL_VALUE_TYPE d;
4981
4982 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4983 t = build_real (type, d);
4984 }
4985
4986 return t;
4987
4988 case CONST_VECTOR:
4989 {
4990 int units = CONST_VECTOR_NUNITS (x);
4991 tree itype = TREE_TYPE (type);
4992 tree *elts;
4993 int i;
4994
4995 /* Build a tree with vector elements. */
4996 elts = XALLOCAVEC (tree, units);
4997 for (i = units - 1; i >= 0; --i)
4998 {
4999 rtx elt = CONST_VECTOR_ELT (x, i);
5000 elts[i] = make_tree (itype, elt);
5001 }
5002
5003 return build_vector (type, elts);
5004 }
5005
5006 case PLUS:
5007 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5008 make_tree (type, XEXP (x, 1)));
5009
5010 case MINUS:
5011 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5012 make_tree (type, XEXP (x, 1)));
5013
5014 case NEG:
5015 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5016
5017 case MULT:
5018 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5019 make_tree (type, XEXP (x, 1)));
5020
5021 case ASHIFT:
5022 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5023 make_tree (type, XEXP (x, 1)));
5024
5025 case LSHIFTRT:
5026 t = unsigned_type_for (type);
5027 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5028 make_tree (t, XEXP (x, 0)),
5029 make_tree (type, XEXP (x, 1))));
5030
5031 case ASHIFTRT:
5032 t = signed_type_for (type);
5033 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5034 make_tree (t, XEXP (x, 0)),
5035 make_tree (type, XEXP (x, 1))));
5036
5037 case DIV:
5038 if (TREE_CODE (type) != REAL_TYPE)
5039 t = signed_type_for (type);
5040 else
5041 t = type;
5042
5043 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5044 make_tree (t, XEXP (x, 0)),
5045 make_tree (t, XEXP (x, 1))));
5046 case UDIV:
5047 t = unsigned_type_for (type);
5048 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5049 make_tree (t, XEXP (x, 0)),
5050 make_tree (t, XEXP (x, 1))));
5051
5052 case SIGN_EXTEND:
5053 case ZERO_EXTEND:
5054 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5055 GET_CODE (x) == ZERO_EXTEND);
5056 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5057
5058 case CONST:
5059 return make_tree (type, XEXP (x, 0));
5060
5061 case SYMBOL_REF:
5062 t = SYMBOL_REF_DECL (x);
5063 if (t)
5064 return fold_convert (type, build_fold_addr_expr (t));
5065 /* else fall through. */
5066
5067 default:
5068 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5069
5070 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5071 address mode to pointer mode. */
5072 if (POINTER_TYPE_P (type))
5073 x = convert_memory_address_addr_space
5074 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5075
5076 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5077 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5078 t->decl_with_rtl.rtl = x;
5079
5080 return t;
5081 }
5082 }
5083 \f
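/* Editorial sketch, not part of the original expmed.c: make_tree maps
   LSHIFTRT to a shift performed in the unsigned variant of TYPE and
   ASHIFTRT to the signed variant, because at the tree/C level the
   signedness of the shifted operand selects a logical versus an
   arithmetic right shift.  Function names are illustrative only.  */
static int
logical_shr_sketch (int x, int n)   /* LSHIFTRT: shift done as unsigned */
{
  return (int) ((unsigned int) x >> n);
}

static int
arith_shr_sketch (int x, int n)     /* ASHIFTRT: shift done as signed;
                                       GCC defines >> on a negative int
                                       as an arithmetic shift           */
{
  return x >> n;
}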
5084 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5085 and returning TARGET.
5086
5087 If TARGET is 0, a pseudo-register or constant is returned. */
5088
5089 rtx
5090 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5091 {
5092 rtx tem = 0;
5093
5094 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5095 tem = simplify_binary_operation (AND, mode, op0, op1);
5096 if (tem == 0)
5097 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5098
5099 if (target == 0)
5100 target = tem;
5101 else if (tem != target)
5102 emit_move_insn (target, tem);
5103 return target;
5104 }
5105
5106 /* Helper function for emit_store_flag. */
5107 static rtx
5108 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5109 enum machine_mode mode, enum machine_mode compare_mode,
5110 int unsignedp, rtx x, rtx y, int normalizep,
5111 enum machine_mode target_mode)
5112 {
5113 struct expand_operand ops[4];
5114 rtx op0, last, comparison, subtarget;
5115 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5116
5117 last = get_last_insn ();
5118 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5119 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5120 if (!x || !y)
5121 {
5122 delete_insns_since (last);
5123 return NULL_RTX;
5124 }
5125
5126 if (target_mode == VOIDmode)
5127 target_mode = result_mode;
5128 if (!target)
5129 target = gen_reg_rtx (target_mode);
5130
5131 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5132
5133 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5134 create_fixed_operand (&ops[1], comparison);
5135 create_fixed_operand (&ops[2], x);
5136 create_fixed_operand (&ops[3], y);
5137 if (!maybe_expand_insn (icode, 4, ops))
5138 {
5139 delete_insns_since (last);
5140 return NULL_RTX;
5141 }
5142 subtarget = ops[0].value;
5143
5144 /* If we are converting to a wider mode, first convert to
5145 TARGET_MODE, then normalize. This produces better combining
5146 opportunities on machines that have a SIGN_EXTRACT when we are
5147 testing a single bit. This mostly benefits the 68k.
5148
5149 If STORE_FLAG_VALUE does not have the sign bit set when
5150 interpreted in MODE, we can do this conversion as unsigned, which
5151 is usually more efficient. */
5152 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5153 {
5154 convert_move (target, subtarget,
5155 val_signbit_known_clear_p (result_mode,
5156 STORE_FLAG_VALUE));
5157 op0 = target;
5158 result_mode = target_mode;
5159 }
5160 else
5161 op0 = subtarget;
5162
5163 /* If we want to keep subexpressions around, don't reuse our last
5164 target. */
5165 if (optimize)
5166 subtarget = 0;
5167
5168 /* Now normalize to the proper value in MODE. Sometimes we don't
5169 have to do anything. */
5170 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5171 ;
5172 /* STORE_FLAG_VALUE might be the most negative number, so write
5173 the comparison this way to avoid a compile-time warning. */
5174 else if (- normalizep == STORE_FLAG_VALUE)
5175 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5176
5177 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5178 it hard to use a value of just the sign bit due to ANSI integer
5179 constant typing rules. */
5180 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5181 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5182 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5183 normalizep == 1);
5184 else
5185 {
5186 gcc_assert (STORE_FLAG_VALUE & 1);
5187
5188 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5189 if (normalizep == -1)
5190 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5191 }
5192
5193 /* If we were converting to a smaller mode, do the conversion now. */
5194 if (target_mode != result_mode)
5195 {
5196 convert_move (target, op0, 0);
5197 return target;
5198 }
5199 else
5200 return op0;
5201 }
5202
5203
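/* Editorial sketch, not part of the original expmed.c: the normalization
   emit_cstore applies when the raw comparison result has only the sign
   bit set (val_signbit_known_set_p).  A logical shift of that bit down
   to bit 0 yields 0/1, an arithmetic shift yields 0/-1.  The name and
   the 32-bit width below are illustrative assumptions.  */
static int
normalize_signbit_flag_sketch (int raw, int normalizep)
{
  if (normalizep == 1)
    return (int) ((unsigned int) raw >> 31);   /* 0 or 1               */
  else
    return raw >> 31;                          /* 0 or -1 (arithmetic) */
}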
5204 /* A subroutine of emit_store_flag only including "tricks" that do not
5205 need a recursive call. These are kept separate to avoid infinite
5206 loops. */
5207
5208 static rtx
5209 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5210 enum machine_mode mode, int unsignedp, int normalizep,
5211 enum machine_mode target_mode)
5212 {
5213 rtx subtarget;
5214 enum insn_code icode;
5215 enum machine_mode compare_mode;
5216 enum mode_class mclass;
5217 enum rtx_code scode;
5218 rtx tem;
5219
5220 if (unsignedp)
5221 code = unsigned_condition (code);
5222 scode = swap_condition (code);
5223
5224 /* If one operand is constant, make it the second one. Only do this
5225 if the other operand is not constant as well. */
5226
5227 if (swap_commutative_operands_p (op0, op1))
5228 {
5229 tem = op0;
5230 op0 = op1;
5231 op1 = tem;
5232 code = swap_condition (code);
5233 }
5234
5235 if (mode == VOIDmode)
5236 mode = GET_MODE (op0);
5237
5238 /* For some comparisons with 1 and -1, we can convert this to
5239 comparisons with zero. This will often produce more opportunities for
5240 store-flag insns. */
5241
5242 switch (code)
5243 {
5244 case LT:
5245 if (op1 == const1_rtx)
5246 op1 = const0_rtx, code = LE;
5247 break;
5248 case LE:
5249 if (op1 == constm1_rtx)
5250 op1 = const0_rtx, code = LT;
5251 break;
5252 case GE:
5253 if (op1 == const1_rtx)
5254 op1 = const0_rtx, code = GT;
5255 break;
5256 case GT:
5257 if (op1 == constm1_rtx)
5258 op1 = const0_rtx, code = GE;
5259 break;
5260 case GEU:
5261 if (op1 == const1_rtx)
5262 op1 = const0_rtx, code = NE;
5263 break;
5264 case LTU:
5265 if (op1 == const1_rtx)
5266 op1 = const0_rtx, code = EQ;
5267 break;
5268 default:
5269 break;
5270 }
5271
5272 /* If we are comparing a double-word integer with zero or -1, we can
5273 convert the comparison into one involving a single word. */
5274 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5275 && GET_MODE_CLASS (mode) == MODE_INT
5276 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5277 {
5278 if ((code == EQ || code == NE)
5279 && (op1 == const0_rtx || op1 == constm1_rtx))
5280 {
5281 rtx op00, op01;
5282
5283 /* Do a logical OR or AND of the two words and compare the
5284 result. */
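/* Editorial worked example (not in the original source): with 32-bit
   words, a double-word X is zero exactly when (X_lo | X_hi) == 0 and
   is -1 exactly when (X_lo & X_hi) == 0xffffffff, so one word-mode
   store-flag of the OR/AND result against OP1 decides EQ/NE.  */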
5285 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5286 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5287 tem = expand_binop (word_mode,
5288 op1 == const0_rtx ? ior_optab : and_optab,
5289 op00, op01, NULL_RTX, unsignedp,
5290 OPTAB_DIRECT);
5291
5292 if (tem != 0)
5293 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5294 unsignedp, normalizep);
5295 }
5296 else if ((code == LT || code == GE) && op1 == const0_rtx)
5297 {
5298 rtx op0h;
5299
5300 /* If testing the sign bit, we can just test the high word. */
5301 op0h = simplify_gen_subreg (word_mode, op0, mode,
5302 subreg_highpart_offset (word_mode,
5303 mode));
5304 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5305 unsignedp, normalizep);
5306 }
5307 else
5308 tem = NULL_RTX;
5309
5310 if (tem)
5311 {
5312 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5313 return tem;
5314 if (!target)
5315 target = gen_reg_rtx (target_mode);
5316
5317 convert_move (target, tem,
5318 !val_signbit_known_set_p (word_mode,
5319 (normalizep ? normalizep
5320 : STORE_FLAG_VALUE)));
5321 return target;
5322 }
5323 }
5324
5325 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5326 complement of A (for GE) and shifting the sign bit to the low bit. */
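/* Editorial worked example (not in the original source): for a 32-bit A,
   "A < 0" is the sign bit of A shifted down to bit 0, and "A >= 0" is
   the sign bit of ~A shifted down; e.g. A = -7 gives (unsigned) A >> 31
   == 1 and (unsigned) ~A >> 31 == 0.  */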
5327 if (op1 == const0_rtx && (code == LT || code == GE)
5328 && GET_MODE_CLASS (mode) == MODE_INT
5329 && (normalizep || STORE_FLAG_VALUE == 1
5330 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5331 {
5332 subtarget = target;
5333
5334 if (!target)
5335 target_mode = mode;
5336
5337 /* If the result is to be wider than OP0, it is best to convert it
5338 first. If it is to be narrower, it is *incorrect* to convert it
5339 first. */
5340 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5341 {
5342 op0 = convert_modes (target_mode, mode, op0, 0);
5343 mode = target_mode;
5344 }
5345
5346 if (target_mode != mode)
5347 subtarget = 0;
5348
5349 if (code == GE)
5350 op0 = expand_unop (mode, one_cmpl_optab, op0,
5351 ((STORE_FLAG_VALUE == 1 || normalizep)
5352 ? 0 : subtarget), 0);
5353
5354 if (STORE_FLAG_VALUE == 1 || normalizep)
5355 /* If we are supposed to produce a 0/1 value, we want to do
5356 a logical shift from the sign bit to the low-order bit; for
5357 a -1/0 value, we do an arithmetic shift. */
5358 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5359 GET_MODE_BITSIZE (mode) - 1,
5360 subtarget, normalizep != -1);
5361
5362 if (mode != target_mode)
5363 op0 = convert_modes (target_mode, mode, op0, 0);
5364
5365 return op0;
5366 }
5367
5368 mclass = GET_MODE_CLASS (mode);
5369 for (compare_mode = mode; compare_mode != VOIDmode;
5370 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5371 {
5372 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5373 icode = optab_handler (cstore_optab, optab_mode);
5374 if (icode != CODE_FOR_nothing)
5375 {
5376 do_pending_stack_adjust ();
5377 tem = emit_cstore (target, icode, code, mode, compare_mode,
5378 unsignedp, op0, op1, normalizep, target_mode);
5379 if (tem)
5380 return tem;
5381
5382 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5383 {
5384 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5385 unsignedp, op1, op0, normalizep, target_mode);
5386 if (tem)
5387 return tem;
5388 }
5389 break;
5390 }
5391 }
5392
5393 return 0;
5394 }
5395
5396 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5397 and storing in TARGET. Normally return TARGET.
5398 Return 0 if that cannot be done.
5399
5400 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5401 it is VOIDmode, they cannot both be CONST_INT.
5402
5403 UNSIGNEDP is for the case where we have to widen the operands
5404 to perform the operation. It says to use zero-extension.
5405
5406 NORMALIZEP is 1 if we should convert the result to be either zero
5407 or one. NORMALIZEP is -1 if we should convert the result to be
5408 either zero or -1. If NORMALIZEP is zero, the result will be left
5409 "raw" out of the scc insn. */
5410
5411 rtx
5412 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5413 enum machine_mode mode, int unsignedp, int normalizep)
5414 {
5415 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5416 enum rtx_code rcode;
5417 rtx subtarget;
5418 rtx tem, last, trueval;
5419
5420 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5421 target_mode);
5422 if (tem)
5423 return tem;
5424
5425 /* If we reached here, we can't do this with a scc insn; however, there
5426 are some comparisons that can be done in other ways. Don't do any
5427 of these cases if branches are very cheap. */
5428 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5429 return 0;
5430
5431 /* See what we need to return. We can only return a 1, -1, or the
5432 sign bit. */
5433
5434 if (normalizep == 0)
5435 {
5436 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5437 normalizep = STORE_FLAG_VALUE;
5438
5439 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5440 ;
5441 else
5442 return 0;
5443 }
5444
5445 last = get_last_insn ();
5446
5447 /* If optimizing, use different pseudo registers for each insn, instead
5448 of reusing the same pseudo. This leads to better CSE, but slows
5449 down the compiler, since there are more pseudos. */
5450 subtarget = (!optimize
5451 && (target_mode == mode)) ? target : NULL_RTX;
5452 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5453
5454 /* For floating-point comparisons, try the reverse comparison or try
5455 changing the "orderedness" of the comparison. */
5456 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5457 {
5458 enum rtx_code first_code;
5459 bool and_them;
5460
5461 rcode = reverse_condition_maybe_unordered (code);
5462 if (can_compare_p (rcode, mode, ccp_store_flag)
5463 && (code == ORDERED || code == UNORDERED
5464 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5465 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5466 {
5467 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5468 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5469
5470 /* For the reverse comparison, use either an addition or a XOR. */
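/* Editorial worked example (not in the original source): if the reverse
   comparison R yields STORE_FLAG_VALUE == 1 when it holds, then for
   NORMALIZEP == -1 the wanted result is R + (-1): R == 1 (comparison
   false) gives 0 and R == 0 (comparison true) gives -1.  The XOR form
   flips R against TRUEVAL instead.  */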
5471 if (want_add
5472 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5473 optimize_insn_for_speed_p ()) == 0)
5474 {
5475 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5476 STORE_FLAG_VALUE, target_mode);
5477 if (tem)
5478 return expand_binop (target_mode, add_optab, tem,
5479 GEN_INT (normalizep),
5480 target, 0, OPTAB_WIDEN);
5481 }
5482 else if (!want_add
5483 && rtx_cost (trueval, XOR, 1,
5484 optimize_insn_for_speed_p ()) == 0)
5485 {
5486 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5487 normalizep, target_mode);
5488 if (tem)
5489 return expand_binop (target_mode, xor_optab, tem, trueval,
5490 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5491 }
5492 }
5493
5494 delete_insns_since (last);
5495
5496 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5497 if (code == ORDERED || code == UNORDERED)
5498 return 0;
5499
5500 and_them = split_comparison (code, mode, &first_code, &code);
5501
5502 /* If there are no NaNs, the first comparison should always fall through.
5503 Effectively change the comparison to the other one. */
5504 if (!HONOR_NANS (mode))
5505 {
5506 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5507 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5508 target_mode);
5509 }
5510
5511 #ifdef HAVE_conditional_move
5512 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5513 conditional move. */
5514 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5515 normalizep, target_mode);
5516 if (tem == 0)
5517 return 0;
5518
5519 if (and_them)
5520 tem = emit_conditional_move (target, code, op0, op1, mode,
5521 tem, const0_rtx, GET_MODE (tem), 0);
5522 else
5523 tem = emit_conditional_move (target, code, op0, op1, mode,
5524 trueval, tem, GET_MODE (tem), 0);
5525
5526 if (tem == 0)
5527 delete_insns_since (last);
5528 return tem;
5529 #else
5530 return 0;
5531 #endif
5532 }
5533
5534 /* The remaining tricks only apply to integer comparisons. */
5535
5536 if (GET_MODE_CLASS (mode) != MODE_INT)
5537 return 0;
5538
5539 /* If this is an equality comparison of integers, we can try to exclusive-or
5540 (or subtract) the two operands and use a recursive call to try the
5541 comparison with zero. Don't do any of these cases if branches are
5542 very cheap. */
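/* Editorial note (not in the original source): this works because
   A == B exactly when (A ^ B) == 0, and likewise when (A - B) == 0.  */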
5543
5544 if ((code == EQ || code == NE) && op1 != const0_rtx)
5545 {
5546 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5547 OPTAB_WIDEN);
5548
5549 if (tem == 0)
5550 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5551 OPTAB_WIDEN);
5552 if (tem != 0)
5553 tem = emit_store_flag (target, code, tem, const0_rtx,
5554 mode, unsignedp, normalizep);
5555 if (tem != 0)
5556 return tem;
5557
5558 delete_insns_since (last);
5559 }
5560
5561 /* For integer comparisons, try the reverse comparison. However, for
5562 small X, if we would have to extend it anyway, implementing "X != 0"
5563 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
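/* Editorial worked example (not in the original source): for X
   zero-extended from a narrower mode into a 32-bit int, -(int) X is
   negative exactly when X != 0, so an arithmetic shift by 31 gives -1
   for nonzero X and 0 for zero X.  */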
5564 rcode = reverse_condition (code);
5565 if (can_compare_p (rcode, mode, ccp_store_flag)
5566 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5567 && code == NE
5568 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5569 && op1 == const0_rtx))
5570 {
5571 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5572 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5573
5574 /* Again, for the reverse comparison, use either an addition or a XOR. */
5575 if (want_add
5576 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5577 optimize_insn_for_speed_p ()) == 0)
5578 {
5579 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5580 STORE_FLAG_VALUE, target_mode);
5581 if (tem != 0)
5582 tem = expand_binop (target_mode, add_optab, tem,
5583 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5584 }
5585 else if (!want_add
5586 && rtx_cost (trueval, XOR, 1,
5587 optimize_insn_for_speed_p ()) == 0)
5588 {
5589 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5590 normalizep, target_mode);
5591 if (tem != 0)
5592 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5593 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5594 }
5595
5596 if (tem != 0)
5597 return tem;
5598 delete_insns_since (last);
5599 }
5600
5601 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5602 the constant zero. Reject all other comparisons at this point. Only
5603 do LE and GT if branches are expensive since they are expensive on
5604 2-operand machines. */
5605
5606 if (op1 != const0_rtx
5607 || (code != EQ && code != NE
5608 && (BRANCH_COST (optimize_insn_for_speed_p (),
5609 false) <= 1 || (code != LE && code != GT))))
5610 return 0;
5611
5612 /* Try to put the result of the comparison in the sign bit. Assume we can't
5613 do the necessary operation below. */
5614
5615 tem = 0;
5616
5617 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5618 the sign bit set. */
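/* Editorial worked example (not in the original source): A = 0 gives
   0 | -1 = -1 (sign set), A = 5 gives 5 | 4 = 5 (sign clear), and any
   negative A already has the sign bit set, so the sign bit of
   (A | (A - 1)) is exactly "A <= 0".  */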
5619
5620 if (code == LE)
5621 {
5622 /* This is destructive, so SUBTARGET can't be OP0. */
5623 if (rtx_equal_p (subtarget, op0))
5624 subtarget = 0;
5625
5626 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5627 OPTAB_WIDEN);
5628 if (tem)
5629 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5630 OPTAB_WIDEN);
5631 }
5632
5633 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5634 number of bits in the mode of OP0, minus one. */
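/* Editorial worked example (not in the original source), using the
   arithmetic shift computed below with BITS = 31: A = 5 gives
   (5 >> 31) - 5 = -5 (sign set), A = 0 gives 0, and A = -3 gives
   -1 - (-3) = 2 (sign clear), so the sign bit of the result is exactly
   "A > 0".  */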
5635
5636 if (code == GT)
5637 {
5638 if (rtx_equal_p (subtarget, op0))
5639 subtarget = 0;
5640
5641 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5642 GET_MODE_BITSIZE (mode) - 1,
5643 subtarget, 0);
5644 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5645 OPTAB_WIDEN);
5646 }
5647
5648 if (code == EQ || code == NE)
5649 {
5650 /* For EQ or NE, one way to do the comparison is to apply an operation
5651 that converts the operand into a positive number if it is nonzero
5652 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5653 for NE we negate. This puts the result in the sign bit. Then we
5654 normalize with a shift, if needed.
5655
5656 Two operations that can do the above actions are ABS and FFS, so try
5657 them. If that doesn't work, and MODE is smaller than a full word,
5658 we can use zero-extension to the wider mode (an unsigned conversion)
5659 as the operation. */
5660
5661 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5662 that is compensated by the subsequent overflow when subtracting
5663 one / negating. */
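/* Editorial worked example (not in the original source): with ABS as the
   operation, abs (A) - 1 is -1 (sign set) only for A == 0, and
   - abs (A) is negative (sign set) only for A != 0; INT_MIN wraps in
   both cases but still lands on the correct sign.  */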
5664
5665 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5666 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5667 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5668 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5669 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5670 {
5671 tem = convert_modes (word_mode, mode, op0, 1);
5672 mode = word_mode;
5673 }
5674
5675 if (tem != 0)
5676 {
5677 if (code == EQ)
5678 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5679 0, OPTAB_WIDEN);
5680 else
5681 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5682 }
5683
5684 /* If we couldn't do it that way, for NE we can "or" the two's complement
5685 of the value with itself. For EQ, we take the one's complement of
5686 that "or", which is an extra insn, so we only handle EQ if branches
5687 are expensive. */
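/* Editorial worked example (not in the original source): for any nonzero
   A, either A or -A is negative, so (-A | A) has the sign bit set, while
   A == 0 gives 0; complementing that value instead marks A == 0 in the
   sign bit.  */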
5688
5689 if (tem == 0
5690 && (code == NE
5691 || BRANCH_COST (optimize_insn_for_speed_p (),
5692 false) > 1))
5693 {
5694 if (rtx_equal_p (subtarget, op0))
5695 subtarget = 0;
5696
5697 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5698 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5699 OPTAB_WIDEN);
5700
5701 if (tem && code == EQ)
5702 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5703 }
5704 }
5705
5706 if (tem && normalizep)
5707 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5708 GET_MODE_BITSIZE (mode) - 1,
5709 subtarget, normalizep == 1);
5710
5711 if (tem)
5712 {
5713 if (!target)
5714 ;
5715 else if (GET_MODE (tem) != target_mode)
5716 {
5717 convert_move (target, tem, 0);
5718 tem = target;
5719 }
5720 else if (!subtarget)
5721 {
5722 emit_move_insn (target, tem);
5723 tem = target;
5724 }
5725 }
5726 else
5727 delete_insns_since (last);
5728
5729 return tem;
5730 }
5731
5732 /* Like emit_store_flag, but always succeeds. */
5733
5734 rtx
5735 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5736 enum machine_mode mode, int unsignedp, int normalizep)
5737 {
5738 rtx tem, label;
5739 rtx trueval, falseval;
5740
5741 /* First see if emit_store_flag can do the job. */
5742 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5743 if (tem != 0)
5744 return tem;
5745
5746 if (!target)
5747 target = gen_reg_rtx (word_mode);
5748
5749 /* If this failed, we have to do this with set/compare/jump/set code.
5750 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5751 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5752 if (code == NE
5753 && GET_MODE_CLASS (mode) == MODE_INT
5754 && REG_P (target)
5755 && op0 == target
5756 && op1 == const0_rtx)
5757 {
5758 label = gen_label_rtx ();
5759 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5760 mode, NULL_RTX, NULL_RTX, label, -1);
5761 emit_move_insn (target, trueval);
5762 emit_label (label);
5763 return target;
5764 }
5765
5766 if (!REG_P (target)
5767 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5768 target = gen_reg_rtx (GET_MODE (target));
5769
5770 /* Jump in the right direction if the target cannot implement CODE
5771 but can jump on its reverse condition. */
5772 falseval = const0_rtx;
5773 if (! can_compare_p (code, mode, ccp_jump)
5774 && (! FLOAT_MODE_P (mode)
5775 || code == ORDERED || code == UNORDERED
5776 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5777 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5778 {
5779 enum rtx_code rcode;
5780 if (FLOAT_MODE_P (mode))
5781 rcode = reverse_condition_maybe_unordered (code);
5782 else
5783 rcode = reverse_condition (code);
5784
5785 /* Canonicalize to UNORDERED for the libcall. */
5786 if (can_compare_p (rcode, mode, ccp_jump)
5787 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5788 {
5789 falseval = trueval;
5790 trueval = const0_rtx;
5791 code = rcode;
5792 }
5793 }
5794
5795 emit_move_insn (target, trueval);
5796 label = gen_label_rtx ();
5797 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5798 NULL_RTX, label, -1);
5799
5800 emit_move_insn (target, falseval);
5801 emit_label (label);
5802
5803 return target;
5804 }
5805 \f
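/* Editorial sketch, not part of the original expmed.c: the
   set/compare/jump/set fallback used by emit_store_flag_force when no
   store-flag sequence exists, shown on plain ints with a hypothetical
   "less than" comparison.  */
static int
store_flag_force_sketch (int op0, int op1)
{
  int target = 1;               /* emit_move_insn (target, trueval)  */
  if (op0 < op1)                /* do_compare_rtx_and_jump: skip the  */
    goto done;                  /* second store when the test holds   */
  target = 0;                   /* emit_move_insn (target, falseval)  */
done:
  return target;
}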
5806 /* Perform possibly multi-word comparison and conditional jump to LABEL
5807 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5808 now a thin wrapper around do_compare_rtx_and_jump. */
5809
5810 static void
5811 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5812 rtx label)
5813 {
5814 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5815 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5816 NULL_RTX, NULL_RTX, label, -1);
5817 }