1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx);
51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, rtx);
53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT, rtx, int, bool);
57 static rtx mask_rtx (enum machine_mode, int, int, int);
58 static rtx lshift_value (enum machine_mode, rtx, int, int);
59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, int);
61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
64
  65 /* Test whether a value is zero or a power of two.  */
66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
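/* Editorial illustration (not part of the original source): the macro above
   relies on the standard bit trick that clearing the lowest set bit of a
   power of two yields zero.  For example, 8 & 7 == 0b1000 & 0b0111 == 0, so
   8 passes; 12 & 11 == 0b1100 & 0b1011 == 0b1000 != 0, so 12 fails; and
   0 & -1 == 0, which is why zero is accepted as well.  */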
67
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
71
72
73 /* Reduce conditional compilation elsewhere. */
74 #ifndef HAVE_insv
75 #define HAVE_insv 0
76 #define CODE_FOR_insv CODE_FOR_nothing
77 #define gen_insv(a,b,c,d) NULL_RTX
78 #endif
79 #ifndef HAVE_extv
80 #define HAVE_extv 0
81 #define CODE_FOR_extv CODE_FOR_nothing
82 #define gen_extv(a,b,c,d) NULL_RTX
83 #endif
84 #ifndef HAVE_extzv
85 #define HAVE_extzv 0
86 #define CODE_FOR_extzv CODE_FOR_nothing
87 #define gen_extzv(a,b,c,d) NULL_RTX
88 #endif
89
90 void
91 init_expmed (void)
92 {
93 struct
94 {
95 struct rtx_def reg; rtunion reg_fld[2];
96 struct rtx_def plus; rtunion plus_fld1;
97 struct rtx_def neg;
98 struct rtx_def mult; rtunion mult_fld1;
99 struct rtx_def sdiv; rtunion sdiv_fld1;
100 struct rtx_def udiv; rtunion udiv_fld1;
101 struct rtx_def zext;
102 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
103 struct rtx_def smod_32; rtunion smod_32_fld1;
104 struct rtx_def wide_mult; rtunion wide_mult_fld1;
105 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
106 struct rtx_def wide_trunc;
107 struct rtx_def shift; rtunion shift_fld1;
108 struct rtx_def shift_mult; rtunion shift_mult_fld1;
109 struct rtx_def shift_add; rtunion shift_add_fld1;
110 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
111 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
112 } all;
113
114 rtx pow2[MAX_BITS_PER_WORD];
115 rtx cint[MAX_BITS_PER_WORD];
116 int m, n;
117 enum machine_mode mode, wider_mode;
118 int speed;
119
120
121 for (m = 1; m < MAX_BITS_PER_WORD; m++)
122 {
123 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
124 cint[m] = GEN_INT (m);
125 }
126 memset (&all, 0, sizeof all);
127
128 PUT_CODE (&all.reg, REG);
129 /* Avoid using hard regs in ways which may be unsupported. */
130 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
131
132 PUT_CODE (&all.plus, PLUS);
133 XEXP (&all.plus, 0) = &all.reg;
134 XEXP (&all.plus, 1) = &all.reg;
135
136 PUT_CODE (&all.neg, NEG);
137 XEXP (&all.neg, 0) = &all.reg;
138
139 PUT_CODE (&all.mult, MULT);
140 XEXP (&all.mult, 0) = &all.reg;
141 XEXP (&all.mult, 1) = &all.reg;
142
143 PUT_CODE (&all.sdiv, DIV);
144 XEXP (&all.sdiv, 0) = &all.reg;
145 XEXP (&all.sdiv, 1) = &all.reg;
146
147 PUT_CODE (&all.udiv, UDIV);
148 XEXP (&all.udiv, 0) = &all.reg;
149 XEXP (&all.udiv, 1) = &all.reg;
150
151 PUT_CODE (&all.sdiv_32, DIV);
152 XEXP (&all.sdiv_32, 0) = &all.reg;
153 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
154
155 PUT_CODE (&all.smod_32, MOD);
156 XEXP (&all.smod_32, 0) = &all.reg;
157 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
158
159 PUT_CODE (&all.zext, ZERO_EXTEND);
160 XEXP (&all.zext, 0) = &all.reg;
161
162 PUT_CODE (&all.wide_mult, MULT);
163 XEXP (&all.wide_mult, 0) = &all.zext;
164 XEXP (&all.wide_mult, 1) = &all.zext;
165
166 PUT_CODE (&all.wide_lshr, LSHIFTRT);
167 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
168
169 PUT_CODE (&all.wide_trunc, TRUNCATE);
170 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
171
172 PUT_CODE (&all.shift, ASHIFT);
173 XEXP (&all.shift, 0) = &all.reg;
174
175 PUT_CODE (&all.shift_mult, MULT);
176 XEXP (&all.shift_mult, 0) = &all.reg;
177
178 PUT_CODE (&all.shift_add, PLUS);
179 XEXP (&all.shift_add, 0) = &all.shift_mult;
180 XEXP (&all.shift_add, 1) = &all.reg;
181
182 PUT_CODE (&all.shift_sub0, MINUS);
183 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
184 XEXP (&all.shift_sub0, 1) = &all.reg;
185
186 PUT_CODE (&all.shift_sub1, MINUS);
187 XEXP (&all.shift_sub1, 0) = &all.reg;
188 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
189
190 for (speed = 0; speed < 2; speed++)
191 {
192 crtl->maybe_hot_insn_p = speed;
193 zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
194
195 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
196 mode != VOIDmode;
197 mode = GET_MODE_WIDER_MODE (mode))
198 {
199 PUT_MODE (&all.reg, mode);
200 PUT_MODE (&all.plus, mode);
201 PUT_MODE (&all.neg, mode);
202 PUT_MODE (&all.mult, mode);
203 PUT_MODE (&all.sdiv, mode);
204 PUT_MODE (&all.udiv, mode);
205 PUT_MODE (&all.sdiv_32, mode);
206 PUT_MODE (&all.smod_32, mode);
207 PUT_MODE (&all.wide_trunc, mode);
208 PUT_MODE (&all.shift, mode);
209 PUT_MODE (&all.shift_mult, mode);
210 PUT_MODE (&all.shift_add, mode);
211 PUT_MODE (&all.shift_sub0, mode);
212 PUT_MODE (&all.shift_sub1, mode);
213
214 add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
215 neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
216 mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
217 sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
218 udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
219
220 sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
221 <= 2 * add_cost[speed][mode]);
222 smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
223 <= 4 * add_cost[speed][mode]);
224
225 wider_mode = GET_MODE_WIDER_MODE (mode);
226 if (wider_mode != VOIDmode)
227 {
228 PUT_MODE (&all.zext, wider_mode);
229 PUT_MODE (&all.wide_mult, wider_mode);
230 PUT_MODE (&all.wide_lshr, wider_mode);
231 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
232
233 mul_widen_cost[speed][wider_mode]
234 = rtx_cost (&all.wide_mult, SET, speed);
235 mul_highpart_cost[speed][mode]
236 = rtx_cost (&all.wide_trunc, SET, speed);
237 }
238
239 shift_cost[speed][mode][0] = 0;
240 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
241 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
242
243 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
244 for (m = 1; m < n; m++)
245 {
246 XEXP (&all.shift, 1) = cint[m];
247 XEXP (&all.shift_mult, 1) = pow2[m];
248
249 shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
250 shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
251 shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
252 shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
253 }
254 }
255 }
256 if (alg_hash_used_p)
257 memset (alg_hash, 0, sizeof (alg_hash));
258 else
259 alg_hash_used_p = true;
260 default_rtl_profile ();
261 }
262
263 /* Return an rtx representing minus the value of X.
264 MODE is the intended mode of the result,
265 useful if X is a CONST_INT. */
266
267 rtx
268 negate_rtx (enum machine_mode mode, rtx x)
269 {
270 rtx result = simplify_unary_operation (NEG, mode, x, mode);
271
272 if (result == 0)
273 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
274
275 return result;
276 }
277
278 /* Report on the availability of insv/extv/extzv and the desired mode
279 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
280 is false; else the mode of the specified operand. If OPNO is -1,
281 all the caller cares about is whether the insn is available. */
282 enum machine_mode
283 mode_for_extraction (enum extraction_pattern pattern, int opno)
284 {
285 const struct insn_data_d *data;
286
287 switch (pattern)
288 {
289 case EP_insv:
290 if (HAVE_insv)
291 {
292 data = &insn_data[CODE_FOR_insv];
293 break;
294 }
295 return MAX_MACHINE_MODE;
296
297 case EP_extv:
298 if (HAVE_extv)
299 {
300 data = &insn_data[CODE_FOR_extv];
301 break;
302 }
303 return MAX_MACHINE_MODE;
304
305 case EP_extzv:
306 if (HAVE_extzv)
307 {
308 data = &insn_data[CODE_FOR_extzv];
309 break;
310 }
311 return MAX_MACHINE_MODE;
312
313 default:
314 gcc_unreachable ();
315 }
316
317 if (opno == -1)
318 return VOIDmode;
319
320 /* Everyone who uses this function used to follow it with
321 if (result == VOIDmode) result = word_mode; */
322 if (data->operand[opno].mode == VOIDmode)
323 return word_mode;
324 return data->operand[opno].mode;
325 }
326 \f
327 /* A subroutine of store_bit_field, with the same arguments. Return true
328 if the operation could be implemented.
329
330 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
331 no other way of implementing the operation. If FALLBACK_P is false,
332 return false instead. */
333
334 static bool
335 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value, bool fallback_p)
338 {
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
345
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
347
348 while (GET_CODE (op0) == SUBREG)
349 {
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
356 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
357
358 byte_offset = 0;
359
360 /* Paradoxical subregs need special handling on big endian machines. */
361 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
362 {
363 int difference = inner_mode_size - outer_mode_size;
364
365 if (WORDS_BIG_ENDIAN)
366 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
367 if (BYTES_BIG_ENDIAN)
368 byte_offset += difference % UNITS_PER_WORD;
369 }
370 else
371 byte_offset = SUBREG_BYTE (op0);
372
373 bitnum += byte_offset * BITS_PER_UNIT;
374 op0 = SUBREG_REG (op0);
375 }
376
377 /* No action is needed if the target is a register and if the field
378 lies completely outside that register. This can occur if the source
379 code contains an out-of-bounds access to a small array. */
380 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
381 return true;
382
383 /* Use vec_set patterns for inserting parts of vectors whenever
384 available. */
385 if (VECTOR_MODE_P (GET_MODE (op0))
386 && !MEM_P (op0)
387 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
388 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
389 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
390 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
391 {
392 struct expand_operand ops[3];
393 enum machine_mode outermode = GET_MODE (op0);
394 enum machine_mode innermode = GET_MODE_INNER (outermode);
395 enum insn_code icode = optab_handler (vec_set_optab, outermode);
396 int pos = bitnum / GET_MODE_BITSIZE (innermode);
397
398 create_fixed_operand (&ops[0], op0);
399 create_input_operand (&ops[1], value, innermode);
400 create_integer_operand (&ops[2], pos);
401 if (maybe_expand_insn (icode, 3, ops))
402 return true;
403 }
404
405 /* If the target is a register, overwriting the entire object, or storing
406 a full-word or multi-word field can be done with just a SUBREG.
407
408 If the target is memory, storing any naturally aligned field can be
409 done with a simple store. For targets that support fast unaligned
410 memory, any naturally sized, unit aligned field can be done directly. */
411
412 offset = bitnum / unit;
413 bitpos = bitnum % unit;
414 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
415 + (offset * UNITS_PER_WORD);
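  /* Editorial worked example (assuming 32-bit words and 8-bit units, not
     part of the original source): a register access with bitnum == 37 gives
     offset == 1 and bitpos == 5, and byte_offset == (37 % 32) / 8 + 1 * 4
     == 4, i.e. the field starts 5 bits into the word that begins 4 bytes
     into the object.  */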
416
417 if (bitpos == 0
418 && bitsize == GET_MODE_BITSIZE (fieldmode)
419 && (!MEM_P (op0)
420 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
421 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
422 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
423 || validate_subreg (fieldmode, GET_MODE (op0), op0,
424 byte_offset)))
425 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
426 || (offset * BITS_PER_UNIT % bitsize == 0
427 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
428 {
429 if (MEM_P (op0))
430 op0 = adjust_address (op0, fieldmode, offset);
431 else if (GET_MODE (op0) != fieldmode)
432 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
433 byte_offset);
434 emit_move_insn (op0, value);
435 return true;
436 }
437
438 /* Make sure we are playing with integral modes. Pun with subregs
439 if we aren't. This must come after the entire register case above,
440 since that case is valid for any mode. The following cases are only
441 valid for integral modes. */
442 {
443 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
444 if (imode != GET_MODE (op0))
445 {
446 if (MEM_P (op0))
447 op0 = adjust_address (op0, imode, 0);
448 else
449 {
450 gcc_assert (imode != BLKmode);
451 op0 = gen_lowpart (imode, op0);
452 }
453 }
454 }
455
456 /* We may be accessing data outside the field, which means
457 we can alias adjacent data. */
458 if (MEM_P (op0))
459 {
460 op0 = shallow_copy_rtx (op0);
461 set_mem_alias_set (op0, 0);
462 set_mem_expr (op0, 0);
463 }
464
465 /* If OP0 is a register, BITPOS must count within a word.
466 But as we have it, it counts within whatever size OP0 now has.
467 On a bigendian machine, these are not the same, so convert. */
468 if (BYTES_BIG_ENDIAN
469 && !MEM_P (op0)
470 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
471 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
472
473 /* Storing an lsb-aligned field in a register
474 can be done with a movestrict instruction. */
475
476 if (!MEM_P (op0)
477 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
478 && bitsize == GET_MODE_BITSIZE (fieldmode)
479 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
480 {
481 struct expand_operand ops[2];
482 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
483 rtx arg0 = op0;
484 unsigned HOST_WIDE_INT subreg_off;
485
486 if (GET_CODE (arg0) == SUBREG)
487 {
488 /* Else we've got some float mode source being extracted into
489 a different float mode destination -- this combination of
490 subregs results in Severe Tire Damage. */
491 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
492 || GET_MODE_CLASS (fieldmode) == MODE_INT
493 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
494 arg0 = SUBREG_REG (arg0);
495 }
496
497 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
498 + (offset * UNITS_PER_WORD);
499 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
500 {
501 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
502
503 create_fixed_operand (&ops[0], arg0);
504 /* Shrink the source operand to FIELDMODE. */
505 create_convert_operand_to (&ops[1], value, fieldmode, false);
506 if (maybe_expand_insn (icode, 2, ops))
507 return true;
508 }
509 }
510
511 /* Handle fields bigger than a word. */
512
513 if (bitsize > BITS_PER_WORD)
514 {
515 /* Here we transfer the words of the field
516 in the order least significant first.
517 This is because the most significant word is the one which may
518 be less than full.
519 However, only do that if the value is not BLKmode. */
520
521 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
522 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
523 unsigned int i;
524 rtx last;
525
526 /* This is the mode we must force value to, so that there will be enough
527 subwords to extract. Note that fieldmode will often (always?) be
528 VOIDmode, because that is what store_field uses to indicate that this
529 is a bit field, but passing VOIDmode to operand_subword_force
530 is not allowed. */
531 fieldmode = GET_MODE (value);
532 if (fieldmode == VOIDmode)
533 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
534
535 last = get_last_insn ();
536 for (i = 0; i < nwords; i++)
537 {
538 /* If I is 0, use the low-order word in both field and target;
539 if I is 1, use the next to lowest word; and so on. */
540 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
541 unsigned int bit_offset = (backwards
542 ? MAX ((int) bitsize - ((int) i + 1)
543 * BITS_PER_WORD,
544 0)
545 : (int) i * BITS_PER_WORD);
546 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
547
548 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
549 bitsize - i * BITS_PER_WORD),
550 bitnum + bit_offset, word_mode,
551 value_word, fallback_p))
552 {
553 delete_insns_since (last);
554 return false;
555 }
556 }
557 return true;
558 }
559
 560 /* From here on we can assume that the field to be stored in is no wider
 561 than a word, since fields bigger than a word were handled above.  */
562
563 /* OFFSET is the number of words or bytes (UNIT says which)
564 from STR_RTX to the first word or byte containing part of the field. */
565
566 if (!MEM_P (op0))
567 {
568 if (offset != 0
569 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
570 {
571 if (!REG_P (op0))
572 {
573 /* Since this is a destination (lvalue), we can't copy
574 it to a pseudo. We can remove a SUBREG that does not
575 change the size of the operand. Such a SUBREG may
576 have been added above. */
577 gcc_assert (GET_CODE (op0) == SUBREG
578 && (GET_MODE_SIZE (GET_MODE (op0))
579 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
580 op0 = SUBREG_REG (op0);
581 }
582 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
583 op0, (offset * UNITS_PER_WORD));
584 }
585 offset = 0;
586 }
587
588 /* If VALUE has a floating-point or complex mode, access it as an
589 integer of the corresponding size. This can occur on a machine
590 with 64 bit registers that uses SFmode for float. It can also
591 occur for unaligned float or complex fields. */
592 orig_value = value;
593 if (GET_MODE (value) != VOIDmode
594 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
595 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
596 {
597 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
598 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
599 }
600
601 /* Now OFFSET is nonzero only if OP0 is memory
602 and is therefore always measured in bytes. */
603
604 if (HAVE_insv
605 && GET_MODE (value) != BLKmode
606 && bitsize > 0
607 && GET_MODE_BITSIZE (op_mode) >= bitsize
608 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
609 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
610 {
611 struct expand_operand ops[4];
612 int xbitpos = bitpos;
613 rtx value1;
614 rtx xop0 = op0;
615 rtx last = get_last_insn ();
616 bool copy_back = false;
617
618 /* Add OFFSET into OP0's address. */
619 if (MEM_P (xop0))
620 xop0 = adjust_address (xop0, byte_mode, offset);
621
622 /* If xop0 is a register, we need it in OP_MODE
623 to make it acceptable to the format of insv. */
624 if (GET_CODE (xop0) == SUBREG)
625 /* We can't just change the mode, because this might clobber op0,
626 and we will need the original value of op0 if insv fails. */
627 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
628 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
629 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
630
631 /* If the destination is a paradoxical subreg such that we need a
632 truncate to the inner mode, perform the insertion on a temporary and
633 truncate the result to the original destination. Note that we can't
634 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
635 X) 0)) is (reg:N X). */
636 if (GET_CODE (xop0) == SUBREG
637 && REG_P (SUBREG_REG (xop0))
638 && (!TRULY_NOOP_TRUNCATION
639 (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
640 GET_MODE_BITSIZE (op_mode))))
641 {
642 rtx tem = gen_reg_rtx (op_mode);
643 emit_move_insn (tem, xop0);
644 xop0 = tem;
645 copy_back = true;
646 }
647
648 /* On big-endian machines, we count bits from the most significant.
649 If the bit field insn does not, we must invert. */
650
651 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
652 xbitpos = unit - bitsize - xbitpos;
653
654 /* We have been counting XBITPOS within UNIT.
655 Count instead within the size of the register. */
656 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
657 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
658
659 unit = GET_MODE_BITSIZE (op_mode);
660
661 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
662 value1 = value;
663 if (GET_MODE (value) != op_mode)
664 {
665 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
666 {
667 /* Optimization: Don't bother really extending VALUE
668 if it has all the bits we will actually use. However,
669 if we must narrow it, be sure we do it correctly. */
670
671 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
672 {
673 rtx tmp;
674
675 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
676 if (! tmp)
677 tmp = simplify_gen_subreg (op_mode,
678 force_reg (GET_MODE (value),
679 value1),
680 GET_MODE (value), 0);
681 value1 = tmp;
682 }
683 else
684 value1 = gen_lowpart (op_mode, value1);
685 }
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* Parse phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type
691 at least as wide as the field; so VALUE should have
692 a mode that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
694 }
695
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], xbitpos);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
701 {
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
705 }
706 delete_insns_since (last);
707 }
708
709 /* If OP0 is a memory, try copying it to a register and seeing if a
710 cheap register alternative is available. */
711 if (HAVE_insv && MEM_P (op0))
712 {
713 enum machine_mode bestmode;
714
715 /* Get the mode to use for inserting into this field. If OP0 is
716 BLKmode, get the smallest mode consistent with the alignment. If
717 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
718 mode. Otherwise, use the smallest mode containing the field. */
719
720 if (GET_MODE (op0) == BLKmode
721 || (op_mode != MAX_MACHINE_MODE
722 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
723 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
724 (op_mode == MAX_MACHINE_MODE
725 ? VOIDmode : op_mode),
726 MEM_VOLATILE_P (op0));
727 else
728 bestmode = GET_MODE (op0);
729
730 if (bestmode != VOIDmode
731 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
732 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
733 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
734 {
735 rtx last, tempreg, xop0;
736 unsigned HOST_WIDE_INT xoffset, xbitpos;
737
738 last = get_last_insn ();
739
740 /* Adjust address to point to the containing unit of
741 that mode. Compute the offset as a multiple of this unit,
742 counting in bytes. */
743 unit = GET_MODE_BITSIZE (bestmode);
744 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
745 xbitpos = bitnum % unit;
746 xop0 = adjust_address (op0, bestmode, xoffset);
747
748 /* Fetch that unit, store the bitfield in it, then store
749 the unit. */
750 tempreg = copy_to_reg (xop0);
751 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
752 fieldmode, orig_value, false))
753 {
754 emit_move_insn (xop0, tempreg);
755 return true;
756 }
757 delete_insns_since (last);
758 }
759 }
760
761 if (!fallback_p)
762 return false;
763
764 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
765 return true;
766 }
767
768 /* Generate code to store value from rtx VALUE
769 into a bit-field within structure STR_RTX
770 containing BITSIZE bits starting at bit BITNUM.
771 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
772
773 void
774 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
775 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
776 rtx value)
777 {
778 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
779 gcc_unreachable ();
780 }
781 \f
782 /* Use shifts and boolean operations to store VALUE
783 into a bit field of width BITSIZE
 784 in a memory location specified by OP0, offset by OFFSET bytes.
785 (OFFSET must be 0 if OP0 is a register.)
786 The field starts at position BITPOS within the byte.
787 (If OP0 is a register, it may be a full word or a narrower mode,
788 but BITPOS still counts within a full word,
789 which is significant on bigendian machines.) */
790
791 static void
792 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
793 unsigned HOST_WIDE_INT bitsize,
794 unsigned HOST_WIDE_INT bitpos, rtx value)
795 {
796 enum machine_mode mode;
797 unsigned int total_bits = BITS_PER_WORD;
798 rtx temp;
799 int all_zero = 0;
800 int all_one = 0;
801
802 /* There is a case not handled here:
803 a structure with a known alignment of just a halfword
804 and a field split across two aligned halfwords within the structure.
805 Or likewise a structure with a known alignment of just a byte
806 and a field split across two bytes.
807 Such cases are not supposed to be able to occur. */
808
809 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
810 {
811 gcc_assert (!offset);
812 /* Special treatment for a bit field split across two registers. */
813 if (bitsize + bitpos > BITS_PER_WORD)
814 {
815 store_split_bit_field (op0, bitsize, bitpos, value);
816 return;
817 }
818 }
819 else
820 {
821 /* Get the proper mode to use for this field. We want a mode that
822 includes the entire field. If such a mode would be larger than
 823 a word, we won't be doing the insertion the normal way.
824 We don't want a mode bigger than the destination. */
825
826 mode = GET_MODE (op0);
827 if (GET_MODE_BITSIZE (mode) == 0
828 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
829 mode = word_mode;
830
831 if (MEM_VOLATILE_P (op0)
832 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
833 && flag_strict_volatile_bitfields > 0)
834 mode = GET_MODE (op0);
835 else
836 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
837 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
838
839 if (mode == VOIDmode)
840 {
841 /* The only way this should occur is if the field spans word
842 boundaries. */
843 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
844 value);
845 return;
846 }
847
848 total_bits = GET_MODE_BITSIZE (mode);
849
850 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
851 be in the range 0 to total_bits-1, and put any excess bytes in
852 OFFSET. */
853 if (bitpos >= total_bits)
854 {
855 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
856 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
857 * BITS_PER_UNIT);
858 }
859
860 /* Get ref to an aligned byte, halfword, or word containing the field.
861 Adjust BITPOS to be position within a word,
862 and OFFSET to be the offset of that word.
863 Then alter OP0 to refer to that word. */
864 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
865 offset -= (offset % (total_bits / BITS_PER_UNIT));
866 op0 = adjust_address (op0, mode, offset);
867 }
868
869 mode = GET_MODE (op0);
870
871 /* Now MODE is either some integral mode for a MEM as OP0,
872 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
873 The bit field is contained entirely within OP0.
874 BITPOS is the starting bit number within OP0.
875 (OP0's mode may actually be narrower than MODE.) */
876
877 if (BYTES_BIG_ENDIAN)
878 /* BITPOS is the distance between our msb
879 and that of the containing datum.
880 Convert it to the distance from the lsb. */
881 bitpos = total_bits - bitsize - bitpos;
882
883 /* Now BITPOS is always the distance between our lsb
884 and that of OP0. */
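  /* Editorial worked example (not part of the original source): with
     total_bits == 32, bitsize == 5 and a big-endian BITPOS of 3, the
     conversion above yields 32 - 5 - 3 == 24, i.e. the field occupies
     bits 24..28 counted from the lsb.  */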
885
886 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
887 we must first convert its mode to MODE. */
888
889 if (CONST_INT_P (value))
890 {
891 HOST_WIDE_INT v = INTVAL (value);
892
893 if (bitsize < HOST_BITS_PER_WIDE_INT)
894 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
895
896 if (v == 0)
897 all_zero = 1;
898 else if ((bitsize < HOST_BITS_PER_WIDE_INT
899 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
900 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
901 all_one = 1;
902
903 value = lshift_value (mode, value, bitpos, bitsize);
904 }
905 else
906 {
907 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
908 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
909
910 if (GET_MODE (value) != mode)
911 value = convert_to_mode (mode, value, 1);
912
913 if (must_and)
914 value = expand_binop (mode, and_optab, value,
915 mask_rtx (mode, 0, bitsize, 0),
916 NULL_RTX, 1, OPTAB_LIB_WIDEN);
917 if (bitpos > 0)
918 value = expand_shift (LSHIFT_EXPR, mode, value,
919 bitpos, NULL_RTX, 1);
920 }
921
922 /* Now clear the chosen bits in OP0,
923 except that if VALUE is -1 we need not bother. */
924 /* We keep the intermediates in registers to allow CSE to combine
925 consecutive bitfield assignments. */
926
927 temp = force_reg (mode, op0);
928
929 if (! all_one)
930 {
931 temp = expand_binop (mode, and_optab, temp,
932 mask_rtx (mode, bitpos, bitsize, 1),
933 NULL_RTX, 1, OPTAB_LIB_WIDEN);
934 temp = force_reg (mode, temp);
935 }
936
937 /* Now logical-or VALUE into OP0, unless it is zero. */
938
939 if (! all_zero)
940 {
941 temp = expand_binop (mode, ior_optab, temp, value,
942 NULL_RTX, 1, OPTAB_LIB_WIDEN);
943 temp = force_reg (mode, temp);
944 }
945
946 if (op0 != temp)
947 {
948 op0 = copy_rtx (op0);
949 emit_move_insn (op0, temp);
950 }
951 }
952 \f
953 /* Store a bit field that is split across multiple accessible memory objects.
954
955 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
956 BITSIZE is the field width; BITPOS the position of its first bit
957 (within the word).
958 VALUE is the value to store.
959
960 This does not yet handle fields wider than BITS_PER_WORD. */
961
962 static void
963 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
964 unsigned HOST_WIDE_INT bitpos, rtx value)
965 {
966 unsigned int unit;
967 unsigned int bitsdone = 0;
968
969 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
970 much at a time. */
971 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
972 unit = BITS_PER_WORD;
973 else
974 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
975
976 /* If VALUE is a constant other than a CONST_INT, get it into a register in
977 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
978 that VALUE might be a floating-point constant. */
979 if (CONSTANT_P (value) && !CONST_INT_P (value))
980 {
981 rtx word = gen_lowpart_common (word_mode, value);
982
983 if (word && (value != word))
984 value = word;
985 else
986 value = gen_lowpart_common (word_mode,
987 force_reg (GET_MODE (value) != VOIDmode
988 ? GET_MODE (value)
989 : word_mode, value));
990 }
991
992 while (bitsdone < bitsize)
993 {
994 unsigned HOST_WIDE_INT thissize;
995 rtx part, word;
996 unsigned HOST_WIDE_INT thispos;
997 unsigned HOST_WIDE_INT offset;
998
999 offset = (bitpos + bitsdone) / unit;
1000 thispos = (bitpos + bitsdone) % unit;
1001
1002 /* THISSIZE must not overrun a word boundary. Otherwise,
1003 store_fixed_bit_field will call us again, and we will mutually
1004 recurse forever. */
1005 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1006 thissize = MIN (thissize, unit - thispos);
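      /* Editorial worked example (assuming unit == 32, not part of the
	 original source): storing a 12-bit field at BITPOS 28 takes two
	 passes: the first stores thissize == 32 - 28 == 4 bits at thispos 28
	 of word 0, the second stores the remaining 8 bits at thispos 0 of
	 word 1.  */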
1007
1008 if (BYTES_BIG_ENDIAN)
1009 {
1010 int total_bits;
1011
1012 /* We must do an endian conversion exactly the same way as it is
1013 done in extract_bit_field, so that the two calls to
1014 extract_fixed_bit_field will have comparable arguments. */
1015 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1016 total_bits = BITS_PER_WORD;
1017 else
1018 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1019
1020 /* Fetch successively less significant portions. */
1021 if (CONST_INT_P (value))
1022 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1023 >> (bitsize - bitsdone - thissize))
1024 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1025 else
1026 /* The args are chosen so that the last part includes the
1027 lsb. Give extract_bit_field the value it needs (with
1028 endianness compensation) to fetch the piece we want. */
1029 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1030 total_bits - bitsize + bitsdone,
1031 NULL_RTX, 1, false);
1032 }
1033 else
1034 {
1035 /* Fetch successively more significant portions. */
1036 if (CONST_INT_P (value))
1037 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1038 >> bitsdone)
1039 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1040 else
1041 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1042 bitsdone, NULL_RTX, 1, false);
1043 }
1044
1045 /* If OP0 is a register, then handle OFFSET here.
1046
1047 When handling multiword bitfields, extract_bit_field may pass
1048 down a word_mode SUBREG of a larger REG for a bitfield that actually
1049 crosses a word boundary. Thus, for a SUBREG, we must find
1050 the current word starting from the base register. */
1051 if (GET_CODE (op0) == SUBREG)
1052 {
1053 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1054 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1055 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1056 word = word_offset ? const0_rtx : op0;
1057 else
1058 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1059 GET_MODE (SUBREG_REG (op0)));
1060 offset = 0;
1061 }
1062 else if (REG_P (op0))
1063 {
1064 enum machine_mode op0_mode = GET_MODE (op0);
1065 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1066 word = offset ? const0_rtx : op0;
1067 else
1068 word = operand_subword_force (op0, offset, GET_MODE (op0));
1069 offset = 0;
1070 }
1071 else
1072 word = op0;
1073
1074 /* OFFSET is in UNITs, and UNIT is in bits.
1075 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1076 it is just an out-of-bounds access. Ignore it. */
1077 if (word != const0_rtx)
1078 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1079 thispos, part);
1080 bitsdone += thissize;
1081 }
1082 }
1083 \f
1084 /* A subroutine of extract_bit_field_1 that converts return value X
1085 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1086 to extract_bit_field. */
1087
1088 static rtx
1089 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1090 enum machine_mode tmode, bool unsignedp)
1091 {
1092 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1093 return x;
1094
1095 /* If the x mode is not a scalar integral, first convert to the
1096 integer mode of that size and then access it as a floating-point
1097 value via a SUBREG. */
1098 if (!SCALAR_INT_MODE_P (tmode))
1099 {
1100 enum machine_mode smode;
1101
1102 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1103 x = convert_to_mode (smode, x, unsignedp);
1104 x = force_reg (smode, x);
1105 return gen_lowpart (tmode, x);
1106 }
1107
1108 return convert_to_mode (tmode, x, unsignedp);
1109 }
1110
1111 /* A subroutine of extract_bit_field, with the same arguments.
1112 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1113 if we can find no other means of implementing the operation.
 1114 If FALLBACK_P is false, return NULL instead.  */
1115
1116 static rtx
1117 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1118 unsigned HOST_WIDE_INT bitnum,
1119 int unsignedp, bool packedp, rtx target,
1120 enum machine_mode mode, enum machine_mode tmode,
1121 bool fallback_p)
1122 {
1123 unsigned int unit
1124 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1125 unsigned HOST_WIDE_INT offset, bitpos;
1126 rtx op0 = str_rtx;
1127 enum machine_mode int_mode;
1128 enum machine_mode ext_mode;
1129 enum machine_mode mode1;
1130 enum insn_code icode;
1131 int byte_offset;
1132
1133 if (tmode == VOIDmode)
1134 tmode = mode;
1135
1136 while (GET_CODE (op0) == SUBREG)
1137 {
1138 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1139 op0 = SUBREG_REG (op0);
1140 }
1141
1142 /* If we have an out-of-bounds access to a register, just return an
1143 uninitialized register of the required mode. This can occur if the
1144 source code contains an out-of-bounds access to a small array. */
1145 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1146 return gen_reg_rtx (tmode);
1147
1148 if (REG_P (op0)
1149 && mode == GET_MODE (op0)
1150 && bitnum == 0
1151 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1152 {
1153 /* We're trying to extract a full register from itself. */
1154 return op0;
1155 }
1156
1157 /* See if we can get a better vector mode before extracting. */
1158 if (VECTOR_MODE_P (GET_MODE (op0))
1159 && !MEM_P (op0)
1160 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1161 {
1162 enum machine_mode new_mode;
1163
1164 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1165 new_mode = MIN_MODE_VECTOR_FLOAT;
1166 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1167 new_mode = MIN_MODE_VECTOR_FRACT;
1168 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1169 new_mode = MIN_MODE_VECTOR_UFRACT;
1170 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1171 new_mode = MIN_MODE_VECTOR_ACCUM;
1172 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1173 new_mode = MIN_MODE_VECTOR_UACCUM;
1174 else
1175 new_mode = MIN_MODE_VECTOR_INT;
1176
1177 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1178 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1179 && targetm.vector_mode_supported_p (new_mode))
1180 break;
1181 if (new_mode != VOIDmode)
1182 op0 = gen_lowpart (new_mode, op0);
1183 }
1184
1185 /* Use vec_extract patterns for extracting parts of vectors whenever
1186 available. */
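  /* Editorial note (not part of the original source): the last clause of the
     condition below checks that the requested bits lie entirely within one
     vector element; e.g. with 32-bit elements, bitnum == 64 and bitsize == 32
     select exactly element 2, since (64 + 32 - 1) / 32 == 64 / 32 == 2.  */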
1187 if (VECTOR_MODE_P (GET_MODE (op0))
1188 && !MEM_P (op0)
1189 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1190 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1191 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1192 {
1193 struct expand_operand ops[3];
1194 enum machine_mode outermode = GET_MODE (op0);
1195 enum machine_mode innermode = GET_MODE_INNER (outermode);
1196 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1197 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1198
1199 create_output_operand (&ops[0], target, innermode);
1200 create_input_operand (&ops[1], op0, outermode);
1201 create_integer_operand (&ops[2], pos);
1202 if (maybe_expand_insn (icode, 3, ops))
1203 {
1204 target = ops[0].value;
1205 if (GET_MODE (target) != mode)
1206 return gen_lowpart (tmode, target);
1207 return target;
1208 }
1209 }
1210
1211 /* Make sure we are playing with integral modes. Pun with subregs
1212 if we aren't. */
1213 {
1214 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1215 if (imode != GET_MODE (op0))
1216 {
1217 if (MEM_P (op0))
1218 op0 = adjust_address (op0, imode, 0);
1219 else if (imode != BLKmode)
1220 {
1221 op0 = gen_lowpart (imode, op0);
1222
1223 /* If we got a SUBREG, force it into a register since we
1224 aren't going to be able to do another SUBREG on it. */
1225 if (GET_CODE (op0) == SUBREG)
1226 op0 = force_reg (imode, op0);
1227 }
1228 else if (REG_P (op0))
1229 {
1230 rtx reg, subreg;
1231 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1232 MODE_INT);
1233 reg = gen_reg_rtx (imode);
1234 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1235 emit_move_insn (subreg, op0);
1236 op0 = reg;
1237 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1238 }
1239 else
1240 {
1241 rtx mem = assign_stack_temp (GET_MODE (op0),
1242 GET_MODE_SIZE (GET_MODE (op0)), 0);
1243 emit_move_insn (mem, op0);
1244 op0 = adjust_address (mem, BLKmode, 0);
1245 }
1246 }
1247 }
1248
1249 /* We may be accessing data outside the field, which means
1250 we can alias adjacent data. */
1251 if (MEM_P (op0))
1252 {
1253 op0 = shallow_copy_rtx (op0);
1254 set_mem_alias_set (op0, 0);
1255 set_mem_expr (op0, 0);
1256 }
1257
1258 /* Extraction of a full-word or multi-word value from a structure
1259 in a register or aligned memory can be done with just a SUBREG.
1260 A subword value in the least significant part of a register
1261 can also be extracted with a SUBREG. For this, we need the
1262 byte offset of the value in op0. */
1263
1264 bitpos = bitnum % unit;
1265 offset = bitnum / unit;
1266 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1267
1268 /* If OP0 is a register, BITPOS must count within a word.
1269 But as we have it, it counts within whatever size OP0 now has.
1270 On a bigendian machine, these are not the same, so convert. */
1271 if (BYTES_BIG_ENDIAN
1272 && !MEM_P (op0)
1273 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1274 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1275
1276 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1277 If that's wrong, the solution is to test for it and set TARGET to 0
1278 if needed. */
1279
1280 /* Only scalar integer modes can be converted via subregs. There is an
1281 additional problem for FP modes here in that they can have a precision
1282 which is different from the size. mode_for_size uses precision, but
1283 we want a mode based on the size, so we must avoid calling it for FP
1284 modes. */
1285 mode1 = (SCALAR_INT_MODE_P (tmode)
1286 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1287 : mode);
1288
1289 /* If the bitfield is volatile, we need to make sure the access
1290 remains on a type-aligned boundary. */
1291 if (GET_CODE (op0) == MEM
1292 && MEM_VOLATILE_P (op0)
1293 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1294 && flag_strict_volatile_bitfields > 0)
1295 goto no_subreg_mode_swap;
1296
1297 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1298 && bitpos % BITS_PER_WORD == 0)
1299 || (mode1 != BLKmode
1300 /* ??? The big endian test here is wrong. This is correct
1301 if the value is in a register, and if mode_for_size is not
1302 the same mode as op0. This causes us to get unnecessarily
1303 inefficient code from the Thumb port when -mbig-endian. */
1304 && (BYTES_BIG_ENDIAN
1305 ? bitpos + bitsize == BITS_PER_WORD
1306 : bitpos == 0)))
1307 && ((!MEM_P (op0)
1308 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1309 GET_MODE_BITSIZE (GET_MODE (op0)))
1310 && GET_MODE_SIZE (mode1) != 0
1311 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1312 || (MEM_P (op0)
1313 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1314 || (offset * BITS_PER_UNIT % bitsize == 0
1315 && MEM_ALIGN (op0) % bitsize == 0)))))
1316 {
1317 if (MEM_P (op0))
1318 op0 = adjust_address (op0, mode1, offset);
1319 else if (mode1 != GET_MODE (op0))
1320 {
1321 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1322 byte_offset);
1323 if (sub == NULL)
1324 goto no_subreg_mode_swap;
1325 op0 = sub;
1326 }
1327 if (mode1 != mode)
1328 return convert_to_mode (tmode, op0, unsignedp);
1329 return op0;
1330 }
1331 no_subreg_mode_swap:
1332
1333 /* Handle fields bigger than a word. */
1334
1335 if (bitsize > BITS_PER_WORD)
1336 {
1337 /* Here we transfer the words of the field
1338 in the order least significant first.
1339 This is because the most significant word is the one which may
1340 be less than full. */
1341
1342 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1343 unsigned int i;
1344
1345 if (target == 0 || !REG_P (target))
1346 target = gen_reg_rtx (mode);
1347
1348 /* Indicate for flow that the entire target reg is being set. */
1349 emit_clobber (target);
1350
1351 for (i = 0; i < nwords; i++)
1352 {
1353 /* If I is 0, use the low-order word in both field and target;
1354 if I is 1, use the next to lowest word; and so on. */
1355 /* Word number in TARGET to use. */
1356 unsigned int wordnum
1357 = (WORDS_BIG_ENDIAN
1358 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1359 : i);
1360 /* Offset from start of field in OP0. */
1361 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1362 ? MAX (0, ((int) bitsize - ((int) i + 1)
1363 * (int) BITS_PER_WORD))
1364 : (int) i * BITS_PER_WORD);
1365 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1366 rtx result_part
1367 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1368 bitsize - i * BITS_PER_WORD),
1369 bitnum + bit_offset, 1, false, target_part, mode,
1370 word_mode);
1371
1372 gcc_assert (target_part);
1373
1374 if (result_part != target_part)
1375 emit_move_insn (target_part, result_part);
1376 }
1377
1378 if (unsignedp)
1379 {
1380 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1381 need to be zero'd out. */
1382 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1383 {
1384 unsigned int i, total_words;
1385
1386 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1387 for (i = nwords; i < total_words; i++)
1388 emit_move_insn
1389 (operand_subword (target,
1390 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1391 1, VOIDmode),
1392 const0_rtx);
1393 }
1394 return target;
1395 }
1396
1397 /* Signed bit field: sign-extend with two arithmetic shifts. */
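      /* Editorial worked example (not part of the original source): for a
	 5-bit signed field extracted into a 32-bit mode, both shifts below
	 are by 32 - 5 == 27: the left shift puts the field's sign bit into
	 the msb, and the arithmetic right shift replicates it into the
	 upper bits.  */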
1398 target = expand_shift (LSHIFT_EXPR, mode, target,
1399 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1400 return expand_shift (RSHIFT_EXPR, mode, target,
1401 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1402 }
1403
1404 /* From here on we know the desired field is smaller than a word. */
1405
1406 /* Check if there is a correspondingly-sized integer field, so we can
1407 safely extract it as one size of integer, if necessary; then
1408 truncate or extend to the size that is wanted; then use SUBREGs or
1409 convert_to_mode to get one of the modes we really wanted. */
1410
1411 int_mode = int_mode_for_mode (tmode);
1412 if (int_mode == BLKmode)
1413 int_mode = int_mode_for_mode (mode);
1414 /* Should probably push op0 out to memory and then do a load. */
1415 gcc_assert (int_mode != BLKmode);
1416
1417 /* OFFSET is the number of words or bytes (UNIT says which)
1418 from STR_RTX to the first word or byte containing part of the field. */
1419 if (!MEM_P (op0))
1420 {
1421 if (offset != 0
1422 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1423 {
1424 if (!REG_P (op0))
1425 op0 = copy_to_reg (op0);
1426 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1427 op0, (offset * UNITS_PER_WORD));
1428 }
1429 offset = 0;
1430 }
1431
1432 /* Now OFFSET is nonzero only for memory operands. */
1433 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1434 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1435 if (ext_mode != MAX_MACHINE_MODE
1436 && bitsize > 0
1437 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1438 /* If op0 is a register, we need it in EXT_MODE to make it
1439 acceptable to the format of ext(z)v. */
1440 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1441 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1442 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1443 {
1444 struct expand_operand ops[4];
1445 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1446 rtx xop0 = op0;
1447 rtx xtarget = target;
1448 rtx xspec_target = target;
1449 rtx xspec_target_subreg = 0;
1450
1451 /* If op0 is a register, we need it in EXT_MODE to make it
1452 acceptable to the format of ext(z)v. */
1453 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1454 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1455 if (MEM_P (xop0))
1456 /* Get ref to first byte containing part of the field. */
1457 xop0 = adjust_address (xop0, byte_mode, xoffset);
1458
1459 /* On big-endian machines, we count bits from the most significant.
1460 If the bit field insn does not, we must invert. */
1461 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1462 xbitpos = unit - bitsize - xbitpos;
1463
1464 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1465 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1466 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1467
1468 unit = GET_MODE_BITSIZE (ext_mode);
1469
1470 if (xtarget == 0)
1471 xtarget = xspec_target = gen_reg_rtx (tmode);
1472
1473 if (GET_MODE (xtarget) != ext_mode)
1474 {
1475 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1476 between the mode of the extraction (word_mode) and the target
1477 mode. Instead, create a temporary and use convert_move to set
1478 the target. */
1479 if (REG_P (xtarget)
1480 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1481 GET_MODE_BITSIZE (ext_mode)))
1482 {
1483 xtarget = gen_lowpart (ext_mode, xtarget);
1484 if (GET_MODE_SIZE (ext_mode)
1485 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1486 xspec_target_subreg = xtarget;
1487 }
1488 else
1489 xtarget = gen_reg_rtx (ext_mode);
1490 }
1491
1492 create_output_operand (&ops[0], xtarget, ext_mode);
1493 create_fixed_operand (&ops[1], xop0);
1494 create_integer_operand (&ops[2], bitsize);
1495 create_integer_operand (&ops[3], xbitpos);
1496 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1497 4, ops))
1498 {
1499 xtarget = ops[0].value;
1500 if (xtarget == xspec_target)
1501 return xtarget;
1502 if (xtarget == xspec_target_subreg)
1503 return xspec_target;
1504 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1505 }
1506 }
1507
1508 /* If OP0 is a memory, try copying it to a register and seeing if a
1509 cheap register alternative is available. */
1510 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1511 {
1512 enum machine_mode bestmode;
1513
1514 /* Get the mode to use for inserting into this field. If
1515 OP0 is BLKmode, get the smallest mode consistent with the
1516 alignment. If OP0 is a non-BLKmode object that is no
1517 wider than EXT_MODE, use its mode. Otherwise, use the
1518 smallest mode containing the field. */
1519
1520 if (GET_MODE (op0) == BLKmode
1521 || (ext_mode != MAX_MACHINE_MODE
1522 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1523 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1524 (ext_mode == MAX_MACHINE_MODE
1525 ? VOIDmode : ext_mode),
1526 MEM_VOLATILE_P (op0));
1527 else
1528 bestmode = GET_MODE (op0);
1529
1530 if (bestmode != VOIDmode
1531 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1532 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1533 {
1534 unsigned HOST_WIDE_INT xoffset, xbitpos;
1535
1536 /* Compute the offset as a multiple of this unit,
1537 counting in bytes. */
1538 unit = GET_MODE_BITSIZE (bestmode);
1539 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1540 xbitpos = bitnum % unit;
1541
1542 /* Make sure the register is big enough for the whole field. */
1543 if (xoffset * BITS_PER_UNIT + unit
1544 >= offset * BITS_PER_UNIT + bitsize)
1545 {
1546 rtx last, result, xop0;
1547
1548 last = get_last_insn ();
1549
1550 /* Fetch it to a register in that size. */
1551 xop0 = adjust_address (op0, bestmode, xoffset);
1552 xop0 = force_reg (bestmode, xop0);
1553 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1554 unsignedp, packedp, target,
1555 mode, tmode, false);
1556 if (result)
1557 return result;
1558
1559 delete_insns_since (last);
1560 }
1561 }
1562 }
1563
1564 if (!fallback_p)
1565 return NULL;
1566
1567 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1568 bitpos, target, unsignedp, packedp);
1569 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1570 }
1571
 1572 /* Generate code to extract a bit-field from STR_RTX
1573 containing BITSIZE bits, starting at BITNUM,
1574 and put it in TARGET if possible (if TARGET is nonzero).
1575 Regardless of TARGET, we return the rtx for where the value is placed.
1576
1577 STR_RTX is the structure containing the byte (a REG or MEM).
1578 UNSIGNEDP is nonzero if this is an unsigned bit field.
1579 PACKEDP is nonzero if the field has the packed attribute.
1580 MODE is the natural mode of the field value once extracted.
1581 TMODE is the mode the caller would like the value to have;
1582 but the value may be returned with type MODE instead.
1583
1584 If a TARGET is specified and we can store in it at no extra cost,
1585 we do so, and return TARGET.
1586 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1587 if they are equally easy. */
1588
1589 rtx
1590 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1591 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1592 rtx target, enum machine_mode mode, enum machine_mode tmode)
1593 {
1594 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1595 target, mode, tmode, true);
1596 }
1597 \f
 1598 /* Extract a bit field using shifts and boolean operations.
1599 Returns an rtx to represent the value.
1600 OP0 addresses a register (word) or memory (byte).
1601 BITPOS says which bit within the word or byte the bit field starts in.
1602 OFFSET says how many bytes farther the bit field starts;
1603 it is 0 if OP0 is a register.
1604 BITSIZE says how many bits long the bit field is.
1605 (If OP0 is a register, it may be narrower than a full word,
1606 but BITPOS still counts within a full word,
1607 which is significant on bigendian machines.)
1608
1609 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1610 PACKEDP is true if the field has the packed attribute.
1611
1612 If TARGET is nonzero, attempts to store the value there
1613 and return TARGET, but this is not guaranteed.
1614 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1615
1616 static rtx
1617 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1618 unsigned HOST_WIDE_INT offset,
1619 unsigned HOST_WIDE_INT bitsize,
1620 unsigned HOST_WIDE_INT bitpos, rtx target,
1621 int unsignedp, bool packedp)
1622 {
1623 unsigned int total_bits = BITS_PER_WORD;
1624 enum machine_mode mode;
1625
1626 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1627 {
1628 /* Special treatment for a bit field split across two registers. */
1629 if (bitsize + bitpos > BITS_PER_WORD)
1630 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1631 }
1632 else
1633 {
1634 /* Get the proper mode to use for this field. We want a mode that
1635 includes the entire field. If such a mode would be larger than
1636 a word, we won't be doing the extraction the normal way. */
1637
1638 if (MEM_VOLATILE_P (op0)
1639 && flag_strict_volatile_bitfields > 0)
1640 {
1641 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1642 mode = GET_MODE (op0);
1643 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1644 mode = GET_MODE (target);
1645 else
1646 mode = tmode;
1647 }
1648 else
1649 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1650 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1651
1652 if (mode == VOIDmode)
1653 /* The only way this should occur is if the field spans word
1654 boundaries. */
1655 return extract_split_bit_field (op0, bitsize,
1656 bitpos + offset * BITS_PER_UNIT,
1657 unsignedp);
1658
1659 total_bits = GET_MODE_BITSIZE (mode);
1660
1661 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1662 be in the range 0 to total_bits-1, and put any excess bytes in
1663 OFFSET. */
1664 if (bitpos >= total_bits)
1665 {
1666 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1667 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1668 * BITS_PER_UNIT);
1669 }
1670
1671 /* If we're accessing a volatile MEM, we can't do the next
1672 alignment step if it results in a multi-word access where we
1673 otherwise wouldn't have one. So, check for that case
1674 here. */
1675 if (MEM_P (op0)
1676 && MEM_VOLATILE_P (op0)
1677 && flag_strict_volatile_bitfields > 0
1678 && bitpos + bitsize <= total_bits
1679 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1680 {
1681 if (STRICT_ALIGNMENT)
1682 {
1683 static bool informed_about_misalignment = false;
1684 bool warned;
1685
1686 if (packedp)
1687 {
1688 if (bitsize == total_bits)
1689 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1690 "multiple accesses to volatile structure member"
1691 " because of packed attribute");
1692 else
1693 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1694 "multiple accesses to volatile structure bitfield"
1695 " because of packed attribute");
1696
1697 return extract_split_bit_field (op0, bitsize,
1698 bitpos + offset * BITS_PER_UNIT,
1699 unsignedp);
1700 }
1701
1702 if (bitsize == total_bits)
1703 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1704 "mis-aligned access used for structure member");
1705 else
1706 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1707 "mis-aligned access used for structure bitfield");
1708
1709 if (! informed_about_misalignment && warned)
1710 {
1711 informed_about_misalignment = true;
1712 inform (input_location,
1713 "when a volatile object spans multiple type-sized locations,"
1714 " the compiler must choose between using a single mis-aligned access to"
1715 " preserve the volatility, or using multiple aligned accesses to avoid"
1716 " runtime faults; this code may fail at runtime if the hardware does"
1717 " not allow this access");
1718 }
1719 }
1720 }
1721 else
1722 {
1723
1724 /* Get ref to an aligned byte, halfword, or word containing the field.
1725 Adjust BITPOS to be position within a word,
1726 and OFFSET to be the offset of that word.
1727 Then alter OP0 to refer to that word. */
1728 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1729 offset -= (offset % (total_bits / BITS_PER_UNIT));
1730 }
1731
1732 op0 = adjust_address (op0, mode, offset);
1733 }
1734
1735 mode = GET_MODE (op0);
1736
1737 if (BYTES_BIG_ENDIAN)
1738 /* BITPOS is the distance between our msb and that of OP0.
1739 Convert it to the distance from the lsb. */
1740 bitpos = total_bits - bitsize - bitpos;
1741
1742 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1743 We have reduced the big-endian case to the little-endian case. */
1744
1745 if (unsignedp)
1746 {
1747 if (bitpos)
1748 {
1749 /* If the field does not already start at the lsb,
1750 shift it so it does. */
1751 /* Maybe propagate the target for the shift. */
1752 /* But not if we will return it--could confuse integrate.c. */
1753 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1754 if (tmode != mode) subtarget = 0;
1755 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1756 }
1757 /* Convert the value to the desired mode. */
1758 if (mode != tmode)
1759 op0 = convert_to_mode (tmode, op0, 1);
1760
1761 /* Unless the msb of the field used to be the msb when we shifted,
1762 mask out the upper bits. */
1763
1764 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1765 return expand_binop (GET_MODE (op0), and_optab, op0,
1766 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1767 target, 1, OPTAB_LIB_WIDEN);
1768 return op0;
1769 }
1770
1771 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1772 then arithmetic-shift its lsb to the lsb of the word. */
1773 op0 = force_reg (mode, op0);
1774 if (mode != tmode)
1775 target = 0;
1776
1777 /* Find the narrowest integer mode that contains the field. */
1778
1779 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1780 mode = GET_MODE_WIDER_MODE (mode))
1781 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1782 {
1783 op0 = convert_to_mode (mode, op0, 0);
1784 break;
1785 }
1786
1787 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1788 {
1789 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1790 /* Maybe propagate the target for the shift. */
1791 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1792 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1793 }
1794
1795 return expand_shift (RSHIFT_EXPR, mode, op0,
1796 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1797 }
1798 \f
1799 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1800 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1801 complement of that if COMPLEMENT. The mask is truncated if
1802 necessary to the width of mode MODE. The mask is zero-extended if
1803 BITSIZE+BITPOS is too small for MODE. */
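
/* For example (illustration only), mask_rtx (SImode, 8, 4, 0) yields the
   SImode constant 0x00000f00 (four one bits shifted left by eight), and
   mask_rtx (SImode, 8, 4, 1) yields its complement 0xfffff0ff.  */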
1804
1805 static rtx
1806 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1807 {
1808 double_int mask;
1809
1810 mask = double_int_mask (bitsize);
1811 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1812
1813 if (complement)
1814 mask = double_int_not (mask);
1815
1816 return immed_double_int_const (mask, mode);
1817 }
1818
1819 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1820 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1821
1822 static rtx
1823 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1824 {
1825 double_int val;
1826
1827 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1828 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1829
1830 return immed_double_int_const (val, mode);
1831 }
1832 \f
1833 /* Extract a bit field that is split across two words
1834 and return an RTX for the result.
1835
1836 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1837 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1838 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
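
/* As a concrete (hypothetical) illustration on a 32-bit-word,
   little-endian target: for a 12-bit field starting at bit 26, the loop
   below first fetches the 6 bits left in the first word and then the
   remaining 6 bits from the second word, each via extract_fixed_bit_field
   as an unsigned field, shifts the second part up by 6 and ORs the two
   parts together; the final pair of shifts sign-extends the result when
   UNSIGNEDP is 0.  */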
1839
1840 static rtx
1841 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1842 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1843 {
1844 unsigned int unit;
1845 unsigned int bitsdone = 0;
1846 rtx result = NULL_RTX;
1847 int first = 1;
1848
1849 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1850 much at a time. */
1851 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1852 unit = BITS_PER_WORD;
1853 else
1854 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1855
1856 while (bitsdone < bitsize)
1857 {
1858 unsigned HOST_WIDE_INT thissize;
1859 rtx part, word;
1860 unsigned HOST_WIDE_INT thispos;
1861 unsigned HOST_WIDE_INT offset;
1862
1863 offset = (bitpos + bitsdone) / unit;
1864 thispos = (bitpos + bitsdone) % unit;
1865
1866 /* THISSIZE must not overrun a word boundary. Otherwise,
1867 extract_fixed_bit_field will call us again, and we will mutually
1868 recurse forever. */
1869 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1870 thissize = MIN (thissize, unit - thispos);
1871
1872 /* If OP0 is a register, then handle OFFSET here.
1873
1874 When handling multiword bitfields, extract_bit_field may pass
1875 down a word_mode SUBREG of a larger REG for a bitfield that actually
1876 crosses a word boundary. Thus, for a SUBREG, we must find
1877 the current word starting from the base register. */
1878 if (GET_CODE (op0) == SUBREG)
1879 {
1880 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1881 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1882 GET_MODE (SUBREG_REG (op0)));
1883 offset = 0;
1884 }
1885 else if (REG_P (op0))
1886 {
1887 word = operand_subword_force (op0, offset, GET_MODE (op0));
1888 offset = 0;
1889 }
1890 else
1891 word = op0;
1892
1893 /* Extract the parts in bit-counting order,
1894 whose meaning is determined by BYTES_PER_UNIT.
1895 OFFSET is in UNITs, and UNIT is in bits.
1896 extract_fixed_bit_field wants offset in bytes. */
1897 part = extract_fixed_bit_field (word_mode, word,
1898 offset * unit / BITS_PER_UNIT,
1899 thissize, thispos, 0, 1, false);
1900 bitsdone += thissize;
1901
1902 /* Shift this part into place for the result. */
1903 if (BYTES_BIG_ENDIAN)
1904 {
1905 if (bitsize != bitsdone)
1906 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1907 bitsize - bitsdone, 0, 1);
1908 }
1909 else
1910 {
1911 if (bitsdone != thissize)
1912 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1913 bitsdone - thissize, 0, 1);
1914 }
1915
1916 if (first)
1917 result = part;
1918 else
1919 /* Combine the parts with bitwise or. This works
1920 because we extracted each part as an unsigned bit field. */
1921 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1922 OPTAB_LIB_WIDEN);
1923
1924 first = 0;
1925 }
1926
1927 /* Unsigned bit field: we are done. */
1928 if (unsignedp)
1929 return result;
1930 /* Signed bit field: sign-extend with two arithmetic shifts. */
1931 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1932 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1933 return expand_shift (RSHIFT_EXPR, word_mode, result,
1934 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1935 }
1936 \f
1937 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1938 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1939 MODE, fill the upper bits with zeros. Fail if the layout of either
1940 mode is unknown (as for CC modes) or if the extraction would involve
1941 unprofitable mode punning. Return the value on success, otherwise
1942 return null.
1943
1944 This is different from gen_lowpart* in these respects:
1945
1946 - the returned value must always be considered an rvalue
1947
1948 - when MODE is wider than SRC_MODE, the extraction involves
1949 a zero extension
1950
1951 - when MODE is smaller than SRC_MODE, the extraction involves
1952 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1953
1954 In other words, this routine performs a computation, whereas the
1955 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1956 operations. */
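
/* For instance (purely as a sketch of the intended semantics, assuming the
   usual 32-bit SImode and 64-bit DImode), extract_low_bits (SImode, DImode, x)
   yields the low 32 bits of X as an SImode rvalue, while
   extract_low_bits (DImode, SImode, x) yields X zero-extended to 64 bits;
   either call may fail and return NULL_RTX when the modes cannot be tied or
   have no integer equivalent.  */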
1957
1958 rtx
1959 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1960 {
1961 enum machine_mode int_mode, src_int_mode;
1962
1963 if (mode == src_mode)
1964 return src;
1965
1966 if (CONSTANT_P (src))
1967 {
1968 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1969 fails, it will happily create (subreg (symbol_ref)) or similar
1970 invalid SUBREGs. */
1971 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1972 rtx ret = simplify_subreg (mode, src, src_mode, byte);
1973 if (ret)
1974 return ret;
1975
1976 if (GET_MODE (src) == VOIDmode
1977 || !validate_subreg (mode, src_mode, src, byte))
1978 return NULL_RTX;
1979
1980 src = force_reg (GET_MODE (src), src);
1981 return gen_rtx_SUBREG (mode, src, byte);
1982 }
1983
1984 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1985 return NULL_RTX;
1986
1987 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1988 && MODES_TIEABLE_P (mode, src_mode))
1989 {
1990 rtx x = gen_lowpart_common (mode, src);
1991 if (x)
1992 return x;
1993 }
1994
1995 src_int_mode = int_mode_for_mode (src_mode);
1996 int_mode = int_mode_for_mode (mode);
1997 if (src_int_mode == BLKmode || int_mode == BLKmode)
1998 return NULL_RTX;
1999
2000 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2001 return NULL_RTX;
2002 if (!MODES_TIEABLE_P (int_mode, mode))
2003 return NULL_RTX;
2004
2005 src = gen_lowpart (src_int_mode, src);
2006 src = convert_modes (int_mode, src_int_mode, src, true);
2007 src = gen_lowpart (mode, src);
2008 return src;
2009 }
2010 \f
2011 /* Add INC into TARGET. */
2012
2013 void
2014 expand_inc (rtx target, rtx inc)
2015 {
2016 rtx value = expand_binop (GET_MODE (target), add_optab,
2017 target, inc,
2018 target, 0, OPTAB_LIB_WIDEN);
2019 if (value != target)
2020 emit_move_insn (target, value);
2021 }
2022
2023 /* Subtract DEC from TARGET. */
2024
2025 void
2026 expand_dec (rtx target, rtx dec)
2027 {
2028 rtx value = expand_binop (GET_MODE (target), sub_optab,
2029 target, dec,
2030 target, 0, OPTAB_LIB_WIDEN);
2031 if (value != target)
2032 emit_move_insn (target, value);
2033 }
2034 \f
2035 /* Output a shift instruction for expression code CODE,
2036 with SHIFTED being the rtx for the value to shift,
2037 and AMOUNT the tree for the amount to shift by.
2038 Store the result in the rtx TARGET, if that is convenient.
2039 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2040 Return the rtx for where the value is. */
2041
2042 rtx
2043 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2044 tree amount, rtx target, int unsignedp)
2045 {
2046 rtx op1, temp = 0;
2047 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2048 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2049 optab lshift_optab = ashl_optab;
2050 optab rshift_arith_optab = ashr_optab;
2051 optab rshift_uns_optab = lshr_optab;
2052 optab lrotate_optab = rotl_optab;
2053 optab rrotate_optab = rotr_optab;
2054 enum machine_mode op1_mode;
2055 int attempt;
2056 bool speed = optimize_insn_for_speed_p ();
2057
2058 op1 = expand_normal (amount);
2059 op1_mode = GET_MODE (op1);
2060
2061 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2062 shift amount is a vector, use the vector/vector shift patterns. */
2063 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2064 {
2065 lshift_optab = vashl_optab;
2066 rshift_arith_optab = vashr_optab;
2067 rshift_uns_optab = vlshr_optab;
2068 lrotate_optab = vrotl_optab;
2069 rrotate_optab = vrotr_optab;
2070 }
2071
2072 /* We used to detect shift counts computed by NEGATE_EXPR
2073 and shift in the other direction, but that does not work
2074 on all machines. */
2075
2076 if (SHIFT_COUNT_TRUNCATED)
2077 {
2078 if (CONST_INT_P (op1)
2079 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2080 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2081 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2082 % GET_MODE_BITSIZE (mode));
2083 else if (GET_CODE (op1) == SUBREG
2084 && subreg_lowpart_p (op1)
2085 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2086 op1 = SUBREG_REG (op1);
2087 }
2088
2089 if (op1 == const0_rtx)
2090 return shifted;
2091
2092 /* Check whether it's cheaper to implement a left shift by a constant
2093 bit count by a sequence of additions. */
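  /* For example, on a target where shifts are expensive enough, x << 2
     may come out as two self-additions (t = x + x; t = t + t); that is
     what the loop below emits when the cost comparison favors it.  */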
2094 if (code == LSHIFT_EXPR
2095 && CONST_INT_P (op1)
2096 && INTVAL (op1) > 0
2097 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2098 && INTVAL (op1) < MAX_BITS_PER_WORD
2099 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2100 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2101 {
2102 int i;
2103 for (i = 0; i < INTVAL (op1); i++)
2104 {
2105 temp = force_reg (mode, shifted);
2106 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2107 unsignedp, OPTAB_LIB_WIDEN);
2108 }
2109 return shifted;
2110 }
2111
2112 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2113 {
2114 enum optab_methods methods;
2115
2116 if (attempt == 0)
2117 methods = OPTAB_DIRECT;
2118 else if (attempt == 1)
2119 methods = OPTAB_WIDEN;
2120 else
2121 methods = OPTAB_LIB_WIDEN;
2122
2123 if (rotate)
2124 {
2125 /* Widening does not work for rotation. */
2126 if (methods == OPTAB_WIDEN)
2127 continue;
2128 else if (methods == OPTAB_LIB_WIDEN)
2129 {
2130 /* If we have been unable to open-code this by a rotation,
2131 do it as the IOR of two shifts. I.e., to rotate A
2132 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2133 where C is the bitsize of A.
2134
2135 It is theoretically possible that the target machine might
2136 not be able to perform either shift and hence we would
2137 be making two libcalls rather than just the one for the
2138 shift (similarly if IOR could not be done). We will allow
2139 this extremely unlikely lossage to avoid complicating the
2140 code below. */
2141
2142 rtx subtarget = target == shifted ? 0 : target;
2143 tree new_amount, other_amount;
2144 rtx temp1;
2145 tree type = TREE_TYPE (amount);
2146 if (GET_MODE (op1) != TYPE_MODE (type)
2147 && GET_MODE (op1) != VOIDmode)
2148 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2149 new_amount = make_tree (type, op1);
2150 other_amount
2151 = fold_build2 (MINUS_EXPR, type,
2152 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2153 new_amount);
2154
2155 shifted = force_reg (mode, shifted);
2156
2157 temp = expand_variable_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2158 mode, shifted, new_amount, 0, 1);
2159 temp1 = expand_variable_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2160 mode, shifted, other_amount,
2161 subtarget, 1);
2162 return expand_binop (mode, ior_optab, temp, temp1, target,
2163 unsignedp, methods);
2164 }
2165
2166 temp = expand_binop (mode,
2167 left ? lrotate_optab : rrotate_optab,
2168 shifted, op1, target, unsignedp, methods);
2169 }
2170 else if (unsignedp)
2171 temp = expand_binop (mode,
2172 left ? lshift_optab : rshift_uns_optab,
2173 shifted, op1, target, unsignedp, methods);
2174
2175 /* Do arithmetic shifts.
2176 Also, if we are going to widen the operand, we can just as well
2177 use an arithmetic right-shift instead of a logical one. */
2178 if (temp == 0 && ! rotate
2179 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2180 {
2181 enum optab_methods methods1 = methods;
2182
2183 /* If trying to widen a log shift to an arithmetic shift,
2184 don't accept an arithmetic shift of the same size. */
2185 if (unsignedp)
2186 methods1 = OPTAB_MUST_WIDEN;
2187
2188 /* Arithmetic shift */
2189
2190 temp = expand_binop (mode,
2191 left ? lshift_optab : rshift_arith_optab,
2192 shifted, op1, target, unsignedp, methods1);
2193 }
2194
2195 /* We used to try extzv here for logical right shifts, but that was
2196 only useful for one machine, the VAX, and caused poor code
2197 generation there for lshrdi3, so the code was deleted and a
2198 define_expand for lshrsi3 was added to vax.md. */
2199 }
2200
2201 gcc_assert (temp);
2202 return temp;
2203 }
2204
2205 /* Output a shift instruction for expression code CODE,
2206 with SHIFTED being the rtx for the value to shift,
2207 and AMOUNT the amount to shift by.
2208 Store the result in the rtx TARGET, if that is convenient.
2209 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2210 Return the rtx for where the value is. */
2211
2212 rtx
2213 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2214 int amount, rtx target, int unsignedp)
2215 {
2216 /* ??? With re-writing expand_shift we could avoid going through a
2217 tree for the shift amount and directly do GEN_INT (amount). */
2218 return expand_variable_shift (code, mode, shifted,
2219 build_int_cst (integer_type_node, amount),
2220 target, unsignedp);
2221 }
2222 \f
2223 /* Indicates the type of fixup needed after a constant multiplication.
2224 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2225 the result should be negated, and ADD_VARIANT means that the
2226 multiplicand should be added to the result. */
2227 enum mult_variant {basic_variant, negate_variant, add_variant};
2228
2229 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2230 const struct mult_cost *, enum machine_mode mode);
2231 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2232 struct algorithm *, enum mult_variant *, int);
2233 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2234 const struct algorithm *, enum mult_variant);
2235 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2236 int, rtx *, int *, int *);
2237 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2238 static rtx extract_high_half (enum machine_mode, rtx);
2239 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2240 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2241 int, int);
2242 /* Compute and return the best algorithm for multiplying by T.
2243 The algorithm must cost less than cost_limit.
2244 If retval.cost >= COST_LIMIT, no algorithm was found and all
2245 other fields of the returned struct are undefined.
2246 MODE is the machine mode of the multiplication. */
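
/* For example (one possible outcome only; the actual choice depends on the
   target's add_cost/shift_cost/shiftadd_cost tables), a multiplication by
   10 can be synthesized as

       t1 = (x << 2) + x;       (x * 5,  alg_add_t2_m)
       t2 = t1 << 1;            (x * 10, alg_shift)

   which the recursion below finds by first stripping the low zero bit of
   10 and then handling the odd factor 5.  */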
2247
2248 static void
2249 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2250 const struct mult_cost *cost_limit, enum machine_mode mode)
2251 {
2252 int m;
2253 struct algorithm *alg_in, *best_alg;
2254 struct mult_cost best_cost;
2255 struct mult_cost new_limit;
2256 int op_cost, op_latency;
2257 unsigned HOST_WIDE_INT orig_t = t;
2258 unsigned HOST_WIDE_INT q;
2259 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2260 int hash_index;
2261 bool cache_hit = false;
2262 enum alg_code cache_alg = alg_zero;
2263 bool speed = optimize_insn_for_speed_p ();
2264
2265 /* Indicate that no algorithm is yet found. If no algorithm
2266 is found, this value will be returned and indicate failure. */
2267 alg_out->cost.cost = cost_limit->cost + 1;
2268 alg_out->cost.latency = cost_limit->latency + 1;
2269
2270 if (cost_limit->cost < 0
2271 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2272 return;
2273
2274 /* Restrict the bits of "t" to the multiplication's mode. */
2275 t &= GET_MODE_MASK (mode);
2276
2277 /* t == 1 can be done in zero cost. */
2278 if (t == 1)
2279 {
2280 alg_out->ops = 1;
2281 alg_out->cost.cost = 0;
2282 alg_out->cost.latency = 0;
2283 alg_out->op[0] = alg_m;
2284 return;
2285 }
2286
2287 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2288 fail now. */
2289 if (t == 0)
2290 {
2291 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2292 return;
2293 else
2294 {
2295 alg_out->ops = 1;
2296 alg_out->cost.cost = zero_cost[speed];
2297 alg_out->cost.latency = zero_cost[speed];
2298 alg_out->op[0] = alg_zero;
2299 return;
2300 }
2301 }
2302
2303 /* We'll be needing a couple extra algorithm structures now. */
2304
2305 alg_in = XALLOCA (struct algorithm);
2306 best_alg = XALLOCA (struct algorithm);
2307 best_cost = *cost_limit;
2308
2309 /* Compute the hash index. */
2310 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2311
2312 /* See if we already know what to do for T. */
2313 if (alg_hash[hash_index].t == t
2314 && alg_hash[hash_index].mode == mode
2316 && alg_hash[hash_index].speed == speed
2317 && alg_hash[hash_index].alg != alg_unknown)
2318 {
2319 cache_alg = alg_hash[hash_index].alg;
2320
2321 if (cache_alg == alg_impossible)
2322 {
2323 /* The cache tells us that it's impossible to synthesize
2324 multiplication by T within alg_hash[hash_index].cost. */
2325 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2326 /* COST_LIMIT is at least as restrictive as the one
2327 recorded in the hash table, in which case we have no
2328 hope of synthesizing a multiplication. Just
2329 return. */
2330 return;
2331
2332 /* If we get here, COST_LIMIT is less restrictive than the
2333 one recorded in the hash table, so we may be able to
2334 synthesize a multiplication. Proceed as if we didn't
2335 have the cache entry. */
2336 }
2337 else
2338 {
2339 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2340 /* The cached algorithm shows that this multiplication
2341 requires more cost than COST_LIMIT. Just return. This
2342 way, we don't clobber this cache entry with
2343 alg_impossible but retain useful information. */
2344 return;
2345
2346 cache_hit = true;
2347
2348 switch (cache_alg)
2349 {
2350 case alg_shift:
2351 goto do_alg_shift;
2352
2353 case alg_add_t_m2:
2354 case alg_sub_t_m2:
2355 goto do_alg_addsub_t_m2;
2356
2357 case alg_add_factor:
2358 case alg_sub_factor:
2359 goto do_alg_addsub_factor;
2360
2361 case alg_add_t2_m:
2362 goto do_alg_add_t2_m;
2363
2364 case alg_sub_t2_m:
2365 goto do_alg_sub_t2_m;
2366
2367 default:
2368 gcc_unreachable ();
2369 }
2370 }
2371 }
2372
2373 /* If we have a group of zero bits at the low-order part of T, try
2374 multiplying by the remaining bits and then doing a shift. */
2375
2376 if ((t & 1) == 0)
2377 {
2378 do_alg_shift:
2379 m = floor_log2 (t & -t); /* m = number of low zero bits */
2380 if (m < maxm)
2381 {
2382 q = t >> m;
2383 /* The function expand_shift will choose between a shift and
2384 a sequence of additions, so the observed cost is given as
2385 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2386 op_cost = m * add_cost[speed][mode];
2387 if (shift_cost[speed][mode][m] < op_cost)
2388 op_cost = shift_cost[speed][mode][m];
2389 new_limit.cost = best_cost.cost - op_cost;
2390 new_limit.latency = best_cost.latency - op_cost;
2391 synth_mult (alg_in, q, &new_limit, mode);
2392
2393 alg_in->cost.cost += op_cost;
2394 alg_in->cost.latency += op_cost;
2395 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2396 {
2397 struct algorithm *x;
2398 best_cost = alg_in->cost;
2399 x = alg_in, alg_in = best_alg, best_alg = x;
2400 best_alg->log[best_alg->ops] = m;
2401 best_alg->op[best_alg->ops] = alg_shift;
2402 }
2403
2404 /* See if treating ORIG_T as a signed number yields a better
2405 sequence. Try this sequence only for a negative ORIG_T
2406 as it would be useless for a non-negative ORIG_T. */
2407 if ((HOST_WIDE_INT) orig_t < 0)
2408 {
2409 /* Shift ORIG_T as follows because a right shift of a
2410 negative-valued signed type is implementation
2411 defined. */
2412 q = ~(~orig_t >> m);
2413 /* The function expand_shift will choose between a shift
2414 and a sequence of additions, so the observed cost is
2415 given as MIN (m * add_cost[speed][mode],
2416 shift_cost[speed][mode][m]). */
2417 op_cost = m * add_cost[speed][mode];
2418 if (shift_cost[speed][mode][m] < op_cost)
2419 op_cost = shift_cost[speed][mode][m];
2420 new_limit.cost = best_cost.cost - op_cost;
2421 new_limit.latency = best_cost.latency - op_cost;
2422 synth_mult (alg_in, q, &new_limit, mode);
2423
2424 alg_in->cost.cost += op_cost;
2425 alg_in->cost.latency += op_cost;
2426 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2427 {
2428 struct algorithm *x;
2429 best_cost = alg_in->cost;
2430 x = alg_in, alg_in = best_alg, best_alg = x;
2431 best_alg->log[best_alg->ops] = m;
2432 best_alg->op[best_alg->ops] = alg_shift;
2433 }
2434 }
2435 }
2436 if (cache_hit)
2437 goto done;
2438 }
2439
2440 /* If we have an odd number, add or subtract one. */
2441 if ((t & 1) != 0)
2442 {
2443 unsigned HOST_WIDE_INT w;
2444
2445 do_alg_addsub_t_m2:
2446 for (w = 1; (w & t) != 0; w <<= 1)
2447 ;
2448 /* If T was -1, then W will be zero after the loop. This is another
2449 case where T ends with ...111. Handling this by multiplying by (T + 1)
2450 and subtracting 1 produces slightly better code and makes algorithm
2451 selection much faster than treating it like the ...0111 case
2452 below. */
2453 if (w == 0
2454 || (w > 2
2455 /* Reject the case where t is 3.
2456 Thus we prefer addition in that case. */
2457 && t != 3))
2458 {
2459 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2460
2461 op_cost = add_cost[speed][mode];
2462 new_limit.cost = best_cost.cost - op_cost;
2463 new_limit.latency = best_cost.latency - op_cost;
2464 synth_mult (alg_in, t + 1, &new_limit, mode);
2465
2466 alg_in->cost.cost += op_cost;
2467 alg_in->cost.latency += op_cost;
2468 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2469 {
2470 struct algorithm *x;
2471 best_cost = alg_in->cost;
2472 x = alg_in, alg_in = best_alg, best_alg = x;
2473 best_alg->log[best_alg->ops] = 0;
2474 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2475 }
2476 }
2477 else
2478 {
2479 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2480
2481 op_cost = add_cost[speed][mode];
2482 new_limit.cost = best_cost.cost - op_cost;
2483 new_limit.latency = best_cost.latency - op_cost;
2484 synth_mult (alg_in, t - 1, &new_limit, mode);
2485
2486 alg_in->cost.cost += op_cost;
2487 alg_in->cost.latency += op_cost;
2488 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2489 {
2490 struct algorithm *x;
2491 best_cost = alg_in->cost;
2492 x = alg_in, alg_in = best_alg, best_alg = x;
2493 best_alg->log[best_alg->ops] = 0;
2494 best_alg->op[best_alg->ops] = alg_add_t_m2;
2495 }
2496 }
2497
2498 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2499 quickly as a - a * n for some appropriate constant n. */
2500 m = exact_log2 (-orig_t + 1);
2501 if (m >= 0 && m < maxm)
2502 {
2503 op_cost = shiftsub1_cost[speed][mode][m];
2504 new_limit.cost = best_cost.cost - op_cost;
2505 new_limit.latency = best_cost.latency - op_cost;
2506 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2507
2508 alg_in->cost.cost += op_cost;
2509 alg_in->cost.latency += op_cost;
2510 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2511 {
2512 struct algorithm *x;
2513 best_cost = alg_in->cost;
2514 x = alg_in, alg_in = best_alg, best_alg = x;
2515 best_alg->log[best_alg->ops] = m;
2516 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2517 }
2518 }
2519
2520 if (cache_hit)
2521 goto done;
2522 }
2523
2524 /* Look for factors of t of the form
2525 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2526 If we find such a factor, we can multiply by t using an algorithm that
2527 multiplies by q, shift the result by m and add/subtract it to itself.
2528
2529 We search for large factors first and loop down, even if large factors
2530 are less probable than small; if we find a large factor we will find a
2531 good sequence quickly, and therefore be able to prune (by decreasing
2532 COST_LIMIT) the search. */
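  /* For instance, t == 45 factors as 5 * 9 == 5 * (2**3 + 1), so x * 45
     can be reached (cost tables permitting) as

         t1 = (x << 2) + x;     (x * 5)
         t2 = (t1 << 3) + t1;   (x * 45)

     with the recursive call below finding the inner x * 5 sequence.  */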
2533
2534 do_alg_addsub_factor:
2535 for (m = floor_log2 (t - 1); m >= 2; m--)
2536 {
2537 unsigned HOST_WIDE_INT d;
2538
2539 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2540 if (t % d == 0 && t > d && m < maxm
2541 && (!cache_hit || cache_alg == alg_add_factor))
2542 {
2543 /* If the target has a cheap shift-and-add instruction use
2544 that in preference to a shift insn followed by an add insn.
2545 Assume that the shift-and-add is "atomic" with a latency
2546 equal to its cost, otherwise assume that on superscalar
2547 hardware the shift may be executed concurrently with the
2548 earlier steps in the algorithm. */
2549 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2550 if (shiftadd_cost[speed][mode][m] < op_cost)
2551 {
2552 op_cost = shiftadd_cost[speed][mode][m];
2553 op_latency = op_cost;
2554 }
2555 else
2556 op_latency = add_cost[speed][mode];
2557
2558 new_limit.cost = best_cost.cost - op_cost;
2559 new_limit.latency = best_cost.latency - op_latency;
2560 synth_mult (alg_in, t / d, &new_limit, mode);
2561
2562 alg_in->cost.cost += op_cost;
2563 alg_in->cost.latency += op_latency;
2564 if (alg_in->cost.latency < op_cost)
2565 alg_in->cost.latency = op_cost;
2566 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2567 {
2568 struct algorithm *x;
2569 best_cost = alg_in->cost;
2570 x = alg_in, alg_in = best_alg, best_alg = x;
2571 best_alg->log[best_alg->ops] = m;
2572 best_alg->op[best_alg->ops] = alg_add_factor;
2573 }
2574 /* Other factors will have been taken care of in the recursion. */
2575 break;
2576 }
2577
2578 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2579 if (t % d == 0 && t > d && m < maxm
2580 && (!cache_hit || cache_alg == alg_sub_factor))
2581 {
2582 /* If the target has a cheap shift-and-subtract insn use
2583 that in preference to a shift insn followed by a sub insn.
2584 Assume that the shift-and-sub is "atomic" with a latency
2585 equal to its cost, otherwise assume that on superscalar
2586 hardware the shift may be executed concurrently with the
2587 earlier steps in the algorithm. */
2588 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2589 if (shiftsub0_cost[speed][mode][m] < op_cost)
2590 {
2591 op_cost = shiftsub0_cost[speed][mode][m];
2592 op_latency = op_cost;
2593 }
2594 else
2595 op_latency = add_cost[speed][mode];
2596
2597 new_limit.cost = best_cost.cost - op_cost;
2598 new_limit.latency = best_cost.latency - op_latency;
2599 synth_mult (alg_in, t / d, &new_limit, mode);
2600
2601 alg_in->cost.cost += op_cost;
2602 alg_in->cost.latency += op_latency;
2603 if (alg_in->cost.latency < op_cost)
2604 alg_in->cost.latency = op_cost;
2605 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2606 {
2607 struct algorithm *x;
2608 best_cost = alg_in->cost;
2609 x = alg_in, alg_in = best_alg, best_alg = x;
2610 best_alg->log[best_alg->ops] = m;
2611 best_alg->op[best_alg->ops] = alg_sub_factor;
2612 }
2613 break;
2614 }
2615 }
2616 if (cache_hit)
2617 goto done;
2618
2619 /* Try shift-and-add (load effective address) instructions,
2620 i.e. do a*3, a*5, a*9. */
2621 if ((t & 1) != 0)
2622 {
2623 do_alg_add_t2_m:
2624 q = t - 1;
2625 q = q & -q;
2626 m = exact_log2 (q);
2627 if (m >= 0 && m < maxm)
2628 {
2629 op_cost = shiftadd_cost[speed][mode][m];
2630 new_limit.cost = best_cost.cost - op_cost;
2631 new_limit.latency = best_cost.latency - op_cost;
2632 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2633
2634 alg_in->cost.cost += op_cost;
2635 alg_in->cost.latency += op_cost;
2636 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2637 {
2638 struct algorithm *x;
2639 best_cost = alg_in->cost;
2640 x = alg_in, alg_in = best_alg, best_alg = x;
2641 best_alg->log[best_alg->ops] = m;
2642 best_alg->op[best_alg->ops] = alg_add_t2_m;
2643 }
2644 }
2645 if (cache_hit)
2646 goto done;
2647
2648 do_alg_sub_t2_m:
2649 q = t + 1;
2650 q = q & -q;
2651 m = exact_log2 (q);
2652 if (m >= 0 && m < maxm)
2653 {
2654 op_cost = shiftsub0_cost[speed][mode][m];
2655 new_limit.cost = best_cost.cost - op_cost;
2656 new_limit.latency = best_cost.latency - op_cost;
2657 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2658
2659 alg_in->cost.cost += op_cost;
2660 alg_in->cost.latency += op_cost;
2661 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2662 {
2663 struct algorithm *x;
2664 best_cost = alg_in->cost;
2665 x = alg_in, alg_in = best_alg, best_alg = x;
2666 best_alg->log[best_alg->ops] = m;
2667 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2668 }
2669 }
2670 if (cache_hit)
2671 goto done;
2672 }
2673
2674 done:
2675 /* If best_cost has not decreased, we have not found any algorithm. */
2676 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2677 {
2678 /* We failed to find an algorithm. Record alg_impossible for
2679 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2680 we are asked to find an algorithm for T within the same or
2681 lower COST_LIMIT, we can immediately return to the
2682 caller. */
2683 alg_hash[hash_index].t = t;
2684 alg_hash[hash_index].mode = mode;
2685 alg_hash[hash_index].speed = speed;
2686 alg_hash[hash_index].alg = alg_impossible;
2687 alg_hash[hash_index].cost = *cost_limit;
2688 return;
2689 }
2690
2691 /* Cache the result. */
2692 if (!cache_hit)
2693 {
2694 alg_hash[hash_index].t = t;
2695 alg_hash[hash_index].mode = mode;
2696 alg_hash[hash_index].speed = speed;
2697 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2698 alg_hash[hash_index].cost.cost = best_cost.cost;
2699 alg_hash[hash_index].cost.latency = best_cost.latency;
2700 }
2701
2702 /* If the sequence is getting too long for `struct algorithm'
2703 to record, make this search fail. */
2704 if (best_alg->ops == MAX_BITS_PER_WORD)
2705 return;
2706
2707 /* Copy the algorithm from temporary space to the space at alg_out.
2708 We avoid using structure assignment because the majority of
2709 best_alg is normally undefined, and this is a critical function. */
2710 alg_out->ops = best_alg->ops + 1;
2711 alg_out->cost = best_cost;
2712 memcpy (alg_out->op, best_alg->op,
2713 alg_out->ops * sizeof *alg_out->op);
2714 memcpy (alg_out->log, best_alg->log,
2715 alg_out->ops * sizeof *alg_out->log);
2716 }
2717 \f
2718 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2719 Try three variations:
2720
2721 - a shift/add sequence based on VAL itself
2722 - a shift/add sequence based on -VAL, followed by a negation
2723 - a shift/add sequence based on VAL - 1, followed by an addition.
2724
2725 Return true if the cheapest of these cost less than MULT_COST,
2726 describing the algorithm in *ALG and final fixup in *VARIANT. */
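
/* To illustrate (the winner depends on the cost tables): for VAL == -7 the
   basic sequence would have to synthesize the large unsigned constant
   (-7 & GET_MODE_MASK (mode)), whereas the negate variant simply builds
   7 * x as (x << 3) - x and negates the result; the add variant likewise
   synthesizes (VAL - 1) * x and adds x at the end.  */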
2727
2728 static bool
2729 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2730 struct algorithm *alg, enum mult_variant *variant,
2731 int mult_cost)
2732 {
2733 struct algorithm alg2;
2734 struct mult_cost limit;
2735 int op_cost;
2736 bool speed = optimize_insn_for_speed_p ();
2737
2738 /* Fail quickly for impossible bounds. */
2739 if (mult_cost < 0)
2740 return false;
2741
2742 /* Ensure that mult_cost provides a reasonable upper bound.
2743 Any constant multiplication can be performed with fewer
2744 than 2 * bits additions. */
2745 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2746 if (mult_cost > op_cost)
2747 mult_cost = op_cost;
2748
2749 *variant = basic_variant;
2750 limit.cost = mult_cost;
2751 limit.latency = mult_cost;
2752 synth_mult (alg, val, &limit, mode);
2753
2754 /* This works only if the inverted value actually fits in an
2755 `unsigned int'. */
2756 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2757 {
2758 op_cost = neg_cost[speed][mode];
2759 if (MULT_COST_LESS (&alg->cost, mult_cost))
2760 {
2761 limit.cost = alg->cost.cost - op_cost;
2762 limit.latency = alg->cost.latency - op_cost;
2763 }
2764 else
2765 {
2766 limit.cost = mult_cost - op_cost;
2767 limit.latency = mult_cost - op_cost;
2768 }
2769
2770 synth_mult (&alg2, -val, &limit, mode);
2771 alg2.cost.cost += op_cost;
2772 alg2.cost.latency += op_cost;
2773 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2774 *alg = alg2, *variant = negate_variant;
2775 }
2776
2777 /* This proves very useful for division-by-constant. */
2778 op_cost = add_cost[speed][mode];
2779 if (MULT_COST_LESS (&alg->cost, mult_cost))
2780 {
2781 limit.cost = alg->cost.cost - op_cost;
2782 limit.latency = alg->cost.latency - op_cost;
2783 }
2784 else
2785 {
2786 limit.cost = mult_cost - op_cost;
2787 limit.latency = mult_cost - op_cost;
2788 }
2789
2790 synth_mult (&alg2, val - 1, &limit, mode);
2791 alg2.cost.cost += op_cost;
2792 alg2.cost.latency += op_cost;
2793 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2794 *alg = alg2, *variant = add_variant;
2795
2796 return MULT_COST_LESS (&alg->cost, mult_cost);
2797 }
2798
2799 /* A subroutine of expand_mult, used for constant multiplications.
2800 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2801 convenient. Use the shift/add sequence described by ALG and apply
2802 the final fixup specified by VARIANT. */
2803
2804 static rtx
2805 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2806 rtx target, const struct algorithm *alg,
2807 enum mult_variant variant)
2808 {
2809 HOST_WIDE_INT val_so_far;
2810 rtx insn, accum, tem;
2811 int opno;
2812 enum machine_mode nmode;
2813
2814 /* Avoid referencing memory over and over and invalid sharing
2815 on SUBREGs. */
2816 op0 = force_reg (mode, op0);
2817
2818 /* ACCUM starts out either as OP0 or as a zero, depending on
2819 the first operation. */
2820
2821 if (alg->op[0] == alg_zero)
2822 {
2823 accum = copy_to_mode_reg (mode, const0_rtx);
2824 val_so_far = 0;
2825 }
2826 else if (alg->op[0] == alg_m)
2827 {
2828 accum = copy_to_mode_reg (mode, op0);
2829 val_so_far = 1;
2830 }
2831 else
2832 gcc_unreachable ();
2833
2834 for (opno = 1; opno < alg->ops; opno++)
2835 {
2836 int log = alg->log[opno];
2837 rtx shift_subtarget = optimize ? 0 : accum;
2838 rtx add_target
2839 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2840 && !optimize)
2841 ? target : 0;
2842 rtx accum_target = optimize ? 0 : accum;
2843
2844 switch (alg->op[opno])
2845 {
2846 case alg_shift:
2847 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2848 /* REG_EQUAL note will be attached to the following insn. */
2849 emit_move_insn (accum, tem);
2850 val_so_far <<= log;
2851 break;
2852
2853 case alg_add_t_m2:
2854 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2855 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2856 add_target ? add_target : accum_target);
2857 val_so_far += (HOST_WIDE_INT) 1 << log;
2858 break;
2859
2860 case alg_sub_t_m2:
2861 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2862 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2863 add_target ? add_target : accum_target);
2864 val_so_far -= (HOST_WIDE_INT) 1 << log;
2865 break;
2866
2867 case alg_add_t2_m:
2868 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2869 log, shift_subtarget, 0);
2870 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2871 add_target ? add_target : accum_target);
2872 val_so_far = (val_so_far << log) + 1;
2873 break;
2874
2875 case alg_sub_t2_m:
2876 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2877 log, shift_subtarget, 0);
2878 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2879 add_target ? add_target : accum_target);
2880 val_so_far = (val_so_far << log) - 1;
2881 break;
2882
2883 case alg_add_factor:
2884 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2885 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2886 add_target ? add_target : accum_target);
2887 val_so_far += val_so_far << log;
2888 break;
2889
2890 case alg_sub_factor:
2891 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2892 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2893 (add_target
2894 ? add_target : (optimize ? 0 : tem)));
2895 val_so_far = (val_so_far << log) - val_so_far;
2896 break;
2897
2898 default:
2899 gcc_unreachable ();
2900 }
2901
2902 /* Write a REG_EQUAL note on the last insn so that we can cse
2903 multiplication sequences. Note that if ACCUM is a SUBREG,
2904 we've set the inner register and must properly indicate
2905 that. */
2906
2907 tem = op0, nmode = mode;
2908 if (GET_CODE (accum) == SUBREG)
2909 {
2910 nmode = GET_MODE (SUBREG_REG (accum));
2911 tem = gen_lowpart (nmode, op0);
2912 }
2913
2914 insn = get_last_insn ();
2915 set_unique_reg_note (insn, REG_EQUAL,
2916 gen_rtx_MULT (nmode, tem,
2917 GEN_INT (val_so_far)));
2918 }
2919
2920 if (variant == negate_variant)
2921 {
2922 val_so_far = -val_so_far;
2923 accum = expand_unop (mode, neg_optab, accum, target, 0);
2924 }
2925 else if (variant == add_variant)
2926 {
2927 val_so_far = val_so_far + 1;
2928 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2929 }
2930
2931 /* Compare only the bits of val and val_so_far that are significant
2932 in the result mode, to avoid sign-/zero-extension confusion. */
2933 val &= GET_MODE_MASK (mode);
2934 val_so_far &= GET_MODE_MASK (mode);
2935 gcc_assert (val == val_so_far);
2936
2937 return accum;
2938 }
2939
2940 /* Perform a multiplication and return an rtx for the result.
2941 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2942 TARGET is a suggestion for where to store the result (an rtx).
2943
2944 We check specially for a constant integer as OP1.
2945 If you want this check for OP0 as well, then before calling
2946 you should swap the two operands if OP0 would be constant. */
2947
2948 rtx
2949 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2950 int unsignedp)
2951 {
2952 enum mult_variant variant;
2953 struct algorithm algorithm;
2954 int max_cost;
2955 bool speed = optimize_insn_for_speed_p ();
2956
2957 /* Handling const0_rtx here allows us to use zero as a rogue value for
2958 coeff below. */
2959 if (op1 == const0_rtx)
2960 return const0_rtx;
2961 if (op1 == const1_rtx)
2962 return op0;
2963 if (op1 == constm1_rtx)
2964 return expand_unop (mode,
2965 GET_MODE_CLASS (mode) == MODE_INT
2966 && !unsignedp && flag_trapv
2967 ? negv_optab : neg_optab,
2968 op0, target, 0);
2969
2970 /* These are the operations that are potentially turned into a sequence
2971 of shifts and additions. */
2972 if (SCALAR_INT_MODE_P (mode)
2973 && (unsignedp || !flag_trapv))
2974 {
2975 HOST_WIDE_INT coeff = 0;
2976 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
2977
2978 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2979 less than or equal in size to `unsigned int' this doesn't matter.
2980 If the mode is larger than `unsigned int', then synth_mult works
2981 only if the constant value exactly fits in an `unsigned int' without
2982 any truncation. This means that multiplying by negative values does
2983 not work; results are off by 2^32 on a 32-bit machine.
2984
2985 if (CONST_INT_P (op1))
2986 {
2987 /* Attempt to handle multiplication of DImode values by negative
2988 coefficients, by performing the multiplication by a positive
2989 multiplier and then inverting the result. */
2990 if (INTVAL (op1) < 0
2991 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
2992 {
2993 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
2994 result is interpreted as an unsigned coefficient.
2995 Exclude cost of op0 from max_cost to match the cost
2996 calculation of the synth_mult. */
2997 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
2998 - neg_cost[speed][mode];
2999 if (max_cost > 0
3000 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3001 &variant, max_cost))
3002 {
3003 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3004 NULL_RTX, &algorithm,
3005 variant);
3006 return expand_unop (mode, neg_optab, temp, target, 0);
3007 }
3008 }
3009 else coeff = INTVAL (op1);
3010 }
3011 else if (GET_CODE (op1) == CONST_DOUBLE)
3012 {
3013 /* If we are multiplying in DImode, it may still be a win
3014 to try to work with shifts and adds. */
3015 if (CONST_DOUBLE_HIGH (op1) == 0
3016 && CONST_DOUBLE_LOW (op1) > 0)
3017 coeff = CONST_DOUBLE_LOW (op1);
3018 else if (CONST_DOUBLE_LOW (op1) == 0
3019 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3020 {
3021 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3022 + HOST_BITS_PER_WIDE_INT;
3023 return expand_shift (LSHIFT_EXPR, mode, op0,
3024 shift, target, unsignedp);
3025 }
3026 }
3027
3028 /* We used to test optimize here, on the grounds that it's better to
3029 produce a smaller program when -O is not used. But this causes
3030 such a terrible slowdown sometimes that it seems better to always
3031 use synth_mult. */
3032 if (coeff != 0)
3033 {
3034 /* Special case powers of two. */
3035 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3036 return expand_shift (LSHIFT_EXPR, mode, op0,
3037 floor_log2 (coeff), target, unsignedp);
3038
3039 /* Exclude cost of op0 from max_cost to match the cost
3040 calculation of the synth_mult. */
3041 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3042 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3043 max_cost))
3044 return expand_mult_const (mode, op0, coeff, target,
3045 &algorithm, variant);
3046 }
3047 }
3048
3049 if (GET_CODE (op0) == CONST_DOUBLE)
3050 {
3051 rtx temp = op0;
3052 op0 = op1;
3053 op1 = temp;
3054 }
3055
3056 /* Expand x*2.0 as x+x. */
3057 if (GET_CODE (op1) == CONST_DOUBLE
3058 && SCALAR_FLOAT_MODE_P (mode))
3059 {
3060 REAL_VALUE_TYPE d;
3061 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3062
3063 if (REAL_VALUES_EQUAL (d, dconst2))
3064 {
3065 op0 = force_reg (GET_MODE (op0), op0);
3066 return expand_binop (mode, add_optab, op0, op0,
3067 target, unsignedp, OPTAB_LIB_WIDEN);
3068 }
3069 }
3070
3071 /* This used to use umul_optab if unsigned, but for non-widening multiply
3072 there is no difference between signed and unsigned. */
3073 op0 = expand_binop (mode,
3074 ! unsignedp
3075 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3076 ? smulv_optab : smul_optab,
3077 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3078 gcc_assert (op0);
3079 return op0;
3080 }
3081
3082 /* Perform a widening multiplication and return an rtx for the result.
3083 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3084 TARGET is a suggestion for where to store the result (an rtx).
3085 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3086 or smul_widen_optab.
3087
3088 We check specially for a constant integer as OP1, comparing the
3089 cost of a widening multiply against the cost of a sequence of shifts
3090 and adds. */
3091
3092 rtx
3093 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3094 int unsignedp, optab this_optab)
3095 {
3096 bool speed = optimize_insn_for_speed_p ();
3097 rtx cop1;
3098
3099 if (CONST_INT_P (op1)
3100 && GET_MODE (op0) != VOIDmode
3101 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3102 this_optab == umul_widen_optab))
3103 && CONST_INT_P (cop1)
3104 && (INTVAL (cop1) >= 0
3105 || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3106 {
3107 HOST_WIDE_INT coeff = INTVAL (cop1);
3108 int max_cost;
3109 enum mult_variant variant;
3110 struct algorithm algorithm;
3111
3112 /* Special case powers of two. */
3113 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3114 {
3115 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3116 return expand_shift (LSHIFT_EXPR, mode, op0,
3117 floor_log2 (coeff), target, unsignedp);
3118 }
3119
3120 /* Exclude cost of op0 from max_cost to match the cost
3121 calculation of the synth_mult. */
3122 max_cost = mul_widen_cost[speed][mode];
3123 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3124 max_cost))
3125 {
3126 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3127 return expand_mult_const (mode, op0, coeff, target,
3128 &algorithm, variant);
3129 }
3130 }
3131 return expand_binop (mode, this_optab, op0, op1, target,
3132 unsignedp, OPTAB_LIB_WIDEN);
3133 }
3134 \f
3135 /* Return the smallest n such that 2**n >= X. */
3136
3137 int
3138 ceil_log2 (unsigned HOST_WIDE_INT x)
3139 {
3140 return floor_log2 (x - 1) + 1;
3141 }
3142
3143 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3144 replace division by D, and put the least significant N bits of the result
3145 in *MULTIPLIER_PTR and return the most significant bit.
3146
3147 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3148 needed precision is in PRECISION (should be <= N).
3149
3150 PRECISION should be as small as possible so this function can choose
3151 a multiplier more freely.
3152
3153 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3154 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3155
3156 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3157 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
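
/* As a worked example (assuming N == PRECISION == 32): for D == 3 this
   yields *MULTIPLIER_PTR == 0xAAAAAAAB, *POST_SHIFT_PTR == 1,
   *lgup_ptr == 2 and a zero return value, so an unsigned x / 3 becomes
   the high 32 bits of (x * 0xAAAAAAAB), shifted right by 1.  */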
3158
3159 static
3160 unsigned HOST_WIDE_INT
3161 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3162 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3163 {
3164 HOST_WIDE_INT mhigh_hi, mlow_hi;
3165 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3166 int lgup, post_shift;
3167 int pow, pow2;
3168 unsigned HOST_WIDE_INT nl, dummy1;
3169 HOST_WIDE_INT nh, dummy2;
3170
3171 /* lgup = ceil(log2(divisor)); */
3172 lgup = ceil_log2 (d);
3173
3174 gcc_assert (lgup <= n);
3175
3176 pow = n + lgup;
3177 pow2 = n + lgup - precision;
3178
3179 /* We could handle this with some effort, but this case is much
3180 better handled directly with a scc insn, so rely on caller using
3181 that. */
3182 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3183
3184 /* mlow = 2^(N + lgup)/d */
3185 if (pow >= HOST_BITS_PER_WIDE_INT)
3186 {
3187 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3188 nl = 0;
3189 }
3190 else
3191 {
3192 nh = 0;
3193 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3194 }
3195 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3196 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3197
3198 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3199 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3200 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3201 else
3202 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3203 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3204 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3205
3206 gcc_assert (!mhigh_hi || nh - d < d);
3207 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3208 /* Assert that mlow < mhigh. */
3209 gcc_assert (mlow_hi < mhigh_hi
3210 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3211
3212 /* If precision == N, then mlow, mhigh exceed 2^N
3213 (but they do not exceed 2^(N+1)). */
3214
3215 /* Reduce to lowest terms. */
3216 for (post_shift = lgup; post_shift > 0; post_shift--)
3217 {
3218 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3219 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3220 if (ml_lo >= mh_lo)
3221 break;
3222
3223 mlow_hi = 0;
3224 mlow_lo = ml_lo;
3225 mhigh_hi = 0;
3226 mhigh_lo = mh_lo;
3227 }
3228
3229 *post_shift_ptr = post_shift;
3230 *lgup_ptr = lgup;
3231 if (n < HOST_BITS_PER_WIDE_INT)
3232 {
3233 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3234 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3235 return mhigh_lo >= mask;
3236 }
3237 else
3238 {
3239 *multiplier_ptr = GEN_INT (mhigh_lo);
3240 return mhigh_hi;
3241 }
3242 }
3243
3244 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3245 congruent to 1 (mod 2**N). */
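
/* For example, invert_mod2n (3, 8) == 171, since 3 * 171 == 513
   == 2 * 256 + 1.  */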
3246
3247 static unsigned HOST_WIDE_INT
3248 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3249 {
3250 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3251
3252 /* The algorithm notes that the choice y = x satisfies
3253 x*y == 1 mod 2^3, since x is assumed odd.
3254 Each iteration doubles the number of bits of significance in y. */
3255
3256 unsigned HOST_WIDE_INT mask;
3257 unsigned HOST_WIDE_INT y = x;
3258 int nbit = 3;
3259
3260 mask = (n == HOST_BITS_PER_WIDE_INT
3261 ? ~(unsigned HOST_WIDE_INT) 0
3262 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3263
3264 while (nbit < n)
3265 {
3266 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3267 nbit *= 2;
3268 }
3269 return y;
3270 }
3271
3272 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3273 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3274 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3275 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3276 become signed.
3277
3278 The result is put in TARGET if that is convenient.
3279
3280 MODE is the mode of operation. */
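
/* One way to see the adjustment below (working mod 2^N, where N is the
   width of MODE): reading X and Y as signed values, the unsigned high
   part equals the signed high part plus (X < 0 ? Y : 0) plus
   (Y < 0 ? X : 0); the correction terms are formed with an arithmetic
   shift right by N - 1 followed by an AND, and are added or subtracted
   depending on which direction of adjustment is requested.  */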
3281
3282 rtx
3283 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3284 rtx op1, rtx target, int unsignedp)
3285 {
3286 rtx tem;
3287 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3288
3289 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3290 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3291 tem = expand_and (mode, tem, op1, NULL_RTX);
3292 adj_operand
3293 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3294 adj_operand);
3295
3296 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3297 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3298 tem = expand_and (mode, tem, op0, NULL_RTX);
3299 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3300 target);
3301
3302 return target;
3303 }
3304
3305 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3306
3307 static rtx
3308 extract_high_half (enum machine_mode mode, rtx op)
3309 {
3310 enum machine_mode wider_mode;
3311
3312 if (mode == word_mode)
3313 return gen_highpart (mode, op);
3314
3315 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3316
3317 wider_mode = GET_MODE_WIDER_MODE (mode);
3318 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3319 GET_MODE_BITSIZE (mode), 0, 1);
3320 return convert_modes (mode, wider_mode, op, 0);
3321 }
3322
3323 /* Like expand_mult_highpart, but only consider using a multiplication
3324 optab. OP1 is an rtx for the constant operand. */
3325
3326 static rtx
3327 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3328 rtx target, int unsignedp, int max_cost)
3329 {
3330 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3331 enum machine_mode wider_mode;
3332 optab moptab;
3333 rtx tem;
3334 int size;
3335 bool speed = optimize_insn_for_speed_p ();
3336
3337 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3338
3339 wider_mode = GET_MODE_WIDER_MODE (mode);
3340 size = GET_MODE_BITSIZE (mode);
3341
3342 /* Firstly, try using a multiplication insn that only generates the needed
3343 high part of the product, and in the sign flavor of unsignedp. */
3344 if (mul_highpart_cost[speed][mode] < max_cost)
3345 {
3346 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3347 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3348 unsignedp, OPTAB_DIRECT);
3349 if (tem)
3350 return tem;
3351 }
3352
3353 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3354 Need to adjust the result after the multiplication. */
3355 if (size - 1 < BITS_PER_WORD
3356 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3357 + 4 * add_cost[speed][mode] < max_cost))
3358 {
3359 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3360 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3361 unsignedp, OPTAB_DIRECT);
3362 if (tem)
3363 /* We used the wrong signedness. Adjust the result. */
3364 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3365 tem, unsignedp);
3366 }
3367
3368 /* Try widening multiplication. */
3369 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3370 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3371 && mul_widen_cost[speed][wider_mode] < max_cost)
3372 {
3373 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3374 unsignedp, OPTAB_WIDEN);
3375 if (tem)
3376 return extract_high_half (mode, tem);
3377 }
3378
3379 /* Try widening the mode and perform a non-widening multiplication. */
3380 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3381 && size - 1 < BITS_PER_WORD
3382 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3383 {
3384 rtx insns, wop0, wop1;
3385
3386 /* We need to widen the operands, for example to ensure the
3387 constant multiplier is correctly sign or zero extended.
3388 Use a sequence to clean up any instructions emitted by
3389 the conversions if things don't work out. */
3390 start_sequence ();
3391 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3392 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3393 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3394 unsignedp, OPTAB_WIDEN);
3395 insns = get_insns ();
3396 end_sequence ();
3397
3398 if (tem)
3399 {
3400 emit_insn (insns);
3401 return extract_high_half (mode, tem);
3402 }
3403 }
3404
3405 /* Try widening multiplication of opposite signedness, and adjust. */
3406 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3407 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3408 && size - 1 < BITS_PER_WORD
3409 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3410 + 4 * add_cost[speed][mode] < max_cost))
3411 {
3412 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3413 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3414 if (tem != 0)
3415 {
3416 tem = extract_high_half (mode, tem);
3417 /* We used the wrong signedness. Adjust the result. */
3418 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3419 target, unsignedp);
3420 }
3421 }
3422
3423 return 0;
3424 }
3425
3426 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3427 putting the high half of the result in TARGET if that is convenient,
3428 and return where the result is. If the operation cannot be performed,
3429 0 is returned.
3430
3431 MODE is the mode of operation and result.
3432
3433 UNSIGNEDP nonzero means unsigned multiply.
3434
3435 MAX_COST is the total allowed cost for the expanded RTL. */
3436
3437 static rtx
3438 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3439 rtx target, int unsignedp, int max_cost)
3440 {
3441 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3442 unsigned HOST_WIDE_INT cnst1;
3443 int extra_cost;
3444 bool sign_adjust = false;
3445 enum mult_variant variant;
3446 struct algorithm alg;
3447 rtx tem;
3448 bool speed = optimize_insn_for_speed_p ();
3449
3450 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3451 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3452 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3453
3454 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3455
3456 /* We can't optimize modes wider than BITS_PER_WORD.
3457 ??? We might be able to perform double-word arithmetic if
3458 mode == word_mode, however all the cost calculations in
3459 synth_mult etc. assume single-word operations. */
3460 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3461 return expand_mult_highpart_optab (mode, op0, op1, target,
3462 unsignedp, max_cost);
3463
3464 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3465
3466 /* Check whether we try to multiply by a negative constant. */
3467 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3468 {
3469 sign_adjust = true;
3470 extra_cost += add_cost[speed][mode];
3471 }
3472
3473 /* See whether shift/add multiplication is cheap enough. */
3474 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3475 max_cost - extra_cost))
3476 {
3477 /* See whether the specialized multiplication optabs are
3478 cheaper than the shift/add version. */
3479 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3480 alg.cost.cost + extra_cost);
3481 if (tem)
3482 return tem;
3483
3484 tem = convert_to_mode (wider_mode, op0, unsignedp);
3485 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3486 tem = extract_high_half (mode, tem);
3487
3488 /* Adjust result for signedness. */
3489 if (sign_adjust)
3490 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3491
3492 return tem;
3493 }
3494 return expand_mult_highpart_optab (mode, op0, op1, target,
3495 unsignedp, max_cost);
3496 }
3497
3498
3499 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3500
3501 static rtx
3502 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3503 {
3504 unsigned HOST_WIDE_INT masklow, maskhigh;
3505 rtx result, temp, shift, label;
3506 int logd;
3507
3508 logd = floor_log2 (d);
3509 result = gen_reg_rtx (mode);
3510
3511 /* Avoid conditional branches when they're expensive. */
3512 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3513 && optimize_insn_for_speed_p ())
3514 {
3515 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3516 mode, 0, -1);
3517 if (signmask)
3518 {
3519 signmask = force_reg (mode, signmask);
3520 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3521 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3522
3523 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3524 which instruction sequence to use. If logical right shifts
3525 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3526 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
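/* Either way the result is the truncating remainder.  For example, with
   op0 = -7 and d = 4 (so signmask = -1): the XOR/SUB sequence computes
   ((((-7 ^ -1) - -1) & 3) ^ -1) - -1 = (3 ^ -1) + 1 = -3, and the
   LSHIFTRT sequence computes ((-7 + 3) & 3) - 3 = 0 - 3 = -3, both
   matching the truncating remainder of -7 by 4.  */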
3527
3528 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3529 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3530 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3531 {
3532 temp = expand_binop (mode, xor_optab, op0, signmask,
3533 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534 temp = expand_binop (mode, sub_optab, temp, signmask,
3535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3538 temp = expand_binop (mode, xor_optab, temp, signmask,
3539 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3540 temp = expand_binop (mode, sub_optab, temp, signmask,
3541 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3542 }
3543 else
3544 {
3545 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3547 signmask = force_reg (mode, signmask);
3548
3549 temp = expand_binop (mode, add_optab, op0, signmask,
3550 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3551 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3552 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3553 temp = expand_binop (mode, sub_optab, temp, signmask,
3554 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3555 }
3556 return temp;
3557 }
3558 }
3559
3560 /* Mask contains the mode's signbit and the significant bits of the
3561 modulus. By including the signbit in the operation, many targets
3562 can avoid an explicit compare operation in the following comparison
3563 against zero. */
3564
3565 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3566 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3567 {
3568 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3569 maskhigh = -1;
3570 }
3571 else
3572 maskhigh = (HOST_WIDE_INT) -1
3573 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3574
3575 temp = expand_binop (mode, and_optab, op0,
3576 immed_double_const (masklow, maskhigh, mode),
3577 result, 1, OPTAB_LIB_WIDEN);
3578 if (temp != result)
3579 emit_move_insn (result, temp);
3580
3581 label = gen_label_rtx ();
3582 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3583
3584 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3585 0, OPTAB_LIB_WIDEN);
3586 masklow = (HOST_WIDE_INT) -1 << logd;
3587 maskhigh = -1;
3588 temp = expand_binop (mode, ior_optab, temp,
3589 immed_double_const (masklow, maskhigh, mode),
3590 result, 1, OPTAB_LIB_WIDEN);
3591 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3592 0, OPTAB_LIB_WIDEN);
3593 if (temp != result)
3594 emit_move_insn (result, temp);
3595 emit_label (label);
3596 return result;
3597 }
3598
3599 /* Expand signed division of OP0 by a power of two D in mode MODE.
3600 This routine is only called for positive values of D. */
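/* Each variant below implements the usual bias trick: signed division by
   D = 2**logd that truncates toward zero is
       (op0 + (op0 < 0 ? D - 1 : 0)) >> logd
   with an arithmetic right shift.  E.g. op0 = -5, D = 4: the bias is 3 and
   (-5 + 3) >> 2 = -2 >> 2 = -1 = trunc (-5 / 4).  */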
3601
3602 static rtx
3603 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3604 {
3605 rtx temp, label;
3606 int logd;
3607
3608 logd = floor_log2 (d);
3609
3610 if (d == 2
3611 && BRANCH_COST (optimize_insn_for_speed_p (),
3612 false) >= 1)
3613 {
3614 temp = gen_reg_rtx (mode);
3615 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3616 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3617 0, OPTAB_LIB_WIDEN);
3618 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3619 }
3620
3621 #ifdef HAVE_conditional_move
3622 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3623 >= 2)
3624 {
3625 rtx temp2;
3626
3627 /* ??? emit_conditional_move forces a stack adjustment via
3628 compare_from_rtx so, if the sequence is discarded, it will
3629 be lost. Do it now instead. */
3630 do_pending_stack_adjust ();
3631
3632 start_sequence ();
3633 temp2 = copy_to_mode_reg (mode, op0);
3634 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3635 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3636 temp = force_reg (mode, temp);
3637
3638 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3639 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3640 mode, temp, temp2, mode, 0);
3641 if (temp2)
3642 {
3643 rtx seq = get_insns ();
3644 end_sequence ();
3645 emit_insn (seq);
3646 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3647 }
3648 end_sequence ();
3649 }
3650 #endif
3651
3652 if (BRANCH_COST (optimize_insn_for_speed_p (),
3653 false) >= 2)
3654 {
3655 int ushift = GET_MODE_BITSIZE (mode) - logd;
3656
3657 temp = gen_reg_rtx (mode);
3658 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3659 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3660 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3661 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3662 else
3663 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3664 ushift, NULL_RTX, 1);
3665 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3666 0, OPTAB_LIB_WIDEN);
3667 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3668 }
3669
3670 label = gen_label_rtx ();
3671 temp = copy_to_mode_reg (mode, op0);
3672 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3673 expand_inc (temp, GEN_INT (d - 1));
3674 emit_label (label);
3675 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3676 }
3677 \f
3678 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3679 if that is convenient, and returning where the result is.
3680 You may request either the quotient or the remainder as the result;
3681 specify REM_FLAG nonzero to get the remainder.
3682
3683 CODE is the expression code for which kind of division this is;
3684 it controls how rounding is done. MODE is the machine mode to use.
3685 UNSIGNEDP nonzero means do unsigned division. */
3686
3687 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3688 and then correct it by or'ing in missing high bits
3689 if result of ANDI is nonzero.
3690 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3691 This could optimize to a bfexts instruction.
3692 But C doesn't use these operations, so their optimizations are
3693 left for later. */
3694 /* ??? For modulo, we don't actually need the highpart of the first product,
3695 the low part will do nicely. And for small divisors, the second multiply
3696 can also be a low-part only multiply or even be completely left out.
3697 E.g. to calculate the remainder of a division by 3 with a 32 bit
3698 multiply, multiply with 0x55555556 and extract the upper two bits;
3699 the result is exact for inputs up to 0x1fffffff.
3700 The input range can be reduced by using cross-sum rules.
3701 For odd divisors >= 3, the following table gives right shift counts
3702 so that if a number is shifted by an integer multiple of the given
3703 amount, the remainder stays the same:
3704 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3705 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3706 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3707 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3708 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3709
3710 Cross-sum rules for even numbers can be derived by leaving as many bits
3711 to the right alone as the divisor has zeros to the right.
3712 E.g. if x is an unsigned 32 bit number:
3713 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3714 */
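/* Checking the division-by-3 example above: 0x55555556 is 2**32 / 3 rounded
   up, and for x = 7 the 32-bit product is 0x5555555A (the full product being
   0x25555555A), whose top two bits are 01 = 7 mod 3.  */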
3715
3716 rtx
3717 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3718 rtx op0, rtx op1, rtx target, int unsignedp)
3719 {
3720 enum machine_mode compute_mode;
3721 rtx tquotient;
3722 rtx quotient = 0, remainder = 0;
3723 rtx last;
3724 int size;
3725 rtx insn, set;
3726 optab optab1, optab2;
3727 int op1_is_constant, op1_is_pow2 = 0;
3728 int max_cost, extra_cost;
3729 static HOST_WIDE_INT last_div_const = 0;
3730 static HOST_WIDE_INT ext_op1;
3731 bool speed = optimize_insn_for_speed_p ();
3732
3733 op1_is_constant = CONST_INT_P (op1);
3734 if (op1_is_constant)
3735 {
3736 ext_op1 = INTVAL (op1);
3737 if (unsignedp)
3738 ext_op1 &= GET_MODE_MASK (mode);
3739 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3740 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3741 }
3742
3743 /*
3744 This is the structure of expand_divmod:
3745
3746 First comes code to fix up the operands so we can perform the operations
3747 correctly and efficiently.
3748
3749 Second comes a switch statement with code specific for each rounding mode.
3750 For some special operands this code emits all RTL for the desired
3751 operation; for other cases, it generates only a quotient and stores it in
3752 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3753 to indicate that it has not done anything.
3754
3755 Last comes code that finishes the operation. If QUOTIENT is set and
3756 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3757 QUOTIENT is not set, it is computed using trunc rounding.
3758
3759 We try to generate special code for division and remainder when OP1 is a
3760 constant. If |OP1| = 2**n we can use shifts and some other fast
3761 operations. For other values of OP1, we compute a carefully selected
3762 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3763 by m.
3764
3765 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3766 half of the product. Different strategies for generating the product are
3767 implemented in expand_mult_highpart.
3768
3769 If what we actually want is the remainder, we generate that by another
3770 by-constant multiplication and a subtraction. */
3771
3772 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3773 code below will malfunction if we are, so check here and handle
3774 the special case if so. */
3775 if (op1 == const1_rtx)
3776 return rem_flag ? const0_rtx : op0;
3777
3778 /* When dividing by -1, we could get an overflow.
3779 negv_optab can handle overflows. */
3780 if (! unsignedp && op1 == constm1_rtx)
3781 {
3782 if (rem_flag)
3783 return const0_rtx;
3784 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3785 ? negv_optab : neg_optab, op0, target, 0);
3786 }
3787
3788 if (target
3789 /* Don't use the function value register as a target
3790 since we have to read it as well as write it,
3791 and function-inlining gets confused by this. */
3792 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3793 /* Don't clobber an operand while doing a multi-step calculation. */
3794 || ((rem_flag || op1_is_constant)
3795 && (reg_mentioned_p (target, op0)
3796 || (MEM_P (op0) && MEM_P (target))))
3797 || reg_mentioned_p (target, op1)
3798 || (MEM_P (op1) && MEM_P (target))))
3799 target = 0;
3800
3801 /* Get the mode in which to perform this computation. Normally it will
3802 be MODE, but sometimes we can't do the desired operation in MODE.
3803 If so, pick a wider mode in which we can do the operation. Convert
3804 to that mode at the start to avoid repeated conversions.
3805
3806 First see what operations we need. These depend on the expression
3807 we are evaluating. (We assume that divxx3 insns exist under the
3808 same conditions as modxx3 insns and that these insns don't normally
3809 fail. If these assumptions are not correct, we may generate less
3810 efficient code in some cases.)
3811
3812 Then see if we find a mode in which we can open-code that operation
3813 (either a division, modulus, or shift). Finally, check for the smallest
3814 mode for which we can do the operation with a library call. */
3815
3816 /* We might want to refine this now that we have division-by-constant
3817 optimization. Since expand_mult_highpart tries so many variants, it is
3818 not straightforward to generalize this. Maybe we should make an array
3819 of possible modes in init_expmed? Save this for GCC 2.7. */
3820
3821 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3822 ? (unsignedp ? lshr_optab : ashr_optab)
3823 : (unsignedp ? udiv_optab : sdiv_optab));
3824 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3825 ? optab1
3826 : (unsignedp ? udivmod_optab : sdivmod_optab));
3827
3828 for (compute_mode = mode; compute_mode != VOIDmode;
3829 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3830 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3831 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3832 break;
3833
3834 if (compute_mode == VOIDmode)
3835 for (compute_mode = mode; compute_mode != VOIDmode;
3836 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3837 if (optab_libfunc (optab1, compute_mode)
3838 || optab_libfunc (optab2, compute_mode))
3839 break;
3840
3841 /* If we still couldn't find a mode, use MODE, but expand_binop will
3842 probably die. */
3843 if (compute_mode == VOIDmode)
3844 compute_mode = mode;
3845
3846 if (target && GET_MODE (target) == compute_mode)
3847 tquotient = target;
3848 else
3849 tquotient = gen_reg_rtx (compute_mode);
3850
3851 size = GET_MODE_BITSIZE (compute_mode);
3852 #if 0
3853 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3854 (mode), and thereby get better code when OP1 is a constant. Do that
3855 later. It will require going over all usages of SIZE below. */
3856 size = GET_MODE_BITSIZE (mode);
3857 #endif
3858
3859 /* Only deduct something for a REM if the last divide done was
3860 for a different constant. Then set the constant of the last
3861 divide. */
3862 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3863 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3864 && INTVAL (op1) == last_div_const))
3865 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3866
3867 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3868
3869 /* Now convert to the best mode to use. */
3870 if (compute_mode != mode)
3871 {
3872 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3873 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3874
3875 /* convert_modes may have placed op1 into a register, so we
3876 must recompute the following. */
3877 op1_is_constant = CONST_INT_P (op1);
3878 op1_is_pow2 = (op1_is_constant
3879 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3880 || (! unsignedp
3881 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3882 }
3883
3884 /* If one of the operands is a volatile MEM, copy it into a register. */
3885
3886 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3887 op0 = force_reg (compute_mode, op0);
3888 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3889 op1 = force_reg (compute_mode, op1);
3890
3891 /* If we need the remainder or if OP1 is constant, we need to
3892 put OP0 in a register in case it has any queued subexpressions. */
3893 if (rem_flag || op1_is_constant)
3894 op0 = force_reg (compute_mode, op0);
3895
3896 last = get_last_insn ();
3897
3898 /* Promote floor rounding to trunc rounding for unsigned operations. */
3899 if (unsignedp)
3900 {
3901 if (code == FLOOR_DIV_EXPR)
3902 code = TRUNC_DIV_EXPR;
3903 if (code == FLOOR_MOD_EXPR)
3904 code = TRUNC_MOD_EXPR;
3905 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3906 code = TRUNC_DIV_EXPR;
3907 }
3908
3909 if (op1 != const0_rtx)
3910 switch (code)
3911 {
3912 case TRUNC_MOD_EXPR:
3913 case TRUNC_DIV_EXPR:
3914 if (op1_is_constant)
3915 {
3916 if (unsignedp)
3917 {
3918 unsigned HOST_WIDE_INT mh;
3919 int pre_shift, post_shift;
3920 int dummy;
3921 rtx ml;
3922 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3923 & GET_MODE_MASK (compute_mode));
3924
3925 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3926 {
3927 pre_shift = floor_log2 (d);
3928 if (rem_flag)
3929 {
3930 remainder
3931 = expand_binop (compute_mode, and_optab, op0,
3932 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3933 remainder, 1,
3934 OPTAB_LIB_WIDEN);
3935 if (remainder)
3936 return gen_lowpart (mode, remainder);
3937 }
3938 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3939 pre_shift, tquotient, 1);
3940 }
3941 else if (size <= HOST_BITS_PER_WIDE_INT)
3942 {
3943 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3944 {
3945 /* Most significant bit of divisor is set; emit an scc
3946 insn. */
3947 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
3948 compute_mode, 1, 1);
3949 }
3950 else
3951 {
3952 /* Find a suitable multiplier and right shift count
3953 instead of multiplying with D. */
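/* For instance, with SIZE = 32 and D = 7 the multiplier found is the
   33-bit constant 0x124924925 = floor (2**35 / 7) + 1 with POST_SHIFT = 3,
   so MH is nonzero and the fixup path below computes
       t1 = high 32 bits of op0 * 0x24924925
       q  = (((op0 - t1) >> 1) + t1) >> 2
   e.g. op0 = 100 gives t1 = 14 and q = (43 + 14) >> 2 = 14 = 100 / 7.  */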
3954
3955 mh = choose_multiplier (d, size, size,
3956 &ml, &post_shift, &dummy);
3957
3958 /* If the suggested multiplier is more than SIZE bits,
3959 we can do better for even divisors, using an
3960 initial right shift. */
3961 if (mh != 0 && (d & 1) == 0)
3962 {
3963 pre_shift = floor_log2 (d & -d);
3964 mh = choose_multiplier (d >> pre_shift, size,
3965 size - pre_shift,
3966 &ml, &post_shift, &dummy);
3967 gcc_assert (!mh);
3968 }
3969 else
3970 pre_shift = 0;
3971
3972 if (mh != 0)
3973 {
3974 rtx t1, t2, t3, t4;
3975
3976 if (post_shift - 1 >= BITS_PER_WORD)
3977 goto fail1;
3978
3979 extra_cost
3980 = (shift_cost[speed][compute_mode][post_shift - 1]
3981 + shift_cost[speed][compute_mode][1]
3982 + 2 * add_cost[speed][compute_mode]);
3983 t1 = expand_mult_highpart (compute_mode, op0, ml,
3984 NULL_RTX, 1,
3985 max_cost - extra_cost);
3986 if (t1 == 0)
3987 goto fail1;
3988 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3989 op0, t1),
3990 NULL_RTX);
3991 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
3992 t2, 1, NULL_RTX, 1);
3993 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3994 t1, t3),
3995 NULL_RTX);
3996 quotient = expand_shift
3997 (RSHIFT_EXPR, compute_mode, t4,
3998 post_shift - 1, tquotient, 1);
3999 }
4000 else
4001 {
4002 rtx t1, t2;
4003
4004 if (pre_shift >= BITS_PER_WORD
4005 || post_shift >= BITS_PER_WORD)
4006 goto fail1;
4007
4008 t1 = expand_shift
4009 (RSHIFT_EXPR, compute_mode, op0,
4010 pre_shift, NULL_RTX, 1);
4011 extra_cost
4012 = (shift_cost[speed][compute_mode][pre_shift]
4013 + shift_cost[speed][compute_mode][post_shift]);
4014 t2 = expand_mult_highpart (compute_mode, t1, ml,
4015 NULL_RTX, 1,
4016 max_cost - extra_cost);
4017 if (t2 == 0)
4018 goto fail1;
4019 quotient = expand_shift
4020 (RSHIFT_EXPR, compute_mode, t2,
4021 post_shift, tquotient, 1);
4022 }
4023 }
4024 }
4025 else /* Too wide mode to use tricky code */
4026 break;
4027
4028 insn = get_last_insn ();
4029 if (insn != last
4030 && (set = single_set (insn)) != 0
4031 && SET_DEST (set) == quotient)
4032 set_unique_reg_note (insn,
4033 REG_EQUAL,
4034 gen_rtx_UDIV (compute_mode, op0, op1));
4035 }
4036 else /* TRUNC_DIV, signed */
4037 {
4038 unsigned HOST_WIDE_INT ml;
4039 int lgup, post_shift;
4040 rtx mlr;
4041 HOST_WIDE_INT d = INTVAL (op1);
4042 unsigned HOST_WIDE_INT abs_d;
4043
4044 /* Since d might be INT_MIN, we have to cast to
4045 unsigned HOST_WIDE_INT before negating to avoid
4046 undefined signed overflow. */
4047 abs_d = (d >= 0
4048 ? (unsigned HOST_WIDE_INT) d
4049 : - (unsigned HOST_WIDE_INT) d);
4050
4051 /* n rem d = n rem -d */
4052 if (rem_flag && d < 0)
4053 {
4054 d = abs_d;
4055 op1 = gen_int_mode (abs_d, compute_mode);
4056 }
4057
4058 if (d == 1)
4059 quotient = op0;
4060 else if (d == -1)
4061 quotient = expand_unop (compute_mode, neg_optab, op0,
4062 tquotient, 0);
4063 else if (HOST_BITS_PER_WIDE_INT >= size
4064 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4065 {
4066 /* This case is not handled correctly below. */
4067 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4068 compute_mode, 1, 1);
4069 if (quotient == 0)
4070 goto fail1;
4071 }
4072 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4073 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4074 : sdiv_pow2_cheap[speed][compute_mode])
4075 /* We assume that the cheap metric is true if the
4076 optab has an expander for this mode. */
4077 && ((optab_handler ((rem_flag ? smod_optab
4078 : sdiv_optab),
4079 compute_mode)
4080 != CODE_FOR_nothing)
4081 || (optab_handler (sdivmod_optab,
4082 compute_mode)
4083 != CODE_FOR_nothing)))
4084 ;
4085 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4086 {
4087 if (rem_flag)
4088 {
4089 remainder = expand_smod_pow2 (compute_mode, op0, d);
4090 if (remainder)
4091 return gen_lowpart (mode, remainder);
4092 }
4093
4094 if (sdiv_pow2_cheap[speed][compute_mode]
4095 && ((optab_handler (sdiv_optab, compute_mode)
4096 != CODE_FOR_nothing)
4097 || (optab_handler (sdivmod_optab, compute_mode)
4098 != CODE_FOR_nothing)))
4099 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4100 compute_mode, op0,
4101 gen_int_mode (abs_d,
4102 compute_mode),
4103 NULL_RTX, 0);
4104 else
4105 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4106
4107 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4108 negate the quotient. */
4109 if (d < 0)
4110 {
4111 insn = get_last_insn ();
4112 if (insn != last
4113 && (set = single_set (insn)) != 0
4114 && SET_DEST (set) == quotient
4115 && abs_d < ((unsigned HOST_WIDE_INT) 1
4116 << (HOST_BITS_PER_WIDE_INT - 1)))
4117 set_unique_reg_note (insn,
4118 REG_EQUAL,
4119 gen_rtx_DIV (compute_mode,
4120 op0,
4121 GEN_INT
4122 (trunc_int_for_mode
4123 (abs_d,
4124 compute_mode))));
4125
4126 quotient = expand_unop (compute_mode, neg_optab,
4127 quotient, quotient, 0);
4128 }
4129 }
4130 else if (size <= HOST_BITS_PER_WIDE_INT)
4131 {
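/* Signed magic-number division.  E.g. for SIZE = 32 and abs_d = 3,
   choose_multiplier yields ml = 0x55555556 with POST_SHIFT = 0, so the
   ml < 2**31 branch below computes (for d > 0)
       q = (signed high 32 bits of op0 * 0x55555556) - (op0 >> 31)
   and op0 = -7 gives q = -3 - (-1) = -2 = trunc (-7 / 3).  */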
4132 choose_multiplier (abs_d, size, size - 1,
4133 &mlr, &post_shift, &lgup);
4134 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4135 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4136 {
4137 rtx t1, t2, t3;
4138
4139 if (post_shift >= BITS_PER_WORD
4140 || size - 1 >= BITS_PER_WORD)
4141 goto fail1;
4142
4143 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4144 + shift_cost[speed][compute_mode][size - 1]
4145 + add_cost[speed][compute_mode]);
4146 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4147 NULL_RTX, 0,
4148 max_cost - extra_cost);
4149 if (t1 == 0)
4150 goto fail1;
4151 t2 = expand_shift
4152 (RSHIFT_EXPR, compute_mode, t1,
4153 post_shift, NULL_RTX, 0);
4154 t3 = expand_shift
4155 (RSHIFT_EXPR, compute_mode, op0,
4156 size - 1, NULL_RTX, 0);
4157 if (d < 0)
4158 quotient
4159 = force_operand (gen_rtx_MINUS (compute_mode,
4160 t3, t2),
4161 tquotient);
4162 else
4163 quotient
4164 = force_operand (gen_rtx_MINUS (compute_mode,
4165 t2, t3),
4166 tquotient);
4167 }
4168 else
4169 {
4170 rtx t1, t2, t3, t4;
4171
4172 if (post_shift >= BITS_PER_WORD
4173 || size - 1 >= BITS_PER_WORD)
4174 goto fail1;
4175
4176 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4177 mlr = gen_int_mode (ml, compute_mode);
4178 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4179 + shift_cost[speed][compute_mode][size - 1]
4180 + 2 * add_cost[speed][compute_mode]);
4181 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4182 NULL_RTX, 0,
4183 max_cost - extra_cost);
4184 if (t1 == 0)
4185 goto fail1;
4186 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4187 t1, op0),
4188 NULL_RTX);
4189 t3 = expand_shift
4190 (RSHIFT_EXPR, compute_mode, t2,
4191 post_shift, NULL_RTX, 0);
4192 t4 = expand_shift
4193 (RSHIFT_EXPR, compute_mode, op0,
4194 size - 1, NULL_RTX, 0);
4195 if (d < 0)
4196 quotient
4197 = force_operand (gen_rtx_MINUS (compute_mode,
4198 t4, t3),
4199 tquotient);
4200 else
4201 quotient
4202 = force_operand (gen_rtx_MINUS (compute_mode,
4203 t3, t4),
4204 tquotient);
4205 }
4206 }
4207 else /* Too wide mode to use tricky code */
4208 break;
4209
4210 insn = get_last_insn ();
4211 if (insn != last
4212 && (set = single_set (insn)) != 0
4213 && SET_DEST (set) == quotient)
4214 set_unique_reg_note (insn,
4215 REG_EQUAL,
4216 gen_rtx_DIV (compute_mode, op0, op1));
4217 }
4218 break;
4219 }
4220 fail1:
4221 delete_insns_since (last);
4222 break;
4223
4224 case FLOOR_DIV_EXPR:
4225 case FLOOR_MOD_EXPR:
4226 /* We will come here only for signed operations. */
4227 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4228 {
4229 unsigned HOST_WIDE_INT mh;
4230 int pre_shift, lgup, post_shift;
4231 HOST_WIDE_INT d = INTVAL (op1);
4232 rtx ml;
4233
4234 if (d > 0)
4235 {
4236 /* We could just as easily deal with negative constants here,
4237 but it does not seem worth the trouble for GCC 2.6. */
4238 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4239 {
4240 pre_shift = floor_log2 (d);
4241 if (rem_flag)
4242 {
4243 remainder = expand_binop (compute_mode, and_optab, op0,
4244 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4245 remainder, 0, OPTAB_LIB_WIDEN);
4246 if (remainder)
4247 return gen_lowpart (mode, remainder);
4248 }
4249 quotient = expand_shift
4250 (RSHIFT_EXPR, compute_mode, op0,
4251 pre_shift, tquotient, 0);
4252 }
4253 else
4254 {
4255 rtx t1, t2, t3, t4;
4256
4257 mh = choose_multiplier (d, size, size - 1,
4258 &ml, &post_shift, &lgup);
4259 gcc_assert (!mh);
4260
4261 if (post_shift < BITS_PER_WORD
4262 && size - 1 < BITS_PER_WORD)
4263 {
4264 t1 = expand_shift
4265 (RSHIFT_EXPR, compute_mode, op0,
4266 size - 1, NULL_RTX, 0);
4267 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4268 NULL_RTX, 0, OPTAB_WIDEN);
4269 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4270 + shift_cost[speed][compute_mode][size - 1]
4271 + 2 * add_cost[speed][compute_mode]);
4272 t3 = expand_mult_highpart (compute_mode, t2, ml,
4273 NULL_RTX, 1,
4274 max_cost - extra_cost);
4275 if (t3 != 0)
4276 {
4277 t4 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t3,
4279 post_shift, NULL_RTX, 1);
4280 quotient = expand_binop (compute_mode, xor_optab,
4281 t4, t1, tquotient, 0,
4282 OPTAB_WIDEN);
4283 }
4284 }
4285 }
4286 }
4287 else
4288 {
4289 rtx nsign, t1, t2, t3, t4;
4290 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4291 op0, constm1_rtx), NULL_RTX);
4292 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4293 0, OPTAB_WIDEN);
4294 nsign = expand_shift
4295 (RSHIFT_EXPR, compute_mode, t2,
4296 size - 1, NULL_RTX, 0);
4297 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4298 NULL_RTX);
4299 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4300 NULL_RTX, 0);
4301 if (t4)
4302 {
4303 rtx t5;
4304 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4305 NULL_RTX, 0);
4306 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4307 t4, t5),
4308 tquotient);
4309 }
4310 }
4311 }
4312
4313 if (quotient != 0)
4314 break;
4315 delete_insns_since (last);
4316
4317 /* Try using an instruction that produces both the quotient and
4318 remainder, using truncation. We can easily compensate the quotient
4319 or remainder to get floor rounding, once we have the remainder.
4320 Notice that we compute also the final remainder value here,
4321 and return the result right away. */
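/* The compensation is: if the truncating remainder is nonzero and OP0 and
   OP1 have opposite signs, subtract 1 from the quotient and add OP1 to the
   remainder.  E.g. -7 / 2 truncates to -3 rem -1; floor rounding gives
   -4 rem 1.  */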
4322 if (target == 0 || GET_MODE (target) != compute_mode)
4323 target = gen_reg_rtx (compute_mode);
4324
4325 if (rem_flag)
4326 {
4327 remainder
4328 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4329 quotient = gen_reg_rtx (compute_mode);
4330 }
4331 else
4332 {
4333 quotient
4334 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4335 remainder = gen_reg_rtx (compute_mode);
4336 }
4337
4338 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4339 quotient, remainder, 0))
4340 {
4341 /* This could be computed with a branch-less sequence.
4342 Save that for later. */
4343 rtx tem;
4344 rtx label = gen_label_rtx ();
4345 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4346 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4347 NULL_RTX, 0, OPTAB_WIDEN);
4348 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4349 expand_dec (quotient, const1_rtx);
4350 expand_inc (remainder, op1);
4351 emit_label (label);
4352 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4353 }
4354
4355 /* No luck with division elimination or divmod. Have to do it
4356 by conditionally adjusting op0 *and* the result. */
4357 {
4358 rtx label1, label2, label3, label4, label5;
4359 rtx adjusted_op0;
4360 rtx tem;
4361
4362 quotient = gen_reg_rtx (compute_mode);
4363 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4364 label1 = gen_label_rtx ();
4365 label2 = gen_label_rtx ();
4366 label3 = gen_label_rtx ();
4367 label4 = gen_label_rtx ();
4368 label5 = gen_label_rtx ();
4369 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4370 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4371 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4372 quotient, 0, OPTAB_LIB_WIDEN);
4373 if (tem != quotient)
4374 emit_move_insn (quotient, tem);
4375 emit_jump_insn (gen_jump (label5));
4376 emit_barrier ();
4377 emit_label (label1);
4378 expand_inc (adjusted_op0, const1_rtx);
4379 emit_jump_insn (gen_jump (label4));
4380 emit_barrier ();
4381 emit_label (label2);
4382 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4383 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4384 quotient, 0, OPTAB_LIB_WIDEN);
4385 if (tem != quotient)
4386 emit_move_insn (quotient, tem);
4387 emit_jump_insn (gen_jump (label5));
4388 emit_barrier ();
4389 emit_label (label3);
4390 expand_dec (adjusted_op0, const1_rtx);
4391 emit_label (label4);
4392 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4393 quotient, 0, OPTAB_LIB_WIDEN);
4394 if (tem != quotient)
4395 emit_move_insn (quotient, tem);
4396 expand_dec (quotient, const1_rtx);
4397 emit_label (label5);
4398 }
4399 break;
4400
4401 case CEIL_DIV_EXPR:
4402 case CEIL_MOD_EXPR:
4403 if (unsignedp)
4404 {
4405 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4406 {
4407 rtx t1, t2, t3;
4408 unsigned HOST_WIDE_INT d = INTVAL (op1);
4409 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4410 floor_log2 (d), tquotient, 1);
4411 t2 = expand_binop (compute_mode, and_optab, op0,
4412 GEN_INT (d - 1),
4413 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4414 t3 = gen_reg_rtx (compute_mode);
4415 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4416 compute_mode, 1, 1);
4417 if (t3 == 0)
4418 {
4419 rtx lab;
4420 lab = gen_label_rtx ();
4421 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4422 expand_inc (t1, const1_rtx);
4423 emit_label (lab);
4424 quotient = t1;
4425 }
4426 else
4427 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4428 t1, t3),
4429 tquotient);
4430 break;
4431 }
4432
4433 /* Try using an instruction that produces both the quotient and
4434 remainder, using truncation. We can easily compensate the
4435 quotient or remainder to get ceiling rounding, once we have the
4436 remainder. Notice that we compute also the final remainder
4437 value here, and return the result right away. */
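/* Here the compensation is: if the truncating remainder is nonzero, add 1
   to the quotient and subtract OP1 from the remainder.  E.g. 7 / 2
   truncates to 3 rem 1; ceiling rounding gives 4 rem -1.  */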
4438 if (target == 0 || GET_MODE (target) != compute_mode)
4439 target = gen_reg_rtx (compute_mode);
4440
4441 if (rem_flag)
4442 {
4443 remainder = (REG_P (target)
4444 ? target : gen_reg_rtx (compute_mode));
4445 quotient = gen_reg_rtx (compute_mode);
4446 }
4447 else
4448 {
4449 quotient = (REG_P (target)
4450 ? target : gen_reg_rtx (compute_mode));
4451 remainder = gen_reg_rtx (compute_mode);
4452 }
4453
4454 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4455 remainder, 1))
4456 {
4457 /* This could be computed with a branch-less sequence.
4458 Save that for later. */
4459 rtx label = gen_label_rtx ();
4460 do_cmp_and_jump (remainder, const0_rtx, EQ,
4461 compute_mode, label);
4462 expand_inc (quotient, const1_rtx);
4463 expand_dec (remainder, op1);
4464 emit_label (label);
4465 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4466 }
4467
4468 /* No luck with division elimination or divmod. Have to do it
4469 by conditionally adjusting op0 *and* the result. */
4470 {
4471 rtx label1, label2;
4472 rtx adjusted_op0, tem;
4473
4474 quotient = gen_reg_rtx (compute_mode);
4475 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4476 label1 = gen_label_rtx ();
4477 label2 = gen_label_rtx ();
4478 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4479 compute_mode, label1);
4480 emit_move_insn (quotient, const0_rtx);
4481 emit_jump_insn (gen_jump (label2));
4482 emit_barrier ();
4483 emit_label (label1);
4484 expand_dec (adjusted_op0, const1_rtx);
4485 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4486 quotient, 1, OPTAB_LIB_WIDEN);
4487 if (tem != quotient)
4488 emit_move_insn (quotient, tem);
4489 expand_inc (quotient, const1_rtx);
4490 emit_label (label2);
4491 }
4492 }
4493 else /* signed */
4494 {
4495 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4496 && INTVAL (op1) >= 0)
4497 {
4498 /* This is extremely similar to the code for the unsigned case
4499 above. For 2.7 we should merge these variants, but for
4500 2.6.1 I don't want to touch the code for unsigned since that
4501 get used in C. The signed case will only be used by other
4502 languages (Ada). */
4503
4504 rtx t1, t2, t3;
4505 unsigned HOST_WIDE_INT d = INTVAL (op1);
4506 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4507 floor_log2 (d), tquotient, 0);
4508 t2 = expand_binop (compute_mode, and_optab, op0,
4509 GEN_INT (d - 1),
4510 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4511 t3 = gen_reg_rtx (compute_mode);
4512 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4513 compute_mode, 1, 1);
4514 if (t3 == 0)
4515 {
4516 rtx lab;
4517 lab = gen_label_rtx ();
4518 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4519 expand_inc (t1, const1_rtx);
4520 emit_label (lab);
4521 quotient = t1;
4522 }
4523 else
4524 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4525 t1, t3),
4526 tquotient);
4527 break;
4528 }
4529
4530 /* Try using an instruction that produces both the quotient and
4531 remainder, using truncation. We can easily compensate the
4532 quotient or remainder to get ceiling rounding, once we have the
4533 remainder. Notice that we compute also the final remainder
4534 value here, and return the result right away. */
4535 if (target == 0 || GET_MODE (target) != compute_mode)
4536 target = gen_reg_rtx (compute_mode);
4537 if (rem_flag)
4538 {
4539 remainder = (REG_P (target)
4540 ? target : gen_reg_rtx (compute_mode));
4541 quotient = gen_reg_rtx (compute_mode);
4542 }
4543 else
4544 {
4545 quotient = (REG_P (target)
4546 ? target : gen_reg_rtx (compute_mode));
4547 remainder = gen_reg_rtx (compute_mode);
4548 }
4549
4550 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4551 remainder, 0))
4552 {
4553 /* This could be computed with a branch-less sequence.
4554 Save that for later. */
4555 rtx tem;
4556 rtx label = gen_label_rtx ();
4557 do_cmp_and_jump (remainder, const0_rtx, EQ,
4558 compute_mode, label);
4559 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4560 NULL_RTX, 0, OPTAB_WIDEN);
4561 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4562 expand_inc (quotient, const1_rtx);
4563 expand_dec (remainder, op1);
4564 emit_label (label);
4565 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4566 }
4567
4568 /* No luck with division elimination or divmod. Have to do it
4569 by conditionally adjusting op0 *and* the result. */
4570 {
4571 rtx label1, label2, label3, label4, label5;
4572 rtx adjusted_op0;
4573 rtx tem;
4574
4575 quotient = gen_reg_rtx (compute_mode);
4576 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4577 label1 = gen_label_rtx ();
4578 label2 = gen_label_rtx ();
4579 label3 = gen_label_rtx ();
4580 label4 = gen_label_rtx ();
4581 label5 = gen_label_rtx ();
4582 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4583 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4584 compute_mode, label1);
4585 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4586 quotient, 0, OPTAB_LIB_WIDEN);
4587 if (tem != quotient)
4588 emit_move_insn (quotient, tem);
4589 emit_jump_insn (gen_jump (label5));
4590 emit_barrier ();
4591 emit_label (label1);
4592 expand_dec (adjusted_op0, const1_rtx);
4593 emit_jump_insn (gen_jump (label4));
4594 emit_barrier ();
4595 emit_label (label2);
4596 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4597 compute_mode, label3);
4598 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4599 quotient, 0, OPTAB_LIB_WIDEN);
4600 if (tem != quotient)
4601 emit_move_insn (quotient, tem);
4602 emit_jump_insn (gen_jump (label5));
4603 emit_barrier ();
4604 emit_label (label3);
4605 expand_inc (adjusted_op0, const1_rtx);
4606 emit_label (label4);
4607 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4608 quotient, 0, OPTAB_LIB_WIDEN);
4609 if (tem != quotient)
4610 emit_move_insn (quotient, tem);
4611 expand_inc (quotient, const1_rtx);
4612 emit_label (label5);
4613 }
4614 }
4615 break;
4616
4617 case EXACT_DIV_EXPR:
4618 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4619 {
4620 HOST_WIDE_INT d = INTVAL (op1);
4621 unsigned HOST_WIDE_INT ml;
4622 int pre_shift;
4623 rtx t1;
4624
4625 pre_shift = floor_log2 (d & -d);
4626 ml = invert_mod2n (d >> pre_shift, size);
4627 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4628 pre_shift, NULL_RTX, unsignedp);
4629 quotient = expand_mult (compute_mode, t1,
4630 gen_int_mode (ml, compute_mode),
4631 NULL_RTX, 1);
4632
4633 insn = get_last_insn ();
4634 set_unique_reg_note (insn,
4635 REG_EQUAL,
4636 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4637 compute_mode,
4638 op0, op1));
4639 }
4640 break;
4641
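/* ROUND_DIV_EXPR / ROUND_MOD_EXPR round the quotient to the nearest integer:
   start from the truncating quotient and remainder and bump the quotient
   away from zero when 2*|remainder| >= |op1| (the unsigned code compares
   the remainder against (op1 - 1) / 2, the signed code compares
   2*|remainder| against |op1|).  E.g. unsigned 8 / 3 truncates to 2 rem 2;
   since 2 > (3 - 1) / 2 the result becomes 3 rem -1.  */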
4642 case ROUND_DIV_EXPR:
4643 case ROUND_MOD_EXPR:
4644 if (unsignedp)
4645 {
4646 rtx tem;
4647 rtx label;
4648 label = gen_label_rtx ();
4649 quotient = gen_reg_rtx (compute_mode);
4650 remainder = gen_reg_rtx (compute_mode);
4651 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4652 {
4653 rtx tem;
4654 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4655 quotient, 1, OPTAB_LIB_WIDEN);
4656 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4657 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4658 remainder, 1, OPTAB_LIB_WIDEN);
4659 }
4660 tem = plus_constant (op1, -1);
4661 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4662 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4663 expand_inc (quotient, const1_rtx);
4664 expand_dec (remainder, op1);
4665 emit_label (label);
4666 }
4667 else
4668 {
4669 rtx abs_rem, abs_op1, tem, mask;
4670 rtx label;
4671 label = gen_label_rtx ();
4672 quotient = gen_reg_rtx (compute_mode);
4673 remainder = gen_reg_rtx (compute_mode);
4674 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4675 {
4676 rtx tem;
4677 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4678 quotient, 0, OPTAB_LIB_WIDEN);
4679 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4680 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4681 remainder, 0, OPTAB_LIB_WIDEN);
4682 }
4683 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4684 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4685 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4686 1, NULL_RTX, 1);
4687 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4688 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 NULL_RTX, 0, OPTAB_WIDEN);
4690 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4691 size - 1, NULL_RTX, 0);
4692 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4693 NULL_RTX, 0, OPTAB_WIDEN);
4694 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4695 NULL_RTX, 0, OPTAB_WIDEN);
4696 expand_inc (quotient, tem);
4697 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4698 NULL_RTX, 0, OPTAB_WIDEN);
4699 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4700 NULL_RTX, 0, OPTAB_WIDEN);
4701 expand_dec (remainder, tem);
4702 emit_label (label);
4703 }
4704 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4705
4706 default:
4707 gcc_unreachable ();
4708 }
4709
4710 if (quotient == 0)
4711 {
4712 if (target && GET_MODE (target) != compute_mode)
4713 target = 0;
4714
4715 if (rem_flag)
4716 {
4717 /* Try to produce the remainder without producing the quotient.
4718 If we seem to have a divmod pattern that does not require widening,
4719 don't try widening here. We should really have a WIDEN argument
4720 to expand_twoval_binop, since what we'd really like to do here is
4721 1) try a mod insn in compute_mode
4722 2) try a divmod insn in compute_mode
4723 3) try a div insn in compute_mode and multiply-subtract to get
4724 remainder
4725 4) try the same things with widening allowed. */
4726 remainder
4727 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4728 op0, op1, target,
4729 unsignedp,
4730 ((optab_handler (optab2, compute_mode)
4731 != CODE_FOR_nothing)
4732 ? OPTAB_DIRECT : OPTAB_WIDEN));
4733 if (remainder == 0)
4734 {
4735 /* No luck there. Can we do remainder and divide at once
4736 without a library call? */
4737 remainder = gen_reg_rtx (compute_mode);
4738 if (! expand_twoval_binop ((unsignedp
4739 ? udivmod_optab
4740 : sdivmod_optab),
4741 op0, op1,
4742 NULL_RTX, remainder, unsignedp))
4743 remainder = 0;
4744 }
4745
4746 if (remainder)
4747 return gen_lowpart (mode, remainder);
4748 }
4749
4750 /* Produce the quotient. Try a quotient insn, but not a library call.
4751 If we have a divmod in this mode, use it in preference to widening
4752 the div (for this test we assume it will not fail). Note that optab2
4753 is set to the one of the two optabs that the call below will use. */
4754 quotient
4755 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4756 op0, op1, rem_flag ? NULL_RTX : target,
4757 unsignedp,
4758 ((optab_handler (optab2, compute_mode)
4759 != CODE_FOR_nothing)
4760 ? OPTAB_DIRECT : OPTAB_WIDEN));
4761
4762 if (quotient == 0)
4763 {
4764 /* No luck there. Try a quotient-and-remainder insn,
4765 keeping the quotient alone. */
4766 quotient = gen_reg_rtx (compute_mode);
4767 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4768 op0, op1,
4769 quotient, NULL_RTX, unsignedp))
4770 {
4771 quotient = 0;
4772 if (! rem_flag)
4773 /* Still no luck. If we are not computing the remainder,
4774 use a library call for the quotient. */
4775 quotient = sign_expand_binop (compute_mode,
4776 udiv_optab, sdiv_optab,
4777 op0, op1, target,
4778 unsignedp, OPTAB_LIB_WIDEN);
4779 }
4780 }
4781 }
4782
4783 if (rem_flag)
4784 {
4785 if (target && GET_MODE (target) != compute_mode)
4786 target = 0;
4787
4788 if (quotient == 0)
4789 {
4790 /* No divide instruction either. Use library for remainder. */
4791 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4792 op0, op1, target,
4793 unsignedp, OPTAB_LIB_WIDEN);
4794 /* No remainder function. Try a quotient-and-remainder
4795 function, keeping the remainder. */
4796 if (!remainder)
4797 {
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (!expand_twoval_binop_libfunc
4800 (unsignedp ? udivmod_optab : sdivmod_optab,
4801 op0, op1,
4802 NULL_RTX, remainder,
4803 unsignedp ? UMOD : MOD))
4804 remainder = NULL_RTX;
4805 }
4806 }
4807 else
4808 {
4809 /* We divided. Now finish doing X - Y * (X / Y). */
4810 remainder = expand_mult (compute_mode, quotient, op1,
4811 NULL_RTX, unsignedp);
4812 remainder = expand_binop (compute_mode, sub_optab, op0,
4813 remainder, target, unsignedp,
4814 OPTAB_LIB_WIDEN);
4815 }
4816 }
4817
4818 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4819 }
4820 \f
4821 /* Return a tree node with data type TYPE, describing the value of X.
4822 Usually this is a VAR_DECL, if there is no obvious better choice.
4823 X may be an expression; however, we only support those expressions
4824 generated by loop.c. */
4825
4826 tree
4827 make_tree (tree type, rtx x)
4828 {
4829 tree t;
4830
4831 switch (GET_CODE (x))
4832 {
4833 case CONST_INT:
4834 {
4835 HOST_WIDE_INT hi = 0;
4836
4837 if (INTVAL (x) < 0
4838 && !(TYPE_UNSIGNED (type)
4839 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4840 < HOST_BITS_PER_WIDE_INT)))
4841 hi = -1;
4842
4843 t = build_int_cst_wide (type, INTVAL (x), hi);
4844
4845 return t;
4846 }
4847
4848 case CONST_DOUBLE:
4849 if (GET_MODE (x) == VOIDmode)
4850 t = build_int_cst_wide (type,
4851 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4852 else
4853 {
4854 REAL_VALUE_TYPE d;
4855
4856 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4857 t = build_real (type, d);
4858 }
4859
4860 return t;
4861
4862 case CONST_VECTOR:
4863 {
4864 int units = CONST_VECTOR_NUNITS (x);
4865 tree itype = TREE_TYPE (type);
4866 tree t = NULL_TREE;
4867 int i;
4868
4869
4870 /* Build a tree with vector elements. */
4871 for (i = units - 1; i >= 0; --i)
4872 {
4873 rtx elt = CONST_VECTOR_ELT (x, i);
4874 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4875 }
4876
4877 return build_vector (type, t);
4878 }
4879
4880 case PLUS:
4881 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4882 make_tree (type, XEXP (x, 1)));
4883
4884 case MINUS:
4885 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4886 make_tree (type, XEXP (x, 1)));
4887
4888 case NEG:
4889 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4890
4891 case MULT:
4892 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4893 make_tree (type, XEXP (x, 1)));
4894
4895 case ASHIFT:
4896 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4897 make_tree (type, XEXP (x, 1)));
4898
4899 case LSHIFTRT:
4900 t = unsigned_type_for (type);
4901 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4902 make_tree (t, XEXP (x, 0)),
4903 make_tree (type, XEXP (x, 1))));
4904
4905 case ASHIFTRT:
4906 t = signed_type_for (type);
4907 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4908 make_tree (t, XEXP (x, 0)),
4909 make_tree (type, XEXP (x, 1))));
4910
4911 case DIV:
4912 if (TREE_CODE (type) != REAL_TYPE)
4913 t = signed_type_for (type);
4914 else
4915 t = type;
4916
4917 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4918 make_tree (t, XEXP (x, 0)),
4919 make_tree (t, XEXP (x, 1))));
4920 case UDIV:
4921 t = unsigned_type_for (type);
4922 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4923 make_tree (t, XEXP (x, 0)),
4924 make_tree (t, XEXP (x, 1))));
4925
4926 case SIGN_EXTEND:
4927 case ZERO_EXTEND:
4928 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4929 GET_CODE (x) == ZERO_EXTEND);
4930 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4931
4932 case CONST:
4933 return make_tree (type, XEXP (x, 0));
4934
4935 case SYMBOL_REF:
4936 t = SYMBOL_REF_DECL (x);
4937 if (t)
4938 return fold_convert (type, build_fold_addr_expr (t));
4939 /* else fall through. */
4940
4941 default:
4942 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
4943
4944 /* If TYPE is a POINTER_TYPE, we might need to convert X from
4945 address mode to pointer mode. */
4946 if (POINTER_TYPE_P (type))
4947 x = convert_memory_address_addr_space
4948 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
4949
4950 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4951 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4952 t->decl_with_rtl.rtl = x;
4953
4954 return t;
4955 }
4956 }
4957 \f
4958 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4959 and returning TARGET.
4960
4961 If TARGET is 0, a pseudo-register or constant is returned. */
4962
4963 rtx
4964 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4965 {
4966 rtx tem = 0;
4967
4968 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4969 tem = simplify_binary_operation (AND, mode, op0, op1);
4970 if (tem == 0)
4971 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4972
4973 if (target == 0)
4974 target = tem;
4975 else if (tem != target)
4976 emit_move_insn (target, tem);
4977 return target;
4978 }
4979
4980 /* Helper function for emit_store_flag. */
4981 static rtx
4982 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
4983 enum machine_mode mode, enum machine_mode compare_mode,
4984 int unsignedp, rtx x, rtx y, int normalizep,
4985 enum machine_mode target_mode)
4986 {
4987 struct expand_operand ops[4];
4988 rtx op0, last, comparison, subtarget;
4989 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
4990
4991 last = get_last_insn ();
4992 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
4993 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
4994 if (!x || !y)
4995 {
4996 delete_insns_since (last);
4997 return NULL_RTX;
4998 }
4999
5000 if (target_mode == VOIDmode)
5001 target_mode = result_mode;
5002 if (!target)
5003 target = gen_reg_rtx (target_mode);
5004
5005 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5006
5007 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5008 create_fixed_operand (&ops[1], comparison);
5009 create_fixed_operand (&ops[2], x);
5010 create_fixed_operand (&ops[3], y);
5011 if (!maybe_expand_insn (icode, 4, ops))
5012 {
5013 delete_insns_since (last);
5014 return NULL_RTX;
5015 }
5016 subtarget = ops[0].value;
5017
5018 /* If we are converting to a wider mode, first convert to
5019 TARGET_MODE, then normalize. This produces better combining
5020 opportunities on machines that have a SIGN_EXTRACT when we are
5021 testing a single bit. This mostly benefits the 68k.
5022
5023 If STORE_FLAG_VALUE does not have the sign bit set when
5024 interpreted in MODE, we can do this conversion as unsigned, which
5025 is usually more efficient. */
5026 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5027 {
5028 convert_move (target, subtarget,
5029 (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5030 && 0 == (STORE_FLAG_VALUE
5031 & ((HOST_WIDE_INT) 1
5032 << (GET_MODE_BITSIZE (result_mode) -1))));
5033 op0 = target;
5034 result_mode = target_mode;
5035 }
5036 else
5037 op0 = subtarget;
5038
5039 /* If we want to keep subexpressions around, don't reuse our last
5040 target. */
5041 if (optimize)
5042 subtarget = 0;
5043
5044 /* Now normalize to the proper value in MODE. Sometimes we don't
5045 have to do anything. */
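/* The cases below: the store-flag value already equals NORMALIZEP (or
   NORMALIZEP is 0); it is the negation of NORMALIZEP; it has only the sign
   bit set, in which case an arithmetic (NORMALIZEP == -1) or logical
   (NORMALIZEP == 1) right shift by the mode bitsize minus 1 produces the
   wanted 0/-1 or 0/1; otherwise mask with 1 and possibly negate.  */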
5046 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5047 ;
5048 /* STORE_FLAG_VALUE might be the most negative number, so write
5049 the comparison this way to avoid a compile-time warning. */
5050 else if (- normalizep == STORE_FLAG_VALUE)
5051 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5052
5053 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5054 it hard to use a value of just the sign bit due to ANSI integer
5055 constant typing rules. */
5056 else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5057 && (STORE_FLAG_VALUE
5058 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5059 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5060 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5061 normalizep == 1);
5062 else
5063 {
5064 gcc_assert (STORE_FLAG_VALUE & 1);
5065
5066 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5067 if (normalizep == -1)
5068 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5069 }
5070
5071 /* If we were converting to a smaller mode, do the conversion now. */
5072 if (target_mode != result_mode)
5073 {
5074 convert_move (target, op0, 0);
5075 return target;
5076 }
5077 else
5078 return op0;
5079 }
5080
5081
5082 /* A subroutine of emit_store_flag that includes only "tricks" that do not
5083 need a recursive call. These are kept separate to avoid infinite
5084 loops. */
5085
5086 static rtx
5087 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5088 enum machine_mode mode, int unsignedp, int normalizep,
5089 enum machine_mode target_mode)
5090 {
5091 rtx subtarget;
5092 enum insn_code icode;
5093 enum machine_mode compare_mode;
5094 enum mode_class mclass;
5095 enum rtx_code scode;
5096 rtx tem;
5097
5098 if (unsignedp)
5099 code = unsigned_condition (code);
5100 scode = swap_condition (code);
5101
5102 /* If one operand is constant, make it the second one. Only do this
5103 if the other operand is not constant as well. */
5104
5105 if (swap_commutative_operands_p (op0, op1))
5106 {
5107 tem = op0;
5108 op0 = op1;
5109 op1 = tem;
5110 code = swap_condition (code);
5111 }
5112
5113 if (mode == VOIDmode)
5114 mode = GET_MODE (op0);
5115
5116 /* For some comparisons with 1 and -1, we can convert this to
5117 comparisons with zero. This will often produce more opportunities for
5118 store-flag insns. */
5119
5120 switch (code)
5121 {
5122 case LT:
5123 if (op1 == const1_rtx)
5124 op1 = const0_rtx, code = LE;
5125 break;
5126 case LE:
5127 if (op1 == constm1_rtx)
5128 op1 = const0_rtx, code = LT;
5129 break;
5130 case GE:
5131 if (op1 == const1_rtx)
5132 op1 = const0_rtx, code = GT;
5133 break;
5134 case GT:
5135 if (op1 == constm1_rtx)
5136 op1 = const0_rtx, code = GE;
5137 break;
5138 case GEU:
5139 if (op1 == const1_rtx)
5140 op1 = const0_rtx, code = NE;
5141 break;
5142 case LTU:
5143 if (op1 == const1_rtx)
5144 op1 = const0_rtx, code = EQ;
5145 break;
5146 default:
5147 break;
5148 }
5149
5150 /* If we are comparing a double-word integer with zero or -1, we can
5151 convert the comparison into one involving a single word. */
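/* For instance (illustrative, assuming 32-bit words and a DImode X made
   of words HI and LO):

     X == 0   iff  (LO | HI) == 0
     X == -1  iff  (LO & HI) == -1
     X < 0 and X >= 0 depend only on the sign bit of HI,

   which is exactly what the word_mode subcomparisons below test.  */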
5152 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5153 && GET_MODE_CLASS (mode) == MODE_INT
5154 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5155 {
5156 if ((code == EQ || code == NE)
5157 && (op1 == const0_rtx || op1 == constm1_rtx))
5158 {
5159 rtx op00, op01;
5160
5161 /* Do a logical OR or AND of the two words and compare the
5162 result. */
5163 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5164 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5165 tem = expand_binop (word_mode,
5166 op1 == const0_rtx ? ior_optab : and_optab,
5167 op00, op01, NULL_RTX, unsignedp,
5168 OPTAB_DIRECT);
5169
5170 if (tem != 0)
5171 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5172 unsignedp, normalizep);
5173 }
5174 else if ((code == LT || code == GE) && op1 == const0_rtx)
5175 {
5176 rtx op0h;
5177
5178 /* If testing the sign bit, can just test on high word. */
5179 op0h = simplify_gen_subreg (word_mode, op0, mode,
5180 subreg_highpart_offset (word_mode,
5181 mode));
5182 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5183 unsignedp, normalizep);
5184 }
5185 else
5186 tem = NULL_RTX;
5187
5188 if (tem)
5189 {
5190 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5191 return tem;
5192 if (!target)
5193 target = gen_reg_rtx (target_mode);
5194
5195 convert_move (target, tem,
5196 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5197 & ((HOST_WIDE_INT) 1
5198 << (GET_MODE_BITSIZE (word_mode) -1))));
5199 return target;
5200 }
5201 }
5202
5203 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5204 complement of A (for GE) and shifting the sign bit to the low bit. */
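/* As an illustration, in an 8-bit mode with a 0/1 result wanted:
   "A < 0" becomes a logical shift, A >> 7, which is 1 exactly when the
   sign bit is set; "A >= 0" first complements A, so (~A) >> 7 is 1
   exactly when A is non-negative.  For a 0/-1 result the shift below is
   arithmetic instead (NORMALIZEP == -1).  */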
5205 if (op1 == const0_rtx && (code == LT || code == GE)
5206 && GET_MODE_CLASS (mode) == MODE_INT
5207 && (normalizep || STORE_FLAG_VALUE == 1
5208 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5209 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5210 == ((unsigned HOST_WIDE_INT) 1
5211 << (GET_MODE_BITSIZE (mode) - 1))))))
5212 {
5213 subtarget = target;
5214
5215 if (!target)
5216 target_mode = mode;
5217
5218 /* If the result is to be wider than OP0, it is best to convert it
5219 first. If it is to be narrower, it is *incorrect* to convert it
5220 first. */
5221 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5222 {
5223 op0 = convert_modes (target_mode, mode, op0, 0);
5224 mode = target_mode;
5225 }
5226
5227 if (target_mode != mode)
5228 subtarget = 0;
5229
5230 if (code == GE)
5231 op0 = expand_unop (mode, one_cmpl_optab, op0,
5232 ((STORE_FLAG_VALUE == 1 || normalizep)
5233 ? 0 : subtarget), 0);
5234
5235 if (STORE_FLAG_VALUE == 1 || normalizep)
5236 /* If we are supposed to produce a 0/1 value, we want to do
5237 a logical shift from the sign bit to the low-order bit; for
5238 a -1/0 value, we do an arithmetic shift. */
5239 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5240 GET_MODE_BITSIZE (mode) - 1,
5241 subtarget, normalizep != -1);
5242
5243 if (mode != target_mode)
5244 op0 = convert_modes (target_mode, mode, op0, 0);
5245
5246 return op0;
5247 }
5248
5249 mclass = GET_MODE_CLASS (mode);
5250 for (compare_mode = mode; compare_mode != VOIDmode;
5251 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5252 {
5253 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5254 icode = optab_handler (cstore_optab, optab_mode);
5255 if (icode != CODE_FOR_nothing)
5256 {
5257 do_pending_stack_adjust ();
5258 tem = emit_cstore (target, icode, code, mode, compare_mode,
5259 unsignedp, op0, op1, normalizep, target_mode);
5260 if (tem)
5261 return tem;
5262
5263 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5264 {
5265 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5266 unsignedp, op1, op0, normalizep, target_mode);
5267 if (tem)
5268 return tem;
5269 }
5270 break;
5271 }
5272 }
5273
5274 return 0;
5275 }
5276
5277 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5278 and store the result in TARGET. Normally return TARGET.
5279 Return 0 if that cannot be done.
5280
5281 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5282 it is VOIDmode, they cannot both be CONST_INT.
5283
5284 UNSIGNEDP is for the case where we have to widen the operands
5285 to perform the operation. It says to use zero-extension.
5286
5287 NORMALIZEP is 1 if we should convert the result to be either zero
5288 or one. NORMALIZEP is -1 if we should convert the result to be
5289 either zero or -1. If NORMALIZEP is zero, the result will be left
5290 "raw" out of the scc insn. */
5291
5292 rtx
5293 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5294 enum machine_mode mode, int unsignedp, int normalizep)
5295 {
5296 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5297 enum rtx_code rcode;
5298 rtx subtarget;
5299 rtx tem, last, trueval;
5300
5301 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5302 target_mode);
5303 if (tem)
5304 return tem;
5305
5306 /* If we reached here, we can't do this with a scc insn; however, there
5307 are some comparisons that can be done in other ways. Don't do any
5308 of these cases if branches are very cheap. */
5309 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5310 return 0;
5311
5312 /* See what we need to return. We can only return a 1, -1, or the
5313 sign bit. */
5314
5315 if (normalizep == 0)
5316 {
5317 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5318 normalizep = STORE_FLAG_VALUE;
5319
5320 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5321 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5322 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5323 ;
5324 else
5325 return 0;
5326 }
5327
5328 last = get_last_insn ();
5329
5330 /* If optimizing, use different pseudo registers for each insn, instead
5331 of reusing the same pseudo. This leads to better CSE, but slows
5332 down the compiler, since there are more pseudos. */
5333 subtarget = (!optimize
5334 && (target_mode == mode)) ? target : NULL_RTX;
5335 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5336
5337 /* For floating-point comparisons, try the reverse comparison or try
5338 changing the "orderedness" of the comparison. */
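/* For example, "ORDERED (a, b)" can be computed from a store-flag
   pattern for its reverse, UNORDERED, by emitting the reversed test and
   then flipping the result with an addition or an XOR, as done below;
   reversal is only attempted where it is safe with respect to NaNs.  */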
5339 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5340 {
5341 enum rtx_code first_code;
5342 bool and_them;
5343
5344 rcode = reverse_condition_maybe_unordered (code);
5345 if (can_compare_p (rcode, mode, ccp_store_flag)
5346 && (code == ORDERED || code == UNORDERED
5347 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5348 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5349 {
5350 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5351 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5352
5353 /* For the reverse comparison, use either an addition or an XOR. */
5354 if (want_add
5355 && rtx_cost (GEN_INT (normalizep), PLUS,
5356 optimize_insn_for_speed_p ()) == 0)
5357 {
5358 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5359 STORE_FLAG_VALUE, target_mode);
5360 if (tem)
5361 return expand_binop (target_mode, add_optab, tem,
5362 GEN_INT (normalizep),
5363 target, 0, OPTAB_WIDEN);
5364 }
5365 else if (!want_add
5366 && rtx_cost (trueval, XOR,
5367 optimize_insn_for_speed_p ()) == 0)
5368 {
5369 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5370 normalizep, target_mode);
5371 if (tem)
5372 return expand_binop (target_mode, xor_optab, tem, trueval,
5373 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5374 }
5375 }
5376
5377 delete_insns_since (last);
5378
5379 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5380 if (code == ORDERED || code == UNORDERED)
5381 return 0;
5382
5383 and_them = split_comparison (code, mode, &first_code, &code);
5384
5385 /* If there are no NaNs, the first comparison should always fall through.
5386 Effectively change the comparison to the other one. */
5387 if (!HONOR_NANS (mode))
5388 {
5389 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5390 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5391 target_mode);
5392 }
5393
5394 #ifdef HAVE_conditional_move
5395 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5396 conditional move. */
5397 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5398 normalizep, target_mode);
5399 if (tem == 0)
5400 return 0;
5401
5402 if (and_them)
5403 tem = emit_conditional_move (target, code, op0, op1, mode,
5404 tem, const0_rtx, GET_MODE (tem), 0);
5405 else
5406 tem = emit_conditional_move (target, code, op0, op1, mode,
5407 trueval, tem, GET_MODE (tem), 0);
5408
5409 if (tem == 0)
5410 delete_insns_since (last);
5411 return tem;
5412 #else
5413 return 0;
5414 #endif
5415 }
5416
5417 /* The remaining tricks only apply to integer comparisons. */
5418
5419 if (GET_MODE_CLASS (mode) != MODE_INT)
5420 return 0;
5421
5422 /* If this is an equality comparison of integers, we can try to exclusive-or
5423 (or subtract) the two operands and use a recursive call to try the
5424 comparison with zero. Don't do any of these cases if branches are
5425 very cheap. */
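/* The point is that "A == B" holds iff "(A ^ B) == 0" (equivalently,
   "(A - B) == 0"), and a comparison against zero is what the sign-bit
   tricks further down handle best.  */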
5426
5427 if ((code == EQ || code == NE) && op1 != const0_rtx)
5428 {
5429 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5430 OPTAB_WIDEN);
5431
5432 if (tem == 0)
5433 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5434 OPTAB_WIDEN);
5435 if (tem != 0)
5436 tem = emit_store_flag (target, code, tem, const0_rtx,
5437 mode, unsignedp, normalizep);
5438 if (tem != 0)
5439 return tem;
5440
5441 delete_insns_since (last);
5442 }
5443
5444 /* For integer comparisons, try the reverse comparison. However, for
5445 small X, if we'd have to extend anyway, implementing "X != 0"
5446 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5447 rcode = reverse_condition (code);
5448 if (can_compare_p (rcode, mode, ccp_store_flag)
5449 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5450 && code == NE
5451 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5452 && op1 == const0_rtx))
5453 {
5454 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5455 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5456
5457 /* Again, for the reverse comparison, use either an addition or an XOR. */
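/* Worked example (illustrative): with STORE_FLAG_VALUE == 1 and
   NORMALIZEP == -1, the reversed comparison R is computed as 0/1 and
   R + (-1) gives -1 when the original comparison is true and 0 when it
   is false.  When the addition is not free, R is instead computed
   normalized to NORMALIZEP and XORed with TRUEVAL to flip it.  */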
5458 if (want_add
5459 && rtx_cost (GEN_INT (normalizep), PLUS,
5460 optimize_insn_for_speed_p ()) == 0)
5461 {
5462 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5463 STORE_FLAG_VALUE, target_mode);
5464 if (tem != 0)
5465 tem = expand_binop (target_mode, add_optab, tem,
5466 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5467 }
5468 else if (!want_add
5469 && rtx_cost (trueval, XOR,
5470 optimize_insn_for_speed_p ()) == 0)
5471 {
5472 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5473 normalizep, target_mode);
5474 if (tem != 0)
5475 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5476 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5477 }
5478
5479 if (tem != 0)
5480 return tem;
5481 delete_insns_since (last);
5482 }
5483
5484 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5485 the constant zero. Reject all other comparisons at this point. Only
5486 do LE and GT if branches are expensive since they are expensive on
5487 2-operand machines. */
5488
5489 if (op1 != const0_rtx
5490 || (code != EQ && code != NE
5491 && (BRANCH_COST (optimize_insn_for_speed_p (),
5492 false) <= 1 || (code != LE && code != GT))))
5493 return 0;
5494
5495 /* Try to put the result of the comparison in the sign bit. Assume we can't
5496 do the necessary operation below. */
5497
5498 tem = 0;
5499
5500 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5501 the sign bit set. */
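/* E.g. in an 8-bit mode: A = 3 gives 3 | 2 = 3 (sign clear, A <= 0 is
   false); A = 0 gives 0 | 0xff = 0xff (sign set, true); any negative A
   already has the sign bit set in A itself (true).  */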
5502
5503 if (code == LE)
5504 {
5505 /* This is destructive, so SUBTARGET can't be OP0. */
5506 if (rtx_equal_p (subtarget, op0))
5507 subtarget = 0;
5508
5509 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5510 OPTAB_WIDEN);
5511 if (tem)
5512 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5513 OPTAB_WIDEN);
5514 }
5515
5516 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5517 number of bits in the mode of OP0, minus one. */
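/* E.g. in an 8-bit mode (BITS == 7, arithmetic shift): A = 5 gives
   0 - 5 = -5 (sign set, A > 0 is true); A = 0 gives 0 (sign clear);
   A = -5 gives -1 - (-5) = 4 (sign clear, false).  */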
5518
5519 if (code == GT)
5520 {
5521 if (rtx_equal_p (subtarget, op0))
5522 subtarget = 0;
5523
5524 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5525 GET_MODE_BITSIZE (mode) - 1,
5526 subtarget, 0);
5527 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5528 OPTAB_WIDEN);
5529 }
5530
5531 if (code == EQ || code == NE)
5532 {
5533 /* For EQ or NE, one way to do the comparison is to apply an operation
5534 that converts the operand into a positive number if it is nonzero
5535 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5536 for NE we negate. This puts the result in the sign bit. Then we
5537 normalize with a shift, if needed.
5538
5539 Two operations that can do the above actions are ABS and FFS, so try
5540 them. If that doesn't work, and MODE is smaller than a full word,
5541 we can use zero-extension to the wider mode (an unsigned conversion)
5542 as the operation. */
5543
5544 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5545 that is compensated by the subsequent overflow when subtracting
5546 one / negating. */
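/* Worked example with ABS in an 8-bit mode: for A = 5, abs gives 5;
   negating yields -5 (sign set, so NE is true) while subtracting one
   yields 4 (sign clear, so EQ is false).  For A = 0, abs gives 0;
   negating yields 0 (NE false) and subtracting one yields -1 (EQ true).
   For A = -128, abs wraps to -128, and the overflow mentioned above
   still produces the right sign bit in both cases.  */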
5547
5548 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5549 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5550 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5551 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5552 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5553 {
5554 tem = convert_modes (word_mode, mode, op0, 1);
5555 mode = word_mode;
5556 }
5557
5558 if (tem != 0)
5559 {
5560 if (code == EQ)
5561 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5562 0, OPTAB_WIDEN);
5563 else
5564 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5565 }
5566
5567 /* If we couldn't do it that way, for NE we can "or" the two's complement
5568 of the value with itself. For EQ, we take the one's complement of
5569 that "or", which is an extra insn, so we only handle EQ if branches
5570 are expensive. */
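/* E.g. in an 8-bit mode: A = 1 gives -1 | 1 = 0xff (sign set, NE true);
   A = 0 gives 0 | 0 = 0 (sign clear, NE false).  For any nonzero A at
   least one of A and -A is negative, so the IOR always has the sign bit
   set; the extra one's complement then flips that for EQ.  */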
5571
5572 if (tem == 0
5573 && (code == NE
5574 || BRANCH_COST (optimize_insn_for_speed_p (),
5575 false) > 1))
5576 {
5577 if (rtx_equal_p (subtarget, op0))
5578 subtarget = 0;
5579
5580 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5581 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5582 OPTAB_WIDEN);
5583
5584 if (tem && code == EQ)
5585 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5586 }
5587 }
5588
5589 if (tem && normalizep)
5590 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5591 GET_MODE_BITSIZE (mode) - 1,
5592 subtarget, normalizep == 1);
5593
5594 if (tem)
5595 {
5596 if (!target)
5597 ;
5598 else if (GET_MODE (tem) != target_mode)
5599 {
5600 convert_move (target, tem, 0);
5601 tem = target;
5602 }
5603 else if (!subtarget)
5604 {
5605 emit_move_insn (target, tem);
5606 tem = target;
5607 }
5608 }
5609 else
5610 delete_insns_since (last);
5611
5612 return tem;
5613 }
5614
5615 /* Like emit_store_flag, but always succeeds. */
5616
5617 rtx
5618 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5619 enum machine_mode mode, int unsignedp, int normalizep)
5620 {
5621 rtx tem, label;
5622 rtx trueval, falseval;
5623
5624 /* First see if emit_store_flag can do the job. */
5625 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5626 if (tem != 0)
5627 return tem;
5628
5629 if (!target)
5630 target = gen_reg_rtx (word_mode);
5631
5632 /* If this failed, we have to do this with set/compare/jump/set code.
5633 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
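/* The general fallback emitted further below amounts to (pseudo-C):

     target = TRUEVAL;
     if (OP0 CODE OP1) goto done;   (do_compare_rtx_and_jump)
     target = FALSEVAL;
   done:

   i.e. the flag value is stored unconditionally and then overwritten
   when the comparison turns out to be false.  */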
5634 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5635 if (code == NE
5636 && GET_MODE_CLASS (mode) == MODE_INT
5637 && REG_P (target)
5638 && op0 == target
5639 && op1 == const0_rtx)
5640 {
5641 label = gen_label_rtx ();
5642 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5643 mode, NULL_RTX, NULL_RTX, label, -1);
5644 emit_move_insn (target, trueval);
5645 emit_label (label);
5646 return target;
5647 }
5648
5649 if (!REG_P (target)
5650 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5651 target = gen_reg_rtx (GET_MODE (target));
5652
5653 /* Jump in the right direction if the target cannot implement CODE
5654 but can jump on its reverse condition. */
5655 falseval = const0_rtx;
5656 if (! can_compare_p (code, mode, ccp_jump)
5657 && (! FLOAT_MODE_P (mode)
5658 || code == ORDERED || code == UNORDERED
5659 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5660 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5661 {
5662 enum rtx_code rcode;
5663 if (FLOAT_MODE_P (mode))
5664 rcode = reverse_condition_maybe_unordered (code);
5665 else
5666 rcode = reverse_condition (code);
5667
5668 /* Canonicalize to UNORDERED for the libcall. */
5669 if (can_compare_p (rcode, mode, ccp_jump)
5670 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5671 {
5672 falseval = trueval;
5673 trueval = const0_rtx;
5674 code = rcode;
5675 }
5676 }
5677
5678 emit_move_insn (target, trueval);
5679 label = gen_label_rtx ();
5680 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5681 NULL_RTX, label, -1);
5682
5683 emit_move_insn (target, falseval);
5684 emit_label (label);
5685
5686 return target;
5687 }
5688 \f
5689 /* Perform possibly multi-word comparison and conditional jump to LABEL
5690 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5691 now a thin wrapper around do_compare_rtx_and_jump. */
5692
5693 static void
5694 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5695 rtx label)
5696 {
5697 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5698 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5699 NULL_RTX, NULL_RTX, label, -1);
5700 }