1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56
57 /* Test whether a value is zero or a power of two.  */
58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
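/* This works because X & (X - 1) clears the lowest set bit of X, so the
   result is zero exactly when X has at most one bit set.  For example,
   8 & 7 == 0, while 12 & 11 == 8.  */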
59
60 /* Nonzero means divides or modulus operations are relatively cheap for
61 powers of two, so don't use branches; emit the operation instead.
62 Usually, this will mean that the MD file will emit non-branch
63 sequences. */
64
65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
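/* init_expmed below marks signed division by a power of two as cheap when
   its estimated rtx_cost (using division by 32 as the representative case)
   is at most twice the cost of an addition in the same mode; for modulus
   the threshold is four additions.  */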
67
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
71
72 /* For compilers that support multiple targets with different word sizes,
73 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
74 is the H8/300(H) compiler. */
75
76 #ifndef MAX_BITS_PER_WORD
77 #define MAX_BITS_PER_WORD BITS_PER_WORD
78 #endif
79
80 /* Reduce conditional compilation elsewhere. */
81 #ifndef HAVE_insv
82 #define HAVE_insv 0
83 #define CODE_FOR_insv CODE_FOR_nothing
84 #define gen_insv(a,b,c,d) NULL_RTX
85 #endif
86 #ifndef HAVE_extv
87 #define HAVE_extv 0
88 #define CODE_FOR_extv CODE_FOR_nothing
89 #define gen_extv(a,b,c,d) NULL_RTX
90 #endif
91 #ifndef HAVE_extzv
92 #define HAVE_extzv 0
93 #define CODE_FOR_extzv CODE_FOR_nothing
94 #define gen_extzv(a,b,c,d) NULL_RTX
95 #endif
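/* On targets that lack these named patterns, the dummy definitions make
   HAVE_insv etc. constant zero and turn the gen_* calls into NULL_RTX,
   so the corresponding code below compiles but is never executed.  */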
96
97 /* Cost of various pieces of RTL. Note that some of these are indexed by
98 shift count and some by mode. */
99 static int zero_cost;
100 static int add_cost[NUM_MACHINE_MODES];
101 static int neg_cost[NUM_MACHINE_MODES];
102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int mul_cost[NUM_MACHINE_MODES];
106 static int div_cost[NUM_MACHINE_MODES];
107 static int mul_widen_cost[NUM_MACHINE_MODES];
108 static int mul_highpart_cost[NUM_MACHINE_MODES];
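/* shift_cost[M][N] is the cost of shifting a mode-M value by N bits.
   shiftadd_cost[M][N] and shiftsub_cost[M][N] are the costs of
   (X << N) + Y and (X << N) - Y, expressed as a multiply by 2**N so that
   shift-and-add style patterns can match.  mul_widen_cost[M] is the cost
   of a widening multiply producing mode M, and mul_highpart_cost[M] is
   the cost of the high part of a widening multiply of two mode-M values.  */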
109
110 void
111 init_expmed (void)
112 {
113 struct
114 {
115 struct rtx_def reg; rtunion reg_fld[2];
116 struct rtx_def plus; rtunion plus_fld1;
117 struct rtx_def neg;
118 struct rtx_def udiv; rtunion udiv_fld1;
119 struct rtx_def mult; rtunion mult_fld1;
120 struct rtx_def div; rtunion div_fld1;
121 struct rtx_def mod; rtunion mod_fld1;
122 struct rtx_def zext;
123 struct rtx_def wide_mult; rtunion wide_mult_fld1;
124 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
125 struct rtx_def wide_trunc;
126 struct rtx_def shift; rtunion shift_fld1;
127 struct rtx_def shift_mult; rtunion shift_mult_fld1;
128 struct rtx_def shift_add; rtunion shift_add_fld1;
129 struct rtx_def shift_sub; rtunion shift_sub_fld1;
130 } all;
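/* The members of ALL are skeleton RTL expressions built on the stack;
   the loop below gives them each integer mode in turn and queries
   rtx_cost, so the cost tables can be filled in without allocating
   real RTL objects.  */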
131
132 rtx pow2[MAX_BITS_PER_WORD];
133 rtx cint[MAX_BITS_PER_WORD];
134 int m, n;
135 enum machine_mode mode, wider_mode;
136
137 zero_cost = rtx_cost (const0_rtx, 0);
138
139 for (m = 1; m < MAX_BITS_PER_WORD; m++)
140 {
141 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
142 cint[m] = GEN_INT (m);
143 }
144
145 memset (&all, 0, sizeof all);
146
147 PUT_CODE (&all.reg, REG);
148 /* Avoid using hard regs in ways which may be unsupported. */
149 REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
150
151 PUT_CODE (&all.plus, PLUS);
152 XEXP (&all.plus, 0) = &all.reg;
153 XEXP (&all.plus, 1) = &all.reg;
154
155 PUT_CODE (&all.neg, NEG);
156 XEXP (&all.neg, 0) = &all.reg;
157
158 PUT_CODE (&all.udiv, UDIV);
159 XEXP (&all.udiv, 0) = &all.reg;
160 XEXP (&all.udiv, 1) = &all.reg;
161
162 PUT_CODE (&all.mult, MULT);
163 XEXP (&all.mult, 0) = &all.reg;
164 XEXP (&all.mult, 1) = &all.reg;
165
166 PUT_CODE (&all.div, DIV);
167 XEXP (&all.div, 0) = &all.reg;
168 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
169
170 PUT_CODE (&all.mod, MOD);
171 XEXP (&all.mod, 0) = &all.reg;
172 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
173
174 PUT_CODE (&all.zext, ZERO_EXTEND);
175 XEXP (&all.zext, 0) = &all.reg;
176
177 PUT_CODE (&all.wide_mult, MULT);
178 XEXP (&all.wide_mult, 0) = &all.zext;
179 XEXP (&all.wide_mult, 1) = &all.zext;
180
181 PUT_CODE (&all.wide_lshr, LSHIFTRT);
182 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
183
184 PUT_CODE (&all.wide_trunc, TRUNCATE);
185 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
186
187 PUT_CODE (&all.shift, ASHIFT);
188 XEXP (&all.shift, 0) = &all.reg;
189
190 PUT_CODE (&all.shift_mult, MULT);
191 XEXP (&all.shift_mult, 0) = &all.reg;
192
193 PUT_CODE (&all.shift_add, PLUS);
194 XEXP (&all.shift_add, 0) = &all.shift_mult;
195 XEXP (&all.shift_add, 1) = &all.reg;
196
197 PUT_CODE (&all.shift_sub, MINUS);
198 XEXP (&all.shift_sub, 0) = &all.shift_mult;
199 XEXP (&all.shift_sub, 1) = &all.reg;
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.udiv, mode);
209 PUT_MODE (&all.mult, mode);
210 PUT_MODE (&all.div, mode);
211 PUT_MODE (&all.mod, mode);
212 PUT_MODE (&all.wide_trunc, mode);
213 PUT_MODE (&all.shift, mode);
214 PUT_MODE (&all.shift_mult, mode);
215 PUT_MODE (&all.shift_add, mode);
216 PUT_MODE (&all.shift_sub, mode);
217
218 add_cost[mode] = rtx_cost (&all.plus, SET);
219 neg_cost[mode] = rtx_cost (&all.neg, SET);
220 div_cost[mode] = rtx_cost (&all.udiv, SET);
221 mul_cost[mode] = rtx_cost (&all.mult, SET);
222
223 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
224 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
225
226 wider_mode = GET_MODE_WIDER_MODE (mode);
227 if (wider_mode != VOIDmode)
228 {
229 PUT_MODE (&all.zext, wider_mode);
230 PUT_MODE (&all.wide_mult, wider_mode);
231 PUT_MODE (&all.wide_lshr, wider_mode);
232 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
233
234 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
235 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
236 }
237
238 shift_cost[mode][0] = 0;
239 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
240
241 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
242 for (m = 1; m < n; m++)
243 {
244 XEXP (&all.shift, 1) = cint[m];
245 XEXP (&all.shift_mult, 1) = pow2[m];
246
247 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
248 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
249 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
250 }
251 }
252 }
253
254 /* Return an rtx representing minus the value of X.
255 MODE is the intended mode of the result,
256 useful if X is a CONST_INT. */
257
258 rtx
259 negate_rtx (enum machine_mode mode, rtx x)
260 {
261 rtx result = simplify_unary_operation (NEG, mode, x, mode);
262
263 if (result == 0)
264 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
265
266 return result;
267 }
268
269 /* Report on the availability of insv/extv/extzv and the desired mode
270 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
271 is false; else the mode of the specified operand. If OPNO is -1,
272 all the caller cares about is whether the insn is available. */
273 enum machine_mode
274 mode_for_extraction (enum extraction_pattern pattern, int opno)
275 {
276 const struct insn_data *data;
277
278 switch (pattern)
279 {
280 case EP_insv:
281 if (HAVE_insv)
282 {
283 data = &insn_data[CODE_FOR_insv];
284 break;
285 }
286 return MAX_MACHINE_MODE;
287
288 case EP_extv:
289 if (HAVE_extv)
290 {
291 data = &insn_data[CODE_FOR_extv];
292 break;
293 }
294 return MAX_MACHINE_MODE;
295
296 case EP_extzv:
297 if (HAVE_extzv)
298 {
299 data = &insn_data[CODE_FOR_extzv];
300 break;
301 }
302 return MAX_MACHINE_MODE;
303
304 default:
305 gcc_unreachable ();
306 }
307
308 if (opno == -1)
309 return VOIDmode;
310
311 /* Everyone who uses this function used to follow it with
312 if (result == VOIDmode) result = word_mode; */
313 if (data->operand[opno].mode == VOIDmode)
314 return word_mode;
315 return data->operand[opno].mode;
316 }
317
318 \f
319 /* Generate code to store value from rtx VALUE
320 into a bit-field within structure STR_RTX
321 containing BITSIZE bits starting at bit BITNUM.
322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
325
326 /* ??? Note that there are two different ideas here for how
327 to determine the size to count bits within, for a register.
328 One is BITS_PER_WORD, and the other is the size of operand 3
329 of the insv pattern.
330
331 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
332 otherwise, we use the mode of operand 3.  */
333
334 rtx
335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value)
338 {
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
345
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
347
348 while (GET_CODE (op0) == SUBREG)
349 {
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
356 op0 = SUBREG_REG (op0);
357 }
358
359 /* No action is needed if the target is a register and if the field
360 lies completely outside that register. This can occur if the source
361 code contains an out-of-bounds access to a small array. */
362 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
363 return value;
364
365 /* Use vec_set patterns for inserting parts of vectors whenever
366 available. */
367 if (VECTOR_MODE_P (GET_MODE (op0))
368 && !MEM_P (op0)
369 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
370 != CODE_FOR_nothing)
371 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
372 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
373 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
374 {
375 enum machine_mode outermode = GET_MODE (op0);
376 enum machine_mode innermode = GET_MODE_INNER (outermode);
377 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
378 int pos = bitnum / GET_MODE_BITSIZE (innermode);
379 rtx rtxpos = GEN_INT (pos);
380 rtx src = value;
381 rtx dest = op0;
382 rtx pat, seq;
383 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
384 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
385 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
386
387 start_sequence ();
388
389 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
390 src = copy_to_mode_reg (mode1, src);
391
392 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
393 rtxpos = copy_to_mode_reg (mode2, rtxpos);
394
395 /* We could handle this, but we should always be called with a pseudo
396 for our targets and all insns should take them as outputs. */
397 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
398 && (*insn_data[icode].operand[1].predicate) (src, mode1)
399 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
400 pat = GEN_FCN (icode) (dest, src, rtxpos);
401 seq = get_insns ();
402 end_sequence ();
403 if (pat)
404 {
405 emit_insn (seq);
406 emit_insn (pat);
407 return dest;
408 }
409 }
410
411 /* If the target is a register, overwriting the entire object, or storing
412 a full-word or multi-word field can be done with just a SUBREG.
413
414 If the target is memory, storing any naturally aligned field can be
415 done with a simple store. For targets that support fast unaligned
416 memory, any naturally sized, unit aligned field can be done directly. */
417
418 offset = bitnum / unit;
419 bitpos = bitnum % unit;
420 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
421 + (offset * UNITS_PER_WORD);
422
423 if (bitpos == 0
424 && bitsize == GET_MODE_BITSIZE (fieldmode)
425 && (!MEM_P (op0)
426 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
427 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
428 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
429 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
430 || (offset * BITS_PER_UNIT % bitsize == 0
431 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
432 {
433 if (MEM_P (op0))
434 op0 = adjust_address (op0, fieldmode, offset);
435 else if (GET_MODE (op0) != fieldmode)
436 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
437 byte_offset);
438 emit_move_insn (op0, value);
439 return value;
440 }
441
442 /* Make sure we are playing with integral modes. Pun with subregs
443 if we aren't. This must come after the entire register case above,
444 since that case is valid for any mode. The following cases are only
445 valid for integral modes. */
446 {
447 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
448 if (imode != GET_MODE (op0))
449 {
450 if (MEM_P (op0))
451 op0 = adjust_address (op0, imode, 0);
452 else
453 {
454 gcc_assert (imode != BLKmode);
455 op0 = gen_lowpart (imode, op0);
456 }
457 }
458 }
459
460 /* We may be accessing data outside the field, which means
461 we can alias adjacent data. */
462 if (MEM_P (op0))
463 {
464 op0 = shallow_copy_rtx (op0);
465 set_mem_alias_set (op0, 0);
466 set_mem_expr (op0, 0);
467 }
468
469 /* If OP0 is a register, BITPOS must count within a word.
470 But as we have it, it counts within whatever size OP0 now has.
471 On a bigendian machine, these are not the same, so convert. */
472 if (BYTES_BIG_ENDIAN
473 && !MEM_P (op0)
474 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
475 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
476
477 /* Storing an lsb-aligned field in a register
478 can be done with a movestrict instruction. */
479
480 if (!MEM_P (op0)
481 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
482 && bitsize == GET_MODE_BITSIZE (fieldmode)
483 && (movstrict_optab->handlers[fieldmode].insn_code
484 != CODE_FOR_nothing))
485 {
486 int icode = movstrict_optab->handlers[fieldmode].insn_code;
487
488 /* Get appropriate low part of the value being stored. */
489 if (GET_CODE (value) == CONST_INT || REG_P (value))
490 value = gen_lowpart (fieldmode, value);
491 else if (!(GET_CODE (value) == SYMBOL_REF
492 || GET_CODE (value) == LABEL_REF
493 || GET_CODE (value) == CONST))
494 value = convert_to_mode (fieldmode, value, 0);
495
496 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
497 value = copy_to_mode_reg (fieldmode, value);
498
499 if (GET_CODE (op0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 op0 = SUBREG_REG (op0);
508 }
509
510 emit_insn (GEN_FCN (icode)
511 (gen_rtx_SUBREG (fieldmode, op0,
512 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
513 + (offset * UNITS_PER_WORD)),
514 value));
515
516 return value;
517 }
518
519 /* Handle fields bigger than a word. */
520
521 if (bitsize > BITS_PER_WORD)
522 {
523 /* Here we transfer the words of the field
524 in the order least significant first.
525 This is because the most significant word is the one which may
526 be less than full.
527 However, only do that if the value is not BLKmode. */
528
529 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
530 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
531 unsigned int i;
532
533 /* This is the mode we must force value to, so that there will be enough
534 subwords to extract. Note that fieldmode will often (always?) be
535 VOIDmode, because that is what store_field uses to indicate that this
536 is a bit field, but passing VOIDmode to operand_subword_force
537 is not allowed. */
538 fieldmode = GET_MODE (value);
539 if (fieldmode == VOIDmode)
540 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
541
542 for (i = 0; i < nwords; i++)
543 {
544 /* If I is 0, use the low-order word in both field and target;
545 if I is 1, use the next to lowest word; and so on. */
546 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
547 unsigned int bit_offset = (backwards
548 ? MAX ((int) bitsize - ((int) i + 1)
549 * BITS_PER_WORD,
550 0)
551 : (int) i * BITS_PER_WORD);
552
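/* For example, on a 32-bit-word target with WORDS_BIG_ENDIAN, a 96-bit
   field is stored by visiting words 2, 1 and 0 of VALUE at bit offsets
   64, 32 and 0 respectively, so the (possibly partial) most significant
   word is written last.  */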
553 store_bit_field (op0, MIN (BITS_PER_WORD,
554 bitsize - i * BITS_PER_WORD),
555 bitnum + bit_offset, word_mode,
556 operand_subword_force (value, wordnum, fieldmode));
557 }
558 return value;
559 }
560
561 /* From here on we can assume that the field to be stored is no wider
562 than a word.  */
563
564 /* OFFSET is the number of words or bytes (UNIT says which)
565 from STR_RTX to the first word or byte containing part of the field. */
566
567 if (!MEM_P (op0))
568 {
569 if (offset != 0
570 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
571 {
572 if (!REG_P (op0))
573 {
574 /* Since this is a destination (lvalue), we can't copy
575 it to a pseudo. We can remove a SUBREG that does not
576 change the size of the operand. Such a SUBREG may
577 have been added above. */
578 gcc_assert (GET_CODE (op0) == SUBREG
579 && (GET_MODE_SIZE (GET_MODE (op0))
580 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
581 op0 = SUBREG_REG (op0);
582 }
583 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
584 op0, (offset * UNITS_PER_WORD));
585 }
586 offset = 0;
587 }
588
589 /* If VALUE has a floating-point or complex mode, access it as an
590 integer of the corresponding size. This can occur on a machine
591 with 64-bit registers that uses SFmode for float.  It can also
592 occur for unaligned float or complex fields. */
593 orig_value = value;
594 if (GET_MODE (value) != VOIDmode
595 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
596 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
597 {
598 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
599 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
600 }
601
602 /* Now OFFSET is nonzero only if OP0 is memory
603 and is therefore always measured in bytes. */
604
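/* Try the insv pattern first.  It is used when the field is no wider than
   the mode of insv operand 3 and, for register destinations, does not
   extend beyond that mode.  Single-bit constant stores are excluded here
   because store_fixed_bit_field handles them with a single AND or IOR.  */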
605 if (HAVE_insv
606 && GET_MODE (value) != BLKmode
607 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
608 && bitsize > 0
609 && GET_MODE_BITSIZE (op_mode) >= bitsize
610 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
611 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
612 {
613 int xbitpos = bitpos;
614 rtx value1;
615 rtx xop0 = op0;
616 rtx last = get_last_insn ();
617 rtx pat;
618 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
619 int save_volatile_ok = volatile_ok;
620
621 volatile_ok = 1;
622
623 /* If this machine's insv can only insert into a register, copy OP0
624 into a register and save it back later. */
625 if (MEM_P (op0)
626 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
627 (op0, VOIDmode)))
628 {
629 rtx tempreg;
630 enum machine_mode bestmode;
631
632 /* Get the mode to use for inserting into this field. If OP0 is
633 BLKmode, get the smallest mode consistent with the alignment. If
634 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
635 mode. Otherwise, use the smallest mode containing the field. */
636
637 if (GET_MODE (op0) == BLKmode
638 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
639 bestmode
640 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
641 MEM_VOLATILE_P (op0));
642 else
643 bestmode = GET_MODE (op0);
644
645 if (bestmode == VOIDmode
646 || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
647 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
648 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
649 goto insv_loses;
650
651 /* Adjust address to point to the containing unit of that mode.
652 Compute offset as multiple of this unit, counting in bytes. */
653 unit = GET_MODE_BITSIZE (bestmode);
654 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
655 bitpos = bitnum % unit;
656 op0 = adjust_address (op0, bestmode, offset);
657
658 /* Fetch that unit, store the bitfield in it, then store
659 the unit. */
660 tempreg = copy_to_reg (op0);
661 store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
662 emit_move_insn (op0, tempreg);
663 return value;
664 }
665 volatile_ok = save_volatile_ok;
666
667 /* Add OFFSET into OP0's address. */
668 if (MEM_P (xop0))
669 xop0 = adjust_address (xop0, byte_mode, offset);
670
671 /* If xop0 is a register, we need it in MAXMODE
672 to make it acceptable to the format of insv. */
673 if (GET_CODE (xop0) == SUBREG)
674 /* We can't just change the mode, because this might clobber op0,
675 and we will need the original value of op0 if insv fails. */
676 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
677 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
678 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
679
680 /* On big-endian machines, we count bits from the most significant.
681 If the bit field insn does not, we must invert. */
682
683 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 xbitpos = unit - bitsize - xbitpos;
685
686 /* We have been counting XBITPOS within UNIT.
687 Count instead within the size of the register. */
688 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
689 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
690
691 unit = GET_MODE_BITSIZE (maxmode);
692
693 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
694 value1 = value;
695 if (GET_MODE (value) != maxmode)
696 {
697 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
698 {
699 /* Optimization: Don't bother really extending VALUE
700 if it has all the bits we will actually use. However,
701 if we must narrow it, be sure we do it correctly. */
702
703 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
704 {
705 rtx tmp;
706
707 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
708 if (! tmp)
709 tmp = simplify_gen_subreg (maxmode,
710 force_reg (GET_MODE (value),
711 value1),
712 GET_MODE (value), 0);
713 value1 = tmp;
714 }
715 else
716 value1 = gen_lowpart (maxmode, value1);
717 }
718 else if (GET_CODE (value) == CONST_INT)
719 value1 = gen_int_mode (INTVAL (value), maxmode);
720 else
721 /* Parse phase is supposed to make VALUE's data type
722 match that of the component reference, which is a type
723 at least as wide as the field; so VALUE should have
724 a mode that corresponds to that type. */
725 gcc_assert (CONSTANT_P (value));
726 }
727
728 /* If this machine's insv insists on a register,
729 get VALUE1 into a register. */
730 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
731 (value1, maxmode)))
732 value1 = force_reg (maxmode, value1);
733
734 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
735 if (pat)
736 emit_insn (pat);
737 else
738 {
739 delete_insns_since (last);
740 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
741 }
742 }
743 else
744 insv_loses:
745 /* Insv is not available; store using shifts and boolean ops. */
746 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
747 return value;
748 }
749 \f
750 /* Use shifts and boolean operations to store VALUE
751 into a bit field of width BITSIZE
752 in the location specified by OP0, offset by OFFSET bytes.
753 (OFFSET must be 0 if OP0 is a register.)
754 The field starts at position BITPOS within the byte.
755 (If OP0 is a register, it may be a full word or a narrower mode,
756 but BITPOS still counts within a full word,
757 which is significant on bigendian machines.) */
758
759 static void
760 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
761 unsigned HOST_WIDE_INT bitsize,
762 unsigned HOST_WIDE_INT bitpos, rtx value)
763 {
764 enum machine_mode mode;
765 unsigned int total_bits = BITS_PER_WORD;
766 rtx subtarget, temp;
767 int all_zero = 0;
768 int all_one = 0;
769
770 /* There is a case not handled here:
771 a structure with a known alignment of just a halfword
772 and a field split across two aligned halfwords within the structure.
773 Or likewise a structure with a known alignment of just a byte
774 and a field split across two bytes.
775 Such cases are not supposed to be able to occur. */
776
777 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
778 {
779 gcc_assert (!offset);
780 /* Special treatment for a bit field split across two registers. */
781 if (bitsize + bitpos > BITS_PER_WORD)
782 {
783 store_split_bit_field (op0, bitsize, bitpos, value);
784 return;
785 }
786 }
787 else
788 {
789 /* Get the proper mode to use for this field. We want a mode that
790 includes the entire field. If such a mode would be larger than
791 a word, we won't be doing the extraction the normal way.
792 We don't want a mode bigger than the destination. */
793
794 mode = GET_MODE (op0);
795 if (GET_MODE_BITSIZE (mode) == 0
796 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
797 mode = word_mode;
798 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
799 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
800
801 if (mode == VOIDmode)
802 {
803 /* The only way this should occur is if the field spans word
804 boundaries. */
805 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
806 value);
807 return;
808 }
809
810 total_bits = GET_MODE_BITSIZE (mode);
811
812 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
813 be in the range 0 to total_bits-1, and put any excess bytes in
814 OFFSET. */
815 if (bitpos >= total_bits)
816 {
817 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
818 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
819 * BITS_PER_UNIT);
820 }
821
822 /* Get ref to an aligned byte, halfword, or word containing the field.
823 Adjust BITPOS to be position within a word,
824 and OFFSET to be the offset of that word.
825 Then alter OP0 to refer to that word. */
826 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
827 offset -= (offset % (total_bits / BITS_PER_UNIT));
828 op0 = adjust_address (op0, mode, offset);
829 }
830
831 mode = GET_MODE (op0);
832
833 /* Now MODE is either some integral mode for a MEM as OP0,
834 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
835 The bit field is contained entirely within OP0.
836 BITPOS is the starting bit number within OP0.
837 (OP0's mode may actually be narrower than MODE.) */
838
839 if (BYTES_BIG_ENDIAN)
840 /* BITPOS is the distance between our msb
841 and that of the containing datum.
842 Convert it to the distance from the lsb. */
843 bitpos = total_bits - bitsize - bitpos;
844
845 /* Now BITPOS is always the distance between our lsb
846 and that of OP0. */
847
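/* In effect the code below computes
   OP0 = (OP0 & ~(MASK << BITPOS)) | ((VALUE & MASK) << BITPOS)
   where MASK has the low BITSIZE bits set and BITPOS counts from the lsb;
   the AND is skipped when VALUE is all ones and the IOR when it is zero.  */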
848 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
849 we must first convert its mode to MODE. */
850
851 if (GET_CODE (value) == CONST_INT)
852 {
853 HOST_WIDE_INT v = INTVAL (value);
854
855 if (bitsize < HOST_BITS_PER_WIDE_INT)
856 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
857
858 if (v == 0)
859 all_zero = 1;
860 else if ((bitsize < HOST_BITS_PER_WIDE_INT
861 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
862 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
863 all_one = 1;
864
865 value = lshift_value (mode, value, bitpos, bitsize);
866 }
867 else
868 {
869 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
870 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
871
872 if (GET_MODE (value) != mode)
873 {
874 if ((REG_P (value) || GET_CODE (value) == SUBREG)
875 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
876 value = gen_lowpart (mode, value);
877 else
878 value = convert_to_mode (mode, value, 1);
879 }
880
881 if (must_and)
882 value = expand_binop (mode, and_optab, value,
883 mask_rtx (mode, 0, bitsize, 0),
884 NULL_RTX, 1, OPTAB_LIB_WIDEN);
885 if (bitpos > 0)
886 value = expand_shift (LSHIFT_EXPR, mode, value,
887 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
888 }
889
890 /* Now clear the chosen bits in OP0,
891 except that if VALUE is -1 we need not bother. */
892
893 subtarget = op0;
894
895 if (! all_one)
896 {
897 temp = expand_binop (mode, and_optab, op0,
898 mask_rtx (mode, bitpos, bitsize, 1),
899 subtarget, 1, OPTAB_LIB_WIDEN);
900 subtarget = temp;
901 }
902 else
903 temp = op0;
904
905 /* Now logical-or VALUE into OP0, unless it is zero. */
906
907 if (! all_zero)
908 temp = expand_binop (mode, ior_optab, temp, value,
909 subtarget, 1, OPTAB_LIB_WIDEN);
910 if (op0 != temp)
911 emit_move_insn (op0, temp);
912 }
913 \f
914 /* Store a bit field that is split across multiple accessible memory objects.
915
916 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
917 BITSIZE is the field width; BITPOS the position of its first bit
918 (within the word).
919 VALUE is the value to store.
920
921 This does not yet handle fields wider than BITS_PER_WORD. */
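/* For example, with 32-bit words and BYTES_BIG_ENDIAN false, storing a
   10-bit field starting at bit 28 of a REG puts the low 4 bits of VALUE
   into bits 28..31 of the first word and the next 6 bits into bits 0..5
   of the following word.  */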
922
923 static void
924 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
925 unsigned HOST_WIDE_INT bitpos, rtx value)
926 {
927 unsigned int unit;
928 unsigned int bitsdone = 0;
929
930 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
931 much at a time. */
932 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
933 unit = BITS_PER_WORD;
934 else
935 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
936
937 /* If VALUE is a constant other than a CONST_INT, get it into a register in
938 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
939 that VALUE might be a floating-point constant. */
940 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
941 {
942 rtx word = gen_lowpart_common (word_mode, value);
943
944 if (word && (value != word))
945 value = word;
946 else
947 value = gen_lowpart_common (word_mode,
948 force_reg (GET_MODE (value) != VOIDmode
949 ? GET_MODE (value)
950 : word_mode, value));
951 }
952
953 while (bitsdone < bitsize)
954 {
955 unsigned HOST_WIDE_INT thissize;
956 rtx part, word;
957 unsigned HOST_WIDE_INT thispos;
958 unsigned HOST_WIDE_INT offset;
959
960 offset = (bitpos + bitsdone) / unit;
961 thispos = (bitpos + bitsdone) % unit;
962
963 /* THISSIZE must not overrun a word boundary. Otherwise,
964 store_fixed_bit_field will call us again, and we will mutually
965 recurse forever. */
966 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
967 thissize = MIN (thissize, unit - thispos);
968
969 if (BYTES_BIG_ENDIAN)
970 {
971 int total_bits;
972
973 /* We must do an endian conversion exactly the same way as it is
974 done in extract_bit_field, so that the two calls to
975 extract_fixed_bit_field will have comparable arguments. */
976 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
977 total_bits = BITS_PER_WORD;
978 else
979 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
980
981 /* Fetch successively less significant portions. */
982 if (GET_CODE (value) == CONST_INT)
983 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
984 >> (bitsize - bitsdone - thissize))
985 & (((HOST_WIDE_INT) 1 << thissize) - 1));
986 else
987 /* The args are chosen so that the last part includes the
988 lsb. Give extract_bit_field the value it needs (with
989 endianness compensation) to fetch the piece we want. */
990 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
991 total_bits - bitsize + bitsdone,
992 NULL_RTX, 1);
993 }
994 else
995 {
996 /* Fetch successively more significant portions. */
997 if (GET_CODE (value) == CONST_INT)
998 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
999 >> bitsdone)
1000 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001 else
1002 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1003 bitsdone, NULL_RTX, 1);
1004 }
1005
1006 /* If OP0 is a register, then handle OFFSET here.
1007
1008 When handling multiword bitfields, extract_bit_field may pass
1009 down a word_mode SUBREG of a larger REG for a bitfield that actually
1010 crosses a word boundary. Thus, for a SUBREG, we must find
1011 the current word starting from the base register. */
1012 if (GET_CODE (op0) == SUBREG)
1013 {
1014 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1015 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1016 GET_MODE (SUBREG_REG (op0)));
1017 offset = 0;
1018 }
1019 else if (REG_P (op0))
1020 {
1021 word = operand_subword_force (op0, offset, GET_MODE (op0));
1022 offset = 0;
1023 }
1024 else
1025 word = op0;
1026
1027 /* OFFSET is in UNITs, and UNIT is in bits.
1028 store_fixed_bit_field wants offset in bytes. */
1029 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1030 thispos, part);
1031 bitsdone += thissize;
1032 }
1033 }
1034 \f
1035 /* Generate code to extract a bit-field from STR_RTX
1036 containing BITSIZE bits, starting at BITNUM,
1037 and put it in TARGET if possible (if TARGET is nonzero).
1038 Regardless of TARGET, we return the rtx for where the value is placed.
1039
1040 STR_RTX is the structure containing the field (a REG or MEM).
1041 UNSIGNEDP is nonzero if this is an unsigned bit field.
1042 MODE is the natural mode of the field value once extracted.
1043 TMODE is the mode the caller would like the value to have;
1044 but the value may be returned with type MODE instead.
1045
1049 If a TARGET is specified and we can store in it at no extra cost,
1050 we do so, and return TARGET.
1051 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1052 if they are equally easy. */
1053
1054 rtx
1055 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1056 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1057 enum machine_mode mode, enum machine_mode tmode)
1058 {
1059 unsigned int unit
1060 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1061 unsigned HOST_WIDE_INT offset, bitpos;
1062 rtx op0 = str_rtx;
1063 rtx spec_target = target;
1064 rtx spec_target_subreg = 0;
1065 enum machine_mode int_mode;
1066 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1067 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1068 enum machine_mode mode1;
1069 int byte_offset;
1070
1071 if (tmode == VOIDmode)
1072 tmode = mode;
1073
1074 while (GET_CODE (op0) == SUBREG)
1075 {
1076 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1077 op0 = SUBREG_REG (op0);
1078 }
1079
1080 /* If we have an out-of-bounds access to a register, just return an
1081 uninitialized register of the required mode. This can occur if the
1082 source code contains an out-of-bounds access to a small array. */
1083 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1084 return gen_reg_rtx (tmode);
1085
1086 if (REG_P (op0)
1087 && mode == GET_MODE (op0)
1088 && bitnum == 0
1089 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1090 {
1091 /* We're trying to extract a full register from itself. */
1092 return op0;
1093 }
1094
1095 /* Use vec_extract patterns for extracting parts of vectors whenever
1096 available. */
1097 if (VECTOR_MODE_P (GET_MODE (op0))
1098 && !MEM_P (op0)
1099 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1100 != CODE_FOR_nothing)
1101 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1102 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1103 {
1104 enum machine_mode outermode = GET_MODE (op0);
1105 enum machine_mode innermode = GET_MODE_INNER (outermode);
1106 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1107 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1108 rtx rtxpos = GEN_INT (pos);
1109 rtx src = op0;
1110 rtx dest = NULL, pat, seq;
1111 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1112 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1113 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1114
1115 if (innermode == tmode || innermode == mode)
1116 dest = target;
1117
1118 if (!dest)
1119 dest = gen_reg_rtx (innermode);
1120
1121 start_sequence ();
1122
1123 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1124 dest = copy_to_mode_reg (mode0, dest);
1125
1126 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1127 src = copy_to_mode_reg (mode1, src);
1128
1129 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1130 rtxpos = copy_to_mode_reg (mode2, rtxpos);
1131
1132 /* We could handle this, but we should always be called with a pseudo
1133 for our targets and all insns should take them as outputs. */
1134 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1135 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1136 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1137
1138 pat = GEN_FCN (icode) (dest, src, rtxpos);
1139 seq = get_insns ();
1140 end_sequence ();
1141 if (pat)
1142 {
1143 emit_insn (seq);
1144 emit_insn (pat);
1145 return dest;
1146 }
1147 }
1148
1149 /* Make sure we are playing with integral modes. Pun with subregs
1150 if we aren't. */
1151 {
1152 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1153 if (imode != GET_MODE (op0))
1154 {
1155 if (MEM_P (op0))
1156 op0 = adjust_address (op0, imode, 0);
1157 else
1158 {
1159 gcc_assert (imode != BLKmode);
1160 op0 = gen_lowpart (imode, op0);
1161
1162 /* If we got a SUBREG, force it into a register since we
1163 aren't going to be able to do another SUBREG on it. */
1164 if (GET_CODE (op0) == SUBREG)
1165 op0 = force_reg (imode, op0);
1166 }
1167 }
1168 }
1169
1170 /* We may be accessing data outside the field, which means
1171 we can alias adjacent data. */
1172 if (MEM_P (op0))
1173 {
1174 op0 = shallow_copy_rtx (op0);
1175 set_mem_alias_set (op0, 0);
1176 set_mem_expr (op0, 0);
1177 }
1178
1179 /* Extraction of a full-word or multi-word value from a structure
1180 in a register or aligned memory can be done with just a SUBREG.
1181 A subword value in the least significant part of a register
1182 can also be extracted with a SUBREG. For this, we need the
1183 byte offset of the value in op0. */
1184
1185 bitpos = bitnum % unit;
1186 offset = bitnum / unit;
1187 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1188
1189 /* If OP0 is a register, BITPOS must count within a word.
1190 But as we have it, it counts within whatever size OP0 now has.
1191 On a bigendian machine, these are not the same, so convert. */
1192 if (BYTES_BIG_ENDIAN
1193 && !MEM_P (op0)
1194 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1195 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1196
1197 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1198 If that's wrong, the solution is to test for it and set TARGET to 0
1199 if needed. */
1200
1201 /* Only scalar integer modes can be converted via subregs. There is an
1202 additional problem for FP modes here in that they can have a precision
1203 which is different from the size. mode_for_size uses precision, but
1204 we want a mode based on the size, so we must avoid calling it for FP
1205 modes. */
1206 mode1 = (SCALAR_INT_MODE_P (tmode)
1207 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1208 : mode);
1209
1210 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1211 && bitpos % BITS_PER_WORD == 0)
1212 || (mode1 != BLKmode
1213 /* ??? The big endian test here is wrong. This is correct
1214 if the value is in a register, and if mode_for_size is not
1215 the same mode as op0. This causes us to get unnecessarily
1216 inefficient code from the Thumb port when -mbig-endian. */
1217 && (BYTES_BIG_ENDIAN
1218 ? bitpos + bitsize == BITS_PER_WORD
1219 : bitpos == 0)))
1220 && ((!MEM_P (op0)
1221 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1222 GET_MODE_BITSIZE (GET_MODE (op0)))
1223 && GET_MODE_SIZE (mode1) != 0
1224 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1225 || (MEM_P (op0)
1226 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1227 || (offset * BITS_PER_UNIT % bitsize == 0
1228 && MEM_ALIGN (op0) % bitsize == 0)))))
1229 {
1230 if (mode1 != GET_MODE (op0))
1231 {
1232 if (MEM_P (op0))
1233 op0 = adjust_address (op0, mode1, offset);
1234 else
1235 {
1236 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1237 byte_offset);
1238 if (sub == NULL)
1239 goto no_subreg_mode_swap;
1240 op0 = sub;
1241 }
1242 }
1243 if (mode1 != mode)
1244 return convert_to_mode (tmode, op0, unsignedp);
1245 return op0;
1246 }
1247 no_subreg_mode_swap:
1248
1249 /* Handle fields bigger than a word. */
1250
1251 if (bitsize > BITS_PER_WORD)
1252 {
1253 /* Here we transfer the words of the field
1254 in the order least significant first.
1255 This is because the most significant word is the one which may
1256 be less than full. */
1257
1258 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1259 unsigned int i;
1260
1261 if (target == 0 || !REG_P (target))
1262 target = gen_reg_rtx (mode);
1263
1264 /* Indicate for flow that the entire target reg is being set. */
1265 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1266
1267 for (i = 0; i < nwords; i++)
1268 {
1269 /* If I is 0, use the low-order word in both field and target;
1270 if I is 1, use the next to lowest word; and so on. */
1271 /* Word number in TARGET to use. */
1272 unsigned int wordnum
1273 = (WORDS_BIG_ENDIAN
1274 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1275 : i);
1276 /* Offset from start of field in OP0. */
1277 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1278 ? MAX (0, ((int) bitsize - ((int) i + 1)
1279 * (int) BITS_PER_WORD))
1280 : (int) i * BITS_PER_WORD);
1281 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1282 rtx result_part
1283 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1284 bitsize - i * BITS_PER_WORD),
1285 bitnum + bit_offset, 1, target_part, mode,
1286 word_mode);
1287
1288 gcc_assert (target_part);
1289
1290 if (result_part != target_part)
1291 emit_move_insn (target_part, result_part);
1292 }
1293
1294 if (unsignedp)
1295 {
1296 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1297 need to be zero'd out. */
1298 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1299 {
1300 unsigned int i, total_words;
1301
1302 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1303 for (i = nwords; i < total_words; i++)
1304 emit_move_insn
1305 (operand_subword (target,
1306 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1307 1, VOIDmode),
1308 const0_rtx);
1309 }
1310 return target;
1311 }
1312
1313 /* Signed bit field: sign-extend with two arithmetic shifts. */
1314 target = expand_shift (LSHIFT_EXPR, mode, target,
1315 build_int_cst (NULL_TREE,
1316 GET_MODE_BITSIZE (mode) - bitsize),
1317 NULL_RTX, 0);
1318 return expand_shift (RSHIFT_EXPR, mode, target,
1319 build_int_cst (NULL_TREE,
1320 GET_MODE_BITSIZE (mode) - bitsize),
1321 NULL_RTX, 0);
1322 }
1323
1324 /* From here on we know the desired field is smaller than a word. */
1325
1326 /* Check if there is a correspondingly-sized integer field, so we can
1327 safely extract it as one size of integer, if necessary; then
1328 truncate or extend to the size that is wanted; then use SUBREGs or
1329 convert_to_mode to get one of the modes we really wanted. */
1330
1331 int_mode = int_mode_for_mode (tmode);
1332 if (int_mode == BLKmode)
1333 int_mode = int_mode_for_mode (mode);
1334 /* Should probably push op0 out to memory and then do a load. */
1335 gcc_assert (int_mode != BLKmode);
1336
1337 /* OFFSET is the number of words or bytes (UNIT says which)
1338 from STR_RTX to the first word or byte containing part of the field. */
1339 if (!MEM_P (op0))
1340 {
1341 if (offset != 0
1342 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1343 {
1344 if (!REG_P (op0))
1345 op0 = copy_to_reg (op0);
1346 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1347 op0, (offset * UNITS_PER_WORD));
1348 }
1349 offset = 0;
1350 }
1351
1352 /* Now OFFSET is nonzero only for memory operands. */
1353
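/* For an unsigned field, try the extzv (zero-extending extract) pattern;
   for a signed field, try extv (sign-extending extract).  If the pattern
   is unavailable or does not apply, fall back to extract_fixed_bit_field,
   which uses shifts and masking.  */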
1354 if (unsignedp)
1355 {
1356 if (HAVE_extzv
1357 && bitsize > 0
1358 && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1359 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1360 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1361 {
1362 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1363 rtx bitsize_rtx, bitpos_rtx;
1364 rtx last = get_last_insn ();
1365 rtx xop0 = op0;
1366 rtx xtarget = target;
1367 rtx xspec_target = spec_target;
1368 rtx xspec_target_subreg = spec_target_subreg;
1369 rtx pat;
1370 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1371
1372 if (MEM_P (xop0))
1373 {
1374 int save_volatile_ok = volatile_ok;
1375 volatile_ok = 1;
1376
1377 /* Is the memory operand acceptable? */
1378 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1379 (xop0, GET_MODE (xop0))))
1380 {
1381 /* No, load into a reg and extract from there. */
1382 enum machine_mode bestmode;
1383
1384 /* Get the mode to use for inserting into this field. If
1385 OP0 is BLKmode, get the smallest mode consistent with the
1386 alignment. If OP0 is a non-BLKmode object that is no
1387 wider than MAXMODE, use its mode. Otherwise, use the
1388 smallest mode containing the field. */
1389
1390 if (GET_MODE (xop0) == BLKmode
1391 || (GET_MODE_SIZE (GET_MODE (op0))
1392 > GET_MODE_SIZE (maxmode)))
1393 bestmode = get_best_mode (bitsize, bitnum,
1394 MEM_ALIGN (xop0), maxmode,
1395 MEM_VOLATILE_P (xop0));
1396 else
1397 bestmode = GET_MODE (xop0);
1398
1399 if (bestmode == VOIDmode
1400 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1401 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1402 goto extzv_loses;
1403
1404 /* Compute offset as multiple of this unit,
1405 counting in bytes. */
1406 unit = GET_MODE_BITSIZE (bestmode);
1407 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1408 xbitpos = bitnum % unit;
1409 xop0 = adjust_address (xop0, bestmode, xoffset);
1410
1411 /* Make sure register is big enough for the whole field. */
1412 if (xoffset * BITS_PER_UNIT + unit
1413 < offset * BITS_PER_UNIT + bitsize)
1414 goto extzv_loses;
1415
1416 /* Fetch it to a register in that size. */
1417 xop0 = force_reg (bestmode, xop0);
1418
1419 /* XBITPOS counts within UNIT, which is what is expected. */
1420 }
1421 else
1422 /* Get ref to first byte containing part of the field. */
1423 xop0 = adjust_address (xop0, byte_mode, xoffset);
1424
1425 volatile_ok = save_volatile_ok;
1426 }
1427
1428 /* If op0 is a register, we need it in MAXMODE (which is usually
1429 SImode) to make it acceptable to the format of extzv.  */
1430 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1431 goto extzv_loses;
1432 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1433 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1434
1435 /* On big-endian machines, we count bits from the most significant.
1436 If the bit field insn does not, we must invert. */
1437 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1438 xbitpos = unit - bitsize - xbitpos;
1439
1440 /* Now convert from counting within UNIT to counting in MAXMODE. */
1441 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1442 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1443
1444 unit = GET_MODE_BITSIZE (maxmode);
1445
1446 if (xtarget == 0)
1447 xtarget = xspec_target = gen_reg_rtx (tmode);
1448
1449 if (GET_MODE (xtarget) != maxmode)
1450 {
1451 if (REG_P (xtarget))
1452 {
1453 int wider = (GET_MODE_SIZE (maxmode)
1454 > GET_MODE_SIZE (GET_MODE (xtarget)));
1455 xtarget = gen_lowpart (maxmode, xtarget);
1456 if (wider)
1457 xspec_target_subreg = xtarget;
1458 }
1459 else
1460 xtarget = gen_reg_rtx (maxmode);
1461 }
1462
1463 /* If this machine's extzv insists on a register target,
1464 make sure we have one. */
1465 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1466 (xtarget, maxmode)))
1467 xtarget = gen_reg_rtx (maxmode);
1468
1469 bitsize_rtx = GEN_INT (bitsize);
1470 bitpos_rtx = GEN_INT (xbitpos);
1471
1472 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1473 if (pat)
1474 {
1475 emit_insn (pat);
1476 target = xtarget;
1477 spec_target = xspec_target;
1478 spec_target_subreg = xspec_target_subreg;
1479 }
1480 else
1481 {
1482 delete_insns_since (last);
1483 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1484 bitpos, target, 1);
1485 }
1486 }
1487 else
1488 extzv_loses:
1489 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1490 bitpos, target, 1);
1491 }
1492 else
1493 {
1494 if (HAVE_extv
1495 && bitsize > 0
1496 && GET_MODE_BITSIZE (extv_mode) >= bitsize
1497 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1498 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1499 {
1500 int xbitpos = bitpos, xoffset = offset;
1501 rtx bitsize_rtx, bitpos_rtx;
1502 rtx last = get_last_insn ();
1503 rtx xop0 = op0, xtarget = target;
1504 rtx xspec_target = spec_target;
1505 rtx xspec_target_subreg = spec_target_subreg;
1506 rtx pat;
1507 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1508
1509 if (MEM_P (xop0))
1510 {
1511 /* Is the memory operand acceptable? */
1512 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1513 (xop0, GET_MODE (xop0))))
1514 {
1515 /* No, load into a reg and extract from there. */
1516 enum machine_mode bestmode;
1517
1518 /* Get the mode to use for inserting into this field. If
1519 OP0 is BLKmode, get the smallest mode consistent with the
1520 alignment. If OP0 is a non-BLKmode object that is no
1521 wider than MAXMODE, use its mode. Otherwise, use the
1522 smallest mode containing the field. */
1523
1524 if (GET_MODE (xop0) == BLKmode
1525 || (GET_MODE_SIZE (GET_MODE (op0))
1526 > GET_MODE_SIZE (maxmode)))
1527 bestmode = get_best_mode (bitsize, bitnum,
1528 MEM_ALIGN (xop0), maxmode,
1529 MEM_VOLATILE_P (xop0));
1530 else
1531 bestmode = GET_MODE (xop0);
1532
1533 if (bestmode == VOIDmode
1534 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1535 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1536 goto extv_loses;
1537
1538 /* Compute offset as multiple of this unit,
1539 counting in bytes. */
1540 unit = GET_MODE_BITSIZE (bestmode);
1541 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1542 xbitpos = bitnum % unit;
1543 xop0 = adjust_address (xop0, bestmode, xoffset);
1544
1545 /* Make sure register is big enough for the whole field. */
1546 if (xoffset * BITS_PER_UNIT + unit
1547 < offset * BITS_PER_UNIT + bitsize)
1548 goto extv_loses;
1549
1550 /* Fetch it to a register in that size. */
1551 xop0 = force_reg (bestmode, xop0);
1552
1553 /* XBITPOS counts within UNIT, which is what is expected. */
1554 }
1555 else
1556 /* Get ref to first byte containing part of the field. */
1557 xop0 = adjust_address (xop0, byte_mode, xoffset);
1558 }
1559
1560 /* If op0 is a register, we need it in MAXMODE (which is usually
1561 SImode) to make it acceptable to the format of extv. */
1562 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1563 goto extv_loses;
1564 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1565 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1566
1567 /* On big-endian machines, we count bits from the most significant.
1568 If the bit field insn does not, we must invert. */
1569 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1570 xbitpos = unit - bitsize - xbitpos;
1571
1572 /* XBITPOS counts within a size of UNIT.
1573 Adjust to count within a size of MAXMODE. */
1574 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1575 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1576
1577 unit = GET_MODE_BITSIZE (maxmode);
1578
1579 if (xtarget == 0)
1580 xtarget = xspec_target = gen_reg_rtx (tmode);
1581
1582 if (GET_MODE (xtarget) != maxmode)
1583 {
1584 if (REG_P (xtarget))
1585 {
1586 int wider = (GET_MODE_SIZE (maxmode)
1587 > GET_MODE_SIZE (GET_MODE (xtarget)));
1588 xtarget = gen_lowpart (maxmode, xtarget);
1589 if (wider)
1590 xspec_target_subreg = xtarget;
1591 }
1592 else
1593 xtarget = gen_reg_rtx (maxmode);
1594 }
1595
1596 /* If this machine's extv insists on a register target,
1597 make sure we have one. */
1598 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1599 (xtarget, maxmode)))
1600 xtarget = gen_reg_rtx (maxmode);
1601
1602 bitsize_rtx = GEN_INT (bitsize);
1603 bitpos_rtx = GEN_INT (xbitpos);
1604
1605 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1606 if (pat)
1607 {
1608 emit_insn (pat);
1609 target = xtarget;
1610 spec_target = xspec_target;
1611 spec_target_subreg = xspec_target_subreg;
1612 }
1613 else
1614 {
1615 delete_insns_since (last);
1616 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1617 bitpos, target, 0);
1618 }
1619 }
1620 else
1621 extv_loses:
1622 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1623 bitpos, target, 0);
1624 }
1625 if (target == spec_target)
1626 return target;
1627 if (target == spec_target_subreg)
1628 return spec_target;
1629 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1630 {
1631 /* If the target mode is not a scalar integral, first convert to the
1632 integer mode of that size and then access it as a floating-point
1633 value via a SUBREG. */
1634 if (!SCALAR_INT_MODE_P (tmode))
1635 {
1636 enum machine_mode smode
1637 = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1638 target = convert_to_mode (smode, target, unsignedp);
1639 target = force_reg (smode, target);
1640 return gen_lowpart (tmode, target);
1641 }
1642
1643 return convert_to_mode (tmode, target, unsignedp);
1644 }
1645 return target;
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations.
1649 Returns an rtx to represent the value.
1650 OP0 addresses a register (word) or memory (byte).
1651 BITPOS says which bit within the word or byte the bit field starts in.
1652 OFFSET says how many bytes farther the bit field starts;
1653 it is 0 if OP0 is a register.
1654 BITSIZE says how many bits long the bit field is.
1655 (If OP0 is a register, it may be narrower than a full word,
1656 but BITPOS still counts within a full word,
1657 which is significant on bigendian machines.)
1658
1659 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660 If TARGET is nonzero, attempts to store the value there
1661 and return TARGET, but this is not guaranteed.
1662 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666 unsigned HOST_WIDE_INT offset,
1667 unsigned HOST_WIDE_INT bitsize,
1668 unsigned HOST_WIDE_INT bitpos, rtx target,
1669 int unsignedp)
1670 {
1671 unsigned int total_bits = BITS_PER_WORD;
1672 enum machine_mode mode;
1673
1674 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675 {
1676 /* Special treatment for a bit field split across two registers. */
1677 if (bitsize + bitpos > BITS_PER_WORD)
1678 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679 }
1680 else
1681 {
1682 /* Get the proper mode to use for this field. We want a mode that
1683 includes the entire field. If such a mode would be larger than
1684 a word, we won't be doing the extraction the normal way. */
1685
1686 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689 if (mode == VOIDmode)
1690 /* The only way this should occur is if the field spans word
1691 boundaries. */
1692 return extract_split_bit_field (op0, bitsize,
1693 bitpos + offset * BITS_PER_UNIT,
1694 unsignedp);
1695
1696 total_bits = GET_MODE_BITSIZE (mode);
1697
1698 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1699 be in the range 0 to total_bits-1, and put any excess bytes in
1700 OFFSET. */
1701 if (bitpos >= total_bits)
1702 {
1703 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705 * BITS_PER_UNIT);
1706 }
1707
1708 /* Get ref to an aligned byte, halfword, or word containing the field.
1709 Adjust BITPOS to be position within a word,
1710 and OFFSET to be the offset of that word.
1711 Then alter OP0 to refer to that word. */
1712 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713 offset -= (offset % (total_bits / BITS_PER_UNIT));
1714 op0 = adjust_address (op0, mode, offset);
1715 }
1716
1717 mode = GET_MODE (op0);
1718
1719 if (BYTES_BIG_ENDIAN)
1720 /* BITPOS is the distance between our msb and that of OP0.
1721 Convert it to the distance from the lsb. */
1722 bitpos = total_bits - bitsize - bitpos;
1723
1724 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725 We have reduced the big-endian case to the little-endian case. */
1726
1727 if (unsignedp)
1728 {
1729 if (bitpos)
1730 {
1731 /* If the field does not already start at the lsb,
1732 shift it so it does. */
1733 tree amount = build_int_cst (NULL_TREE, bitpos);
1734 /* Maybe propagate the target for the shift. */
1735 /* But not if we will return it--could confuse integrate.c. */
1736 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737 if (tmode != mode) subtarget = 0;
1738 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739 }
1740 /* Convert the value to the desired mode. */
1741 if (mode != tmode)
1742 op0 = convert_to_mode (tmode, op0, 1);
1743
1744 /* Unless the msb of the field used to be the msb when we shifted,
1745 mask out the upper bits. */
1746
1747 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748 return expand_binop (GET_MODE (op0), and_optab, op0,
1749 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750 target, 1, OPTAB_LIB_WIDEN);
1751 return op0;
1752 }
1753
1754 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755 then arithmetic-shift its lsb to the lsb of the word. */
1756 op0 = force_reg (mode, op0);
1757 if (mode != tmode)
1758 target = 0;
1759
1760 /* Find the narrowest integer mode that contains the field. */
1761
1762 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763 mode = GET_MODE_WIDER_MODE (mode))
1764 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765 {
1766 op0 = convert_to_mode (mode, op0, 0);
1767 break;
1768 }
1769
1770 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771 {
1772 tree amount
1773 = build_int_cst (NULL_TREE,
1774 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775 /* Maybe propagate the target for the shift. */
1776 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778 }
1779
1780 return expand_shift (RSHIFT_EXPR, mode, op0,
1781 build_int_cst (NULL_TREE,
1782 GET_MODE_BITSIZE (mode) - bitsize),
1783 target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788 complement of that if COMPLEMENT. The mask is truncated if
1789 necessary to the width of mode MODE. The mask is zero-extended if
1790 BITSIZE+BITPOS is too small for MODE. */
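/* Illustration (not part of the interface): for MODE == SImode, BITPOS == 3,
   BITSIZE == 5 the low word of the mask is built as

     masklow  = (HOST_WIDE_INT) -1 << 3;
     masklow &= (unsigned HOST_WIDE_INT) -1 >> (HOST_BITS_PER_WIDE_INT - 8);

   giving 0xF8, i.e. five ones followed by three zeros; with COMPLEMENT
   nonzero the result is 0xFFFFFF07 instead.  */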
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795 HOST_WIDE_INT masklow, maskhigh;
1796
1797 if (bitsize == 0)
1798 masklow = 0;
1799 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800 masklow = (HOST_WIDE_INT) -1 << bitpos;
1801 else
1802 masklow = 0;
1803
1804 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805 masklow &= ((unsigned HOST_WIDE_INT) -1
1806 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809 maskhigh = -1;
1810 else
1811 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813 if (bitsize == 0)
1814 maskhigh = 0;
1815 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818 else
1819 maskhigh = 0;
1820
1821 if (complement)
1822 {
1823 maskhigh = ~maskhigh;
1824 masklow = ~masklow;
1825 }
1826
1827 return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836 unsigned HOST_WIDE_INT v = INTVAL (value);
1837 HOST_WIDE_INT low, high;
1838
1839 if (bitsize < HOST_BITS_PER_WIDE_INT)
1840 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842 if (bitpos < HOST_BITS_PER_WIDE_INT)
1843 {
1844 low = v << bitpos;
1845 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846 }
1847 else
1848 {
1849 low = 0;
1850 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851 }
1852
1853 return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field from a memory by forcing the alignment of the
1857 memory. This is efficient only if the field spans at least 4 boundaries.
1858
1859 OP0 is the MEM.
1860 BITSIZE is the field width; BITPOS is the position of the first bit.
1861 UNSIGNEDP is true if the result should be zero-extended. */
1862
1863 static rtx
1864 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865 unsigned HOST_WIDE_INT bitpos,
1866 int unsignedp)
1867 {
1868 enum machine_mode mode, dmode;
1869 unsigned int m_bitsize, m_size;
1870 unsigned int sign_shift_up, sign_shift_dn;
1871 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1872
1873 /* Choose a mode that will fit BITSIZE. */
1874 mode = smallest_mode_for_size (bitsize, MODE_INT);
1875 m_size = GET_MODE_SIZE (mode);
1876 m_bitsize = GET_MODE_BITSIZE (mode);
1877
1878 /* Choose a mode twice as wide. Fail if no such mode exists. */
1879 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1880 if (dmode == BLKmode)
1881 return NULL;
1882
1883 do_pending_stack_adjust ();
1884 start = get_last_insn ();
1885
1886 /* At the end, we'll need an additional shift to deal with sign/zero
1887 extension. By default this will be a left+right shift of the
1888 appropriate size. But we may be able to eliminate one of them. */
1889 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1890
1891 if (STRICT_ALIGNMENT)
1892 {
1893 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1894 bitpos %= BITS_PER_UNIT;
1895
1896 /* We load two values to be concatenated. There's an edge condition
1897 that bears notice -- an aligned value at the end of a page can
1898 only load one value lest we segfault. So the two values we load
1899 are at "base & -size" and "(base + size - 1) & -size". If base
1900 is unaligned, the addresses will be aligned and sequential; if
1901 base is aligned, the addresses will both be equal to base. */
1902
1903 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1904 GEN_INT (-(HOST_WIDE_INT)m_size),
1905 NULL, true, OPTAB_LIB_WIDEN);
1906 mark_reg_pointer (a1, m_bitsize);
1907 v1 = gen_rtx_MEM (mode, a1);
1908 set_mem_align (v1, m_bitsize);
1909 v1 = force_reg (mode, validize_mem (v1));
1910
1911 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1912 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1913 GEN_INT (-(HOST_WIDE_INT)m_size),
1914 NULL, true, OPTAB_LIB_WIDEN);
1915 v2 = gen_rtx_MEM (mode, a2);
1916 set_mem_align (v2, m_bitsize);
1917 v2 = force_reg (mode, validize_mem (v2));
1918
1919 /* Combine these two values into a double-word value. */
1920 if (m_bitsize == BITS_PER_WORD)
1921 {
1922 comb = gen_reg_rtx (dmode);
1923 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1924 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1925 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1926 }
1927 else
1928 {
1929 if (BYTES_BIG_ENDIAN)
1930 comb = v1, v1 = v2, v2 = comb;
1931 v1 = convert_modes (dmode, mode, v1, true);
1932 if (v1 == NULL)
1933 goto fail;
1934 v2 = convert_modes (dmode, mode, v2, true);
1935 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1936 NULL, true, OPTAB_LIB_WIDEN);
1937 if (v2 == NULL)
1938 goto fail;
1939 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1940 true, OPTAB_LIB_WIDEN);
1941 if (comb == NULL)
1942 goto fail;
1943 }
1944
1945 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1946 NULL, true, OPTAB_LIB_WIDEN);
1947 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1948
1949 if (bitpos != 0)
1950 {
1951 if (sign_shift_up <= bitpos)
1952 bitpos -= sign_shift_up, sign_shift_up = 0;
1953 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1954 NULL, true, OPTAB_LIB_WIDEN);
1955 }
1956 }
1957 else
1958 {
1959 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1960 bitpos %= BITS_PER_UNIT;
1961
1962 /* When strict alignment is not required, we can just load directly
1963 from memory without masking. If the remaining BITPOS offset is
1964 small enough, we may be able to do all operations in MODE as
1965 opposed to DMODE. */
1966 if (bitpos + bitsize <= m_bitsize)
1967 dmode = mode;
1968 comb = adjust_address (op0, dmode, offset);
1969
1970 if (sign_shift_up <= bitpos)
1971 bitpos -= sign_shift_up, sign_shift_up = 0;
1972 shift = GEN_INT (bitpos);
1973 }
1974
1975 /* Shift down the double-word such that the requested value is at bit 0. */
1976 if (shift != const0_rtx)
1977 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1978 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1979 if (comb == NULL)
1980 goto fail;
1981
1982 /* If the field exactly matches MODE, then all we need to do is return the
1983 lowpart. Otherwise, shift to get the sign bits set properly. */
1984 result = force_reg (mode, gen_lowpart (mode, comb));
1985
1986 if (sign_shift_up)
1987 result = expand_simple_binop (mode, ASHIFT, result,
1988 GEN_INT (sign_shift_up),
1989 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1990 if (sign_shift_dn)
1991 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1992 result, GEN_INT (sign_shift_dn),
1993 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1994
1995 return result;
1996
1997 fail:
1998 delete_insns_since (start);
1999 return NULL;
2000 }
2001
2002 /* Extract a bit field that is split across two words
2003 and return an RTX for the result.
2004
2005 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2006 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2007 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
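/* For example (illustrative numbers only), with BITS_PER_WORD == 32,
   BITSIZE == 20 and BITPOS == 24 on a little-endian target the loop below
   runs twice:

     round 1: 8 bits taken from bits 24..31 of word 0, left at bits 0..7;
     round 2: 12 bits taken from bits 0..11 of word 1, shifted left by 8
              and IORed in, filling bits 8..19 of the result.

   A signed field is then fixed up by the final pair of shifts.  */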
2008
2009 static rtx
2010 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2011 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2012 {
2013 unsigned int unit;
2014 unsigned int bitsdone = 0;
2015 rtx result = NULL_RTX;
2016 int first = 1;
2017
2018 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2019 much at a time. */
2020 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2021 unit = BITS_PER_WORD;
2022 else
2023 {
2024 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2025 if (0 && bitsize / unit > 2)
2026 {
2027 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2028 unsignedp);
2029 if (tmp)
2030 return tmp;
2031 }
2032 }
2033
2034 while (bitsdone < bitsize)
2035 {
2036 unsigned HOST_WIDE_INT thissize;
2037 rtx part, word;
2038 unsigned HOST_WIDE_INT thispos;
2039 unsigned HOST_WIDE_INT offset;
2040
2041 offset = (bitpos + bitsdone) / unit;
2042 thispos = (bitpos + bitsdone) % unit;
2043
2044 /* THISSIZE must not overrun a word boundary. Otherwise,
2045 extract_fixed_bit_field will call us again, and we will mutually
2046 recurse forever. */
2047 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048 thissize = MIN (thissize, unit - thispos);
2049
2050 /* If OP0 is a register, then handle OFFSET here.
2051
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0) == SUBREG)
2057 {
2058 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060 GET_MODE (SUBREG_REG (op0)));
2061 offset = 0;
2062 }
2063 else if (REG_P (op0))
2064 {
2065 word = operand_subword_force (op0, offset, GET_MODE (op0));
2066 offset = 0;
2067 }
2068 else
2069 word = op0;
2070
2071 /* Extract the parts in bit-counting order,
2072 whose meaning is determined by BYTES_PER_UNIT.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part = extract_fixed_bit_field (word_mode, word,
2076 offset * unit / BITS_PER_UNIT,
2077 thissize, thispos, 0, 1);
2078 bitsdone += thissize;
2079
2080 /* Shift this part into place for the result. */
2081 if (BYTES_BIG_ENDIAN)
2082 {
2083 if (bitsize != bitsdone)
2084 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085 build_int_cst (NULL_TREE, bitsize - bitsdone),
2086 0, 1);
2087 }
2088 else
2089 {
2090 if (bitsdone != thissize)
2091 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2092 build_int_cst (NULL_TREE,
2093 bitsdone - thissize), 0, 1);
2094 }
2095
2096 if (first)
2097 result = part;
2098 else
2099 /* Combine the parts with bitwise or. This works
2100 because we extracted each part as an unsigned bit field. */
2101 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2102 OPTAB_LIB_WIDEN);
2103
2104 first = 0;
2105 }
2106
2107 /* Unsigned bit field: we are done. */
2108 if (unsignedp)
2109 return result;
2110 /* Signed bit field: sign-extend with two arithmetic shifts. */
2111 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2112 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2113 NULL_RTX, 0);
2114 return expand_shift (RSHIFT_EXPR, word_mode, result,
2115 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2116 NULL_RTX, 0);
2117 }
2118 \f
2119 /* Add INC into TARGET. */
2120
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124 rtx value = expand_binop (GET_MODE (target), add_optab,
2125 target, inc,
2126 target, 0, OPTAB_LIB_WIDEN);
2127 if (value != target)
2128 emit_move_insn (target, value);
2129 }
2130
2131 /* Subtract DEC from TARGET. */
2132
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136 rtx value = expand_binop (GET_MODE (target), sub_optab,
2137 target, dec,
2138 target, 0, OPTAB_LIB_WIDEN);
2139 if (value != target)
2140 emit_move_insn (target, value);
2141 }
2142 \f
2143 /* Output a shift instruction for expression code CODE,
2144 with SHIFTED being the rtx for the value to shift,
2145 and AMOUNT the tree for the amount to shift by.
2146 Store the result in the rtx TARGET, if that is convenient.
2147 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148 Return the rtx for where the value is. */
2149
2150 rtx
2151 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2152 tree amount, rtx target, int unsignedp)
2153 {
2154 rtx op1, temp = 0;
2155 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157 int try;
2158
2159 /* We previously detected shift counts computed by NEGATE_EXPR and
2160 shifted in the other direction, but that does not work on all
2161 machines. */
2162
2163 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2164
2165 if (SHIFT_COUNT_TRUNCATED)
2166 {
2167 if (GET_CODE (op1) == CONST_INT
2168 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2169 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2170 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2171 % GET_MODE_BITSIZE (mode));
2172 else if (GET_CODE (op1) == SUBREG
2173 && subreg_lowpart_p (op1))
2174 op1 = SUBREG_REG (op1);
2175 }
2176
2177 if (op1 == const0_rtx)
2178 return shifted;
2179
2180 /* Check whether it's cheaper to implement a left shift by a constant
2181 bit count as a sequence of additions. */
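/* E.g. a left shift by 3 becomes three doublings,
     t = x + x;  t = t + t;  t = t + t;
   which leaves t == x << 3 and wins whenever 3 * add_cost[mode] is cheaper
   than shift_cost[mode][3].  */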
2182 if (code == LSHIFT_EXPR
2183 && GET_CODE (op1) == CONST_INT
2184 && INTVAL (op1) > 0
2185 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2186 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2187 {
2188 int i;
2189 for (i = 0; i < INTVAL (op1); i++)
2190 {
2191 temp = force_reg (mode, shifted);
2192 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2193 unsignedp, OPTAB_LIB_WIDEN);
2194 }
2195 return shifted;
2196 }
2197
2198 for (try = 0; temp == 0 && try < 3; try++)
2199 {
2200 enum optab_methods methods;
2201
2202 if (try == 0)
2203 methods = OPTAB_DIRECT;
2204 else if (try == 1)
2205 methods = OPTAB_WIDEN;
2206 else
2207 methods = OPTAB_LIB_WIDEN;
2208
2209 if (rotate)
2210 {
2211 /* Widening does not work for rotation. */
2212 if (methods == OPTAB_WIDEN)
2213 continue;
2214 else if (methods == OPTAB_LIB_WIDEN)
2215 {
2216 /* If we have been unable to open-code this by a rotation,
2217 do it as the IOR of two shifts. I.e., to rotate A
2218 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2219 where C is the bitsize of A.
2220
2221 It is theoretically possible that the target machine might
2222 not be able to perform either shift and hence we would
2223 be making two libcalls rather than just the one for the
2224 shift (similarly if IOR could not be done). We will allow
2225 this extremely unlikely lossage to avoid complicating the
2226 code below. */
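/* Concretely, rotating a 32-bit value A left by N is
     (A << N) | ((unsigned) A >> (32 - N));
   e.g. A == 0x80000001, N == 1 gives 0x00000002 | 0x00000001 == 0x3.  */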
2227
2228 rtx subtarget = target == shifted ? 0 : target;
2229 rtx temp1;
2230 tree type = TREE_TYPE (amount);
2231 tree new_amount = make_tree (type, op1);
2232 tree other_amount
2233 = fold_build2 (MINUS_EXPR, type,
2234 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2235 amount);
2236
2237 shifted = force_reg (mode, shifted);
2238
2239 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2240 mode, shifted, new_amount, 0, 1);
2241 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2242 mode, shifted, other_amount, subtarget, 1);
2243 return expand_binop (mode, ior_optab, temp, temp1, target,
2244 unsignedp, methods);
2245 }
2246
2247 temp = expand_binop (mode,
2248 left ? rotl_optab : rotr_optab,
2249 shifted, op1, target, unsignedp, methods);
2250 }
2251 else if (unsignedp)
2252 temp = expand_binop (mode,
2253 left ? ashl_optab : lshr_optab,
2254 shifted, op1, target, unsignedp, methods);
2255
2256 /* Do arithmetic shifts.
2257 Also, if we are going to widen the operand, we can just as well
2258 use an arithmetic right-shift instead of a logical one. */
2259 if (temp == 0 && ! rotate
2260 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2261 {
2262 enum optab_methods methods1 = methods;
2263
2264 /* If trying to widen a log shift to an arithmetic shift,
2265 don't accept an arithmetic shift of the same size. */
2266 if (unsignedp)
2267 methods1 = OPTAB_MUST_WIDEN;
2268
2269 /* Arithmetic shift */
2270
2271 temp = expand_binop (mode,
2272 left ? ashl_optab : ashr_optab,
2273 shifted, op1, target, unsignedp, methods1);
2274 }
2275
2276 /* We used to try extzv here for logical right shifts, but that was
2277 only useful for one machine, the VAX, and caused poor code
2278 generation there for lshrdi3, so the code was deleted and a
2279 define_expand for lshrsi3 was added to vax.md. */
2280 }
2281
2282 gcc_assert (temp);
2283 return temp;
2284 }
2285 \f
2286 enum alg_code {
2287 alg_unknown,
2288 alg_zero,
2289 alg_m, alg_shift,
2290 alg_add_t_m2,
2291 alg_sub_t_m2,
2292 alg_add_factor,
2293 alg_sub_factor,
2294 alg_add_t2_m,
2295 alg_sub_t2_m,
2296 alg_impossible
2297 };
2298
2299 /* This structure holds the "cost" of a multiply sequence. The
2300 "cost" field holds the total rtx_cost of every operator in the
2301 synthetic multiplication sequence, hence cost(a op b) is defined
2302 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2303 The "latency" field holds the minimum possible latency of the
2304 synthetic multiply, on a hypothetical infinitely parallel CPU.
2305 This is the critical path, or the maximum height, of the expression
2306 tree which is the sum of rtx_costs on the most expensive path from
2307 any leaf to the root. Hence latency(a op b) is defined as zero for
2308 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
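/* For instance, for (a << 1) + (a << 2) the cost is
   2 * rtx_cost (shift) + rtx_cost (add), but the latency is only
   rtx_cost (shift) + rtx_cost (add): the two shifts are independent and can
   issue in parallel, so only one of them lies on the critical path.  */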
2309
2310 struct mult_cost {
2311 short cost; /* Total rtx_cost of the multiplication sequence. */
2312 short latency; /* The latency of the multiplication sequence. */
2313 };
2314
2315 /* This macro is used to compare a pointer to a mult_cost against a
2316 single integer "rtx_cost" value. This is equivalent to the macro
2317 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2318 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2319 || ((X)->cost == (Y) && (X)->latency < (Y)))
2320
2321 /* This macro is used to compare two pointers to mult_costs against
2322 each other. The macro returns true if X is cheaper than Y.
2323 Currently, the cheaper of two mult_costs is the one with the
2324 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2325 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2326 || ((X)->cost == (Y)->cost \
2327 && (X)->latency < (Y)->latency))
2328
2329 /* This structure records a sequence of operations.
2330 `ops' is the number of operations recorded.
2331 `cost' is their total cost.
2332 The operations are stored in `op' and the corresponding
2333 logarithms of the integer coefficients in `log'.
2334
2335 These are the operations:
2336 alg_zero total := 0;
2337 alg_m total := multiplicand;
2338 alg_shift total := total * coeff
2339 alg_add_t_m2 total := total + multiplicand * coeff;
2340 alg_sub_t_m2 total := total - multiplicand * coeff;
2341 alg_add_factor total := total * coeff + total;
2342 alg_sub_factor total := total * coeff - total;
2343 alg_add_t2_m total := total * coeff + multiplicand;
2344 alg_sub_t2_m total := total * coeff - multiplicand;
2345
2346 The first operand must be either alg_zero or alg_m. */
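/* As an illustration, multiplying by 45 (binary 101101) can be recorded as

     alg_m                     total := multiplicand              (1 * x)
     alg_add_t2_m, log == 2    total := total * 4 + multiplicand  (5 * x)
     alg_add_factor, log == 3  total := total * 8 + total         (45 * x)

   i.e. two shift-and-add steps after the initial load.  Whether this exact
   sequence is chosen depends on the target's add and shift costs.  */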
2347
2348 struct algorithm
2349 {
2350 struct mult_cost cost;
2351 short ops;
2352 /* The size of the OP and LOG fields are not directly related to the
2353 word size, but the worst-case algorithms will be if we have few
2354 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2355 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2356 in total wordsize operations. */
2357 enum alg_code op[MAX_BITS_PER_WORD];
2358 char log[MAX_BITS_PER_WORD];
2359 };
2360
2361 /* The entry for our multiplication cache/hash table. */
2362 struct alg_hash_entry {
2363 /* The number we are multiplying by. */
2364 unsigned int t;
2365
2366 /* The mode in which we are multiplying something by T. */
2367 enum machine_mode mode;
2368
2369 /* The best multiplication algorithm for t. */
2370 enum alg_code alg;
2371
2372 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2373 Otherwise, the cost within which multiplication by T is
2374 impossible. */
2375 struct mult_cost cost;
2376 };
2377
2378 /* The number of cache/hash entries. */
2379 #define NUM_ALG_HASH_ENTRIES 307
2380
2381 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2382 actually a hash table. If we have a collision, the older
2383 entry is kicked out. */
2384 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2385
2386 /* Indicates the type of fixup needed after a constant multiplication.
2387 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2388 the result should be negated, and ADD_VARIANT means that the
2389 multiplicand should be added to the result. */
2390 enum mult_variant {basic_variant, negate_variant, add_variant};
2391
2392 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2393 const struct mult_cost *, enum machine_mode mode);
2394 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2395 struct algorithm *, enum mult_variant *, int);
2396 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2397 const struct algorithm *, enum mult_variant);
2398 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2399 int, rtx *, int *, int *);
2400 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2401 static rtx extract_high_half (enum machine_mode, rtx);
2402 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2403 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2404 int, int);
2405 /* Compute and return the best algorithm for multiplying by T.
2406 The algorithm must cost less than COST_LIMIT.
2407 If retval.cost >= COST_LIMIT, no algorithm was found and all
2408 other fields of the returned struct are undefined.
2409 MODE is the machine mode of the multiplication. */
2410
2411 static void
2412 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2413 const struct mult_cost *cost_limit, enum machine_mode mode)
2414 {
2415 int m;
2416 struct algorithm *alg_in, *best_alg;
2417 struct mult_cost best_cost;
2418 struct mult_cost new_limit;
2419 int op_cost, op_latency;
2420 unsigned HOST_WIDE_INT q;
2421 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422 int hash_index;
2423 bool cache_hit = false;
2424 enum alg_code cache_alg = alg_zero;
2425
2426 /* Indicate that no algorithm is yet found. If no algorithm
2427 is found, this value will be returned and indicate failure. */
2428 alg_out->cost.cost = cost_limit->cost + 1;
2429 alg_out->cost.latency = cost_limit->latency + 1;
2430
2431 if (cost_limit->cost < 0
2432 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2433 return;
2434
2435 /* Restrict the bits of "t" to the multiplication's mode. */
2436 t &= GET_MODE_MASK (mode);
2437
2438 /* t == 1 can be done in zero cost. */
2439 if (t == 1)
2440 {
2441 alg_out->ops = 1;
2442 alg_out->cost.cost = 0;
2443 alg_out->cost.latency = 0;
2444 alg_out->op[0] = alg_m;
2445 return;
2446 }
2447
2448 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2449 fail now. */
2450 if (t == 0)
2451 {
2452 if (MULT_COST_LESS (cost_limit, zero_cost))
2453 return;
2454 else
2455 {
2456 alg_out->ops = 1;
2457 alg_out->cost.cost = zero_cost;
2458 alg_out->cost.latency = zero_cost;
2459 alg_out->op[0] = alg_zero;
2460 return;
2461 }
2462 }
2463
2464 /* We'll be needing a couple extra algorithm structures now. */
2465
2466 alg_in = alloca (sizeof (struct algorithm));
2467 best_alg = alloca (sizeof (struct algorithm));
2468 best_cost = *cost_limit;
2469
2470 /* Compute the hash index. */
2471 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2472
2473 /* See if we already know what to do for T. */
2474 if (alg_hash[hash_index].t == t
2475 && alg_hash[hash_index].mode == mode
2476 && alg_hash[hash_index].alg != alg_unknown)
2477 {
2478 cache_alg = alg_hash[hash_index].alg;
2479
2480 if (cache_alg == alg_impossible)
2481 {
2482 /* The cache tells us that it's impossible to synthesize
2483 multiplication by T within alg_hash[hash_index].cost. */
2484 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2485 /* COST_LIMIT is at least as restrictive as the one
2486 recorded in the hash table, in which case we have no
2487 hope of synthesizing a multiplication. Just
2488 return. */
2489 return;
2490
2491 /* If we get here, COST_LIMIT is less restrictive than the
2492 one recorded in the hash table, so we may be able to
2493 synthesize a multiplication. Proceed as if we didn't
2494 have the cache entry. */
2495 }
2496 else
2497 {
2498 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2499 /* The cached algorithm shows that this multiplication
2500 requires more cost than COST_LIMIT. Just return. This
2501 way, we don't clobber this cache entry with
2502 alg_impossible but retain useful information. */
2503 return;
2504
2505 cache_hit = true;
2506
2507 switch (cache_alg)
2508 {
2509 case alg_shift:
2510 goto do_alg_shift;
2511
2512 case alg_add_t_m2:
2513 case alg_sub_t_m2:
2514 goto do_alg_addsub_t_m2;
2515
2516 case alg_add_factor:
2517 case alg_sub_factor:
2518 goto do_alg_addsub_factor;
2519
2520 case alg_add_t2_m:
2521 goto do_alg_add_t2_m;
2522
2523 case alg_sub_t2_m:
2524 goto do_alg_sub_t2_m;
2525
2526 default:
2527 gcc_unreachable ();
2528 }
2529 }
2530 }
2531
2532 /* If we have a group of zero bits at the low-order part of T, try
2533 multiplying by the remaining bits and then doing a shift. */
2534
2535 if ((t & 1) == 0)
2536 {
2537 do_alg_shift:
2538 m = floor_log2 (t & -t); /* m = number of low zero bits */
2539 if (m < maxm)
2540 {
2541 q = t >> m;
2542 /* The function expand_shift will choose between a shift and
2543 a sequence of additions, so the observed cost is given as
2544 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2545 op_cost = m * add_cost[mode];
2546 if (shift_cost[mode][m] < op_cost)
2547 op_cost = shift_cost[mode][m];
2548 new_limit.cost = best_cost.cost - op_cost;
2549 new_limit.latency = best_cost.latency - op_cost;
2550 synth_mult (alg_in, q, &new_limit, mode);
2551
2552 alg_in->cost.cost += op_cost;
2553 alg_in->cost.latency += op_cost;
2554 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555 {
2556 struct algorithm *x;
2557 best_cost = alg_in->cost;
2558 x = alg_in, alg_in = best_alg, best_alg = x;
2559 best_alg->log[best_alg->ops] = m;
2560 best_alg->op[best_alg->ops] = alg_shift;
2561 }
2562 }
2563 if (cache_hit)
2564 goto done;
2565 }
2566
2567 /* If we have an odd number, add or subtract one. */
2568 if ((t & 1) != 0)
2569 {
2570 unsigned HOST_WIDE_INT w;
2571
2572 do_alg_addsub_t_m2:
2573 for (w = 1; (w & t) != 0; w <<= 1)
2574 ;
2575 /* If T was -1, then W will be zero after the loop. This is another
2576 case where T ends with ...111. Handling this with (T + 1) and
2577 subtracting 1 produces slightly better code and makes algorithm
2578 selection much faster than treating it like the ...0111 case
2579 below. */
2580 if (w == 0
2581 || (w > 2
2582 /* Reject the case where t is 3.
2583 Thus we prefer addition in that case. */
2584 && t != 3))
2585 {
2586 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2587
2588 op_cost = add_cost[mode];
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, t + 1, &new_limit, mode);
2592
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2596 {
2597 struct algorithm *x;
2598 best_cost = alg_in->cost;
2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = 0;
2601 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2602 }
2603 }
2604 else
2605 {
2606 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2607
2608 op_cost = add_cost[mode];
2609 new_limit.cost = best_cost.cost - op_cost;
2610 new_limit.latency = best_cost.latency - op_cost;
2611 synth_mult (alg_in, t - 1, &new_limit, mode);
2612
2613 alg_in->cost.cost += op_cost;
2614 alg_in->cost.latency += op_cost;
2615 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616 {
2617 struct algorithm *x;
2618 best_cost = alg_in->cost;
2619 x = alg_in, alg_in = best_alg, best_alg = x;
2620 best_alg->log[best_alg->ops] = 0;
2621 best_alg->op[best_alg->ops] = alg_add_t_m2;
2622 }
2623 }
2624 if (cache_hit)
2625 goto done;
2626 }
2627
2628 /* Look for factors of t of the form
2629 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2630 If we find such a factor, we can multiply by t using an algorithm that
2631 multiplies by q, shift the result by m and add/subtract it to itself.
2632
2633 We search for large factors first and loop down, even if large factors
2634 are less probable than small; if we find a large factor we will find a
2635 good sequence quickly, and therefore be able to prune (by decreasing
2636 COST_LIMIT) the search. */
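/* For example, with t == 45 the loop finds d == 2**3 + 1 == 9 at m == 3:
   since 45 % 9 == 0 we recurse on 45 / 9 == 5 and append an alg_add_factor
   step (total := total * 8 + total).  For a divisor of the form 2**m - 1
   the appended step is alg_sub_factor instead.  */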
2637
2638 do_alg_addsub_factor:
2639 for (m = floor_log2 (t - 1); m >= 2; m--)
2640 {
2641 unsigned HOST_WIDE_INT d;
2642
2643 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2644 if (t % d == 0 && t > d && m < maxm
2645 && (!cache_hit || cache_alg == alg_add_factor))
2646 {
2647 /* If the target has a cheap shift-and-add instruction use
2648 that in preference to a shift insn followed by an add insn.
2649 Assume that the shift-and-add is "atomic" with a latency
2650 equal to its cost, otherwise assume that on superscalar
2651 hardware the shift may be executed concurrently with the
2652 earlier steps in the algorithm. */
2653 op_cost = add_cost[mode] + shift_cost[mode][m];
2654 if (shiftadd_cost[mode][m] < op_cost)
2655 {
2656 op_cost = shiftadd_cost[mode][m];
2657 op_latency = op_cost;
2658 }
2659 else
2660 op_latency = add_cost[mode];
2661
2662 new_limit.cost = best_cost.cost - op_cost;
2663 new_limit.latency = best_cost.latency - op_latency;
2664 synth_mult (alg_in, t / d, &new_limit, mode);
2665
2666 alg_in->cost.cost += op_cost;
2667 alg_in->cost.latency += op_latency;
2668 if (alg_in->cost.latency < op_cost)
2669 alg_in->cost.latency = op_cost;
2670 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2671 {
2672 struct algorithm *x;
2673 best_cost = alg_in->cost;
2674 x = alg_in, alg_in = best_alg, best_alg = x;
2675 best_alg->log[best_alg->ops] = m;
2676 best_alg->op[best_alg->ops] = alg_add_factor;
2677 }
2678 /* Other factors will have been taken care of in the recursion. */
2679 break;
2680 }
2681
2682 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2683 if (t % d == 0 && t > d && m < maxm
2684 && (!cache_hit || cache_alg == alg_sub_factor))
2685 {
2686 /* If the target has a cheap shift-and-subtract insn use
2687 that in preference to a shift insn followed by a sub insn.
2688 Assume that the shift-and-sub is "atomic" with a latency
2689 equal to its cost, otherwise assume that on superscalar
2690 hardware the shift may be executed concurrently with the
2691 earlier steps in the algorithm. */
2692 op_cost = add_cost[mode] + shift_cost[mode][m];
2693 if (shiftsub_cost[mode][m] < op_cost)
2694 {
2695 op_cost = shiftsub_cost[mode][m];
2696 op_latency = op_cost;
2697 }
2698 else
2699 op_latency = add_cost[mode];
2700
2701 new_limit.cost = best_cost.cost - op_cost;
2702 new_limit.latency = best_cost.latency - op_latency;
2703 synth_mult (alg_in, t / d, &new_limit, mode);
2704
2705 alg_in->cost.cost += op_cost;
2706 alg_in->cost.latency += op_latency;
2707 if (alg_in->cost.latency < op_cost)
2708 alg_in->cost.latency = op_cost;
2709 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2710 {
2711 struct algorithm *x;
2712 best_cost = alg_in->cost;
2713 x = alg_in, alg_in = best_alg, best_alg = x;
2714 best_alg->log[best_alg->ops] = m;
2715 best_alg->op[best_alg->ops] = alg_sub_factor;
2716 }
2717 break;
2718 }
2719 }
2720 if (cache_hit)
2721 goto done;
2722
2723 /* Try shift-and-add (load effective address) instructions,
2724 i.e. do a*3, a*5, a*9. */
2725 if ((t & 1) != 0)
2726 {
2727 do_alg_add_t2_m:
2728 q = t - 1;
2729 q = q & -q;
2730 m = exact_log2 (q);
2731 if (m >= 0 && m < maxm)
2732 {
2733 op_cost = shiftadd_cost[mode][m];
2734 new_limit.cost = best_cost.cost - op_cost;
2735 new_limit.latency = best_cost.latency - op_cost;
2736 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2737
2738 alg_in->cost.cost += op_cost;
2739 alg_in->cost.latency += op_cost;
2740 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2741 {
2742 struct algorithm *x;
2743 best_cost = alg_in->cost;
2744 x = alg_in, alg_in = best_alg, best_alg = x;
2745 best_alg->log[best_alg->ops] = m;
2746 best_alg->op[best_alg->ops] = alg_add_t2_m;
2747 }
2748 }
2749 if (cache_hit)
2750 goto done;
2751
2752 do_alg_sub_t2_m:
2753 q = t + 1;
2754 q = q & -q;
2755 m = exact_log2 (q);
2756 if (m >= 0 && m < maxm)
2757 {
2758 op_cost = shiftsub_cost[mode][m];
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_cost;
2761 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2762
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_cost;
2765 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766 {
2767 struct algorithm *x;
2768 best_cost = alg_in->cost;
2769 x = alg_in, alg_in = best_alg, best_alg = x;
2770 best_alg->log[best_alg->ops] = m;
2771 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2772 }
2773 }
2774 if (cache_hit)
2775 goto done;
2776 }
2777
2778 done:
2779 /* If best_cost has not decreased, we have not found any algorithm. */
2780 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2781 {
2782 /* We failed to find an algorithm. Record alg_impossible for
2783 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2784 we are asked to find an algorithm for T within the same or
2785 lower COST_LIMIT, we can immediately return to the
2786 caller. */
2787 alg_hash[hash_index].t = t;
2788 alg_hash[hash_index].mode = mode;
2789 alg_hash[hash_index].alg = alg_impossible;
2790 alg_hash[hash_index].cost = *cost_limit;
2791 return;
2792 }
2793
2794 /* Cache the result. */
2795 if (!cache_hit)
2796 {
2797 alg_hash[hash_index].t = t;
2798 alg_hash[hash_index].mode = mode;
2799 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2800 alg_hash[hash_index].cost.cost = best_cost.cost;
2801 alg_hash[hash_index].cost.latency = best_cost.latency;
2802 }
2803
2804 /* If we are getting a sequence too long for `struct algorithm'
2805 to record, make this search fail. */
2806 if (best_alg->ops == MAX_BITS_PER_WORD)
2807 return;
2808
2809 /* Copy the algorithm from temporary space to the space at alg_out.
2810 We avoid using structure assignment because the majority of
2811 best_alg is normally undefined, and this is a critical function. */
2812 alg_out->ops = best_alg->ops + 1;
2813 alg_out->cost = best_cost;
2814 memcpy (alg_out->op, best_alg->op,
2815 alg_out->ops * sizeof *alg_out->op);
2816 memcpy (alg_out->log, best_alg->log,
2817 alg_out->ops * sizeof *alg_out->log);
2818 }
2819 \f
2820 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2821 Try three variations:
2822
2823 - a shift/add sequence based on VAL itself
2824 - a shift/add sequence based on -VAL, followed by a negation
2825 - a shift/add sequence based on VAL - 1, followed by an addition.
2826
2827 Return true if the cheapest of these cost less than MULT_COST,
2828 describing the algorithm in *ALG and final fixup in *VARIANT. */
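/* For instance, for VAL == -3 it is usually cheaper to synthesize 3 * x with
   one shift-and-add and negate the result (negate_variant) than to build the
   constant ...11101 directly; for VAL == 0x10001 the VAL - 1 sequence is a
   single shift by 16 followed by adding the multiplicand (add_variant).
   The final choice depends on neg_cost and add_cost for MODE.  */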
2829
2830 static bool
2831 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2832 struct algorithm *alg, enum mult_variant *variant,
2833 int mult_cost)
2834 {
2835 struct algorithm alg2;
2836 struct mult_cost limit;
2837 int op_cost;
2838
2839 *variant = basic_variant;
2840 limit.cost = mult_cost;
2841 limit.latency = mult_cost;
2842 synth_mult (alg, val, &limit, mode);
2843
2844 /* This works only if the inverted value actually fits in an
2845 `unsigned int'. */
2846 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2847 {
2848 op_cost = neg_cost[mode];
2849 if (MULT_COST_LESS (&alg->cost, mult_cost))
2850 {
2851 limit.cost = alg->cost.cost - op_cost;
2852 limit.latency = alg->cost.latency - op_cost;
2853 }
2854 else
2855 {
2856 limit.cost = mult_cost - op_cost;
2857 limit.latency = mult_cost - op_cost;
2858 }
2859
2860 synth_mult (&alg2, -val, &limit, mode);
2861 alg2.cost.cost += op_cost;
2862 alg2.cost.latency += op_cost;
2863 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2864 *alg = alg2, *variant = negate_variant;
2865 }
2866
2867 /* This proves very useful for division-by-constant. */
2868 op_cost = add_cost[mode];
2869 if (MULT_COST_LESS (&alg->cost, mult_cost))
2870 {
2871 limit.cost = alg->cost.cost - op_cost;
2872 limit.latency = alg->cost.latency - op_cost;
2873 }
2874 else
2875 {
2876 limit.cost = mult_cost - op_cost;
2877 limit.latency = mult_cost - op_cost;
2878 }
2879
2880 synth_mult (&alg2, val - 1, &limit, mode);
2881 alg2.cost.cost += op_cost;
2882 alg2.cost.latency += op_cost;
2883 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2884 *alg = alg2, *variant = add_variant;
2885
2886 return MULT_COST_LESS (&alg->cost, mult_cost);
2887 }
2888
2889 /* A subroutine of expand_mult, used for constant multiplications.
2890 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2891 convenient. Use the shift/add sequence described by ALG and apply
2892 the final fixup specified by VARIANT. */
2893
2894 static rtx
2895 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2896 rtx target, const struct algorithm *alg,
2897 enum mult_variant variant)
2898 {
2899 HOST_WIDE_INT val_so_far;
2900 rtx insn, accum, tem;
2901 int opno;
2902 enum machine_mode nmode;
2903
2904 /* Avoid referencing memory over and over.
2905 For speed, but also for correctness when mem is volatile. */
2906 if (MEM_P (op0))
2907 op0 = force_reg (mode, op0);
2908
2909 /* ACCUM starts out either as OP0 or as a zero, depending on
2910 the first operation. */
2911
2912 if (alg->op[0] == alg_zero)
2913 {
2914 accum = copy_to_mode_reg (mode, const0_rtx);
2915 val_so_far = 0;
2916 }
2917 else if (alg->op[0] == alg_m)
2918 {
2919 accum = copy_to_mode_reg (mode, op0);
2920 val_so_far = 1;
2921 }
2922 else
2923 gcc_unreachable ();
2924
2925 for (opno = 1; opno < alg->ops; opno++)
2926 {
2927 int log = alg->log[opno];
2928 rtx shift_subtarget = optimize ? 0 : accum;
2929 rtx add_target
2930 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2931 && !optimize)
2932 ? target : 0;
2933 rtx accum_target = optimize ? 0 : accum;
2934
2935 switch (alg->op[opno])
2936 {
2937 case alg_shift:
2938 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2939 build_int_cst (NULL_TREE, log),
2940 NULL_RTX, 0);
2941 val_so_far <<= log;
2942 break;
2943
2944 case alg_add_t_m2:
2945 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2946 build_int_cst (NULL_TREE, log),
2947 NULL_RTX, 0);
2948 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2949 add_target ? add_target : accum_target);
2950 val_so_far += (HOST_WIDE_INT) 1 << log;
2951 break;
2952
2953 case alg_sub_t_m2:
2954 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2955 build_int_cst (NULL_TREE, log),
2956 NULL_RTX, 0);
2957 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2958 add_target ? add_target : accum_target);
2959 val_so_far -= (HOST_WIDE_INT) 1 << log;
2960 break;
2961
2962 case alg_add_t2_m:
2963 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2964 build_int_cst (NULL_TREE, log),
2965 shift_subtarget,
2966 0);
2967 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2968 add_target ? add_target : accum_target);
2969 val_so_far = (val_so_far << log) + 1;
2970 break;
2971
2972 case alg_sub_t2_m:
2973 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2974 build_int_cst (NULL_TREE, log),
2975 shift_subtarget, 0);
2976 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2977 add_target ? add_target : accum_target);
2978 val_so_far = (val_so_far << log) - 1;
2979 break;
2980
2981 case alg_add_factor:
2982 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2983 build_int_cst (NULL_TREE, log),
2984 NULL_RTX, 0);
2985 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2986 add_target ? add_target : accum_target);
2987 val_so_far += val_so_far << log;
2988 break;
2989
2990 case alg_sub_factor:
2991 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2992 build_int_cst (NULL_TREE, log),
2993 NULL_RTX, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2995 (add_target
2996 ? add_target : (optimize ? 0 : tem)));
2997 val_so_far = (val_so_far << log) - val_so_far;
2998 break;
2999
3000 default:
3001 gcc_unreachable ();
3002 }
3003
3004 /* Write a REG_EQUAL note on the last insn so that we can cse
3005 multiplication sequences. Note that if ACCUM is a SUBREG,
3006 we've set the inner register and must properly indicate
3007 that. */
3008
3009 tem = op0, nmode = mode;
3010 if (GET_CODE (accum) == SUBREG)
3011 {
3012 nmode = GET_MODE (SUBREG_REG (accum));
3013 tem = gen_lowpart (nmode, op0);
3014 }
3015
3016 insn = get_last_insn ();
3017 set_unique_reg_note (insn, REG_EQUAL,
3018 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3019 }
3020
3021 if (variant == negate_variant)
3022 {
3023 val_so_far = -val_so_far;
3024 accum = expand_unop (mode, neg_optab, accum, target, 0);
3025 }
3026 else if (variant == add_variant)
3027 {
3028 val_so_far = val_so_far + 1;
3029 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3030 }
3031
3032 /* Compare only the bits of val and val_so_far that are significant
3033 in the result mode, to avoid sign-/zero-extension confusion. */
3034 val &= GET_MODE_MASK (mode);
3035 val_so_far &= GET_MODE_MASK (mode);
3036 gcc_assert (val == val_so_far);
3037
3038 return accum;
3039 }
3040
3041 /* Perform a multiplication and return an rtx for the result.
3042 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3043 TARGET is a suggestion for where to store the result (an rtx).
3044
3045 We check specially for a constant integer as OP1.
3046 If you want this check for OP0 as well, then before calling
3047 you should swap the two operands if OP0 would be constant. */
3048
3049 rtx
3050 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3051 int unsignedp)
3052 {
3053 enum mult_variant variant;
3054 struct algorithm algorithm;
3055 int max_cost;
3056
3057 /* Handling const0_rtx here allows us to use zero as a rogue value for
3058 coeff below. */
3059 if (op1 == const0_rtx)
3060 return const0_rtx;
3061 if (op1 == const1_rtx)
3062 return op0;
3063 if (op1 == constm1_rtx)
3064 return expand_unop (mode,
3065 GET_MODE_CLASS (mode) == MODE_INT
3066 && !unsignedp && flag_trapv
3067 ? negv_optab : neg_optab,
3068 op0, target, 0);
3069
3070 /* These are the operations that are potentially turned into a sequence
3071 of shifts and additions. */
3072 if (SCALAR_INT_MODE_P (mode)
3073 && (unsignedp || !flag_trapv))
3074 {
3075 HOST_WIDE_INT coeff = 0;
3076 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3077
3078 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3079 less than or equal in size to `unsigned int' this doesn't matter.
3080 If the mode is larger than `unsigned int', then synth_mult works
3081 only if the constant value exactly fits in an `unsigned int' without
3082 any truncation. This means that multiplying by negative values does
3083 not work; results are off by 2^32 on a 32 bit machine. */
3084
3085 if (GET_CODE (op1) == CONST_INT)
3086 {
3087 /* Attempt to handle multiplication of DImode values by negative
3088 coefficients, by performing the multiplication by a positive
3089 multiplier and then inverting the result. */
3090 if (INTVAL (op1) < 0
3091 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3092 {
3093 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3094 result is interpreted as an unsigned coefficient.
3095 Exclude cost of op0 from max_cost to match the cost
3096 calculation of the synth_mult. */
3097 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3098 - neg_cost[mode];
3099 if (max_cost > 0
3100 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3101 &variant, max_cost))
3102 {
3103 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3104 NULL_RTX, &algorithm,
3105 variant);
3106 return expand_unop (mode, neg_optab, temp, target, 0);
3107 }
3108 }
3109 else coeff = INTVAL (op1);
3110 }
3111 else if (GET_CODE (op1) == CONST_DOUBLE)
3112 {
3113 /* If we are multiplying in DImode, it may still be a win
3114 to try to work with shifts and adds. */
3115 if (CONST_DOUBLE_HIGH (op1) == 0)
3116 coeff = CONST_DOUBLE_LOW (op1);
3117 else if (CONST_DOUBLE_LOW (op1) == 0
3118 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3119 {
3120 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3121 + HOST_BITS_PER_WIDE_INT;
3122 return expand_shift (LSHIFT_EXPR, mode, op0,
3123 build_int_cst (NULL_TREE, shift),
3124 target, unsignedp);
3125 }
3126 }
3127
3128 /* We used to test optimize here, on the grounds that it's better to
3129 produce a smaller program when -O is not used. But this causes
3130 such a terrible slowdown sometimes that it seems better to always
3131 use synth_mult. */
3132 if (coeff != 0)
3133 {
3134 /* Special case powers of two. */
3135 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3136 return expand_shift (LSHIFT_EXPR, mode, op0,
3137 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3138 target, unsignedp);
3139
3140 /* Exclude cost of op0 from max_cost to match the cost
3141 calculation of the synth_mult. */
3142 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3143 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3144 max_cost))
3145 return expand_mult_const (mode, op0, coeff, target,
3146 &algorithm, variant);
3147 }
3148 }
3149
3150 if (GET_CODE (op0) == CONST_DOUBLE)
3151 {
3152 rtx temp = op0;
3153 op0 = op1;
3154 op1 = temp;
3155 }
3156
3157 /* Expand x*2.0 as x+x. */
3158 if (GET_CODE (op1) == CONST_DOUBLE
3159 && SCALAR_FLOAT_MODE_P (mode))
3160 {
3161 REAL_VALUE_TYPE d;
3162 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3163
3164 if (REAL_VALUES_EQUAL (d, dconst2))
3165 {
3166 op0 = force_reg (GET_MODE (op0), op0);
3167 return expand_binop (mode, add_optab, op0, op0,
3168 target, unsignedp, OPTAB_LIB_WIDEN);
3169 }
3170 }
3171
3172 /* This used to use umul_optab if unsigned, but for non-widening multiply
3173 there is no difference between signed and unsigned. */
3174 op0 = expand_binop (mode,
3175 ! unsignedp
3176 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3177 ? smulv_optab : smul_optab,
3178 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3179 gcc_assert (op0);
3180 return op0;
3181 }
3182 \f
3183 /* Return the smallest n such that 2**n >= X. */
3184
3185 int
3186 ceil_log2 (unsigned HOST_WIDE_INT x)
3187 {
3188 return floor_log2 (x - 1) + 1;
3189 }
3190
3191 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3192 replace division by D, and put the least significant N bits of the result
3193 in *MULTIPLIER_PTR and return the most significant bit.
3194
3195 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3196 needed precision is in PRECISION (should be <= N).
3197
3198 PRECISION should be as small as possible so this function can choose
3199 multiplier more freely.
3200
3201 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3202 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3203
3204 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3205 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
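/* For example, for D == 3 with N == PRECISION == 32 this yields the familiar
   reciprocal 0xAAAAAAAB with *POST_SHIFT_PTR == 1 and *LGUP_PTR == 2, so the
   quotient is the high half of the 32x32->64 multiply by 0xAAAAAAAB shifted
   right by 1, i.e. x / 3 == (x * 0xAAAAAAAB) >> 33 for any 32-bit unsigned x.
   For divisors such as 7 the minimal multiplier needs N + 1 bits; the
   function then returns 1 and the caller must emit the extra add/shift
   fixup sequence.  */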
3206
3207 static
3208 unsigned HOST_WIDE_INT
3209 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3210 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3211 {
3212 HOST_WIDE_INT mhigh_hi, mlow_hi;
3213 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3214 int lgup, post_shift;
3215 int pow, pow2;
3216 unsigned HOST_WIDE_INT nl, dummy1;
3217 HOST_WIDE_INT nh, dummy2;
3218
3219 /* lgup = ceil(log2(divisor)); */
3220 lgup = ceil_log2 (d);
3221
3222 gcc_assert (lgup <= n);
3223
3224 pow = n + lgup;
3225 pow2 = n + lgup - precision;
3226
3227 /* We could handle this with some effort, but this case is much
3228 better handled directly with a scc insn, so rely on caller using
3229 that. */
3230 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3231
3232 /* mlow = 2^(N + lgup)/d */
3233 if (pow >= HOST_BITS_PER_WIDE_INT)
3234 {
3235 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3236 nl = 0;
3237 }
3238 else
3239 {
3240 nh = 0;
3241 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3242 }
3243 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3244 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3245
3246 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3247 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3248 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3249 else
3250 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3251 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3252 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3253
3254 gcc_assert (!mhigh_hi || nh - d < d);
3255 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3256 /* Assert that mlow < mhigh. */
3257 gcc_assert (mlow_hi < mhigh_hi
3258 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3259
3260 /* If precision == N, then mlow, mhigh exceed 2^N
3261 (but they do not exceed 2^(N+1)). */
3262
3263 /* Reduce to lowest terms. */
3264 for (post_shift = lgup; post_shift > 0; post_shift--)
3265 {
3266 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3267 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3268 if (ml_lo >= mh_lo)
3269 break;
3270
3271 mlow_hi = 0;
3272 mlow_lo = ml_lo;
3273 mhigh_hi = 0;
3274 mhigh_lo = mh_lo;
3275 }
3276
3277 *post_shift_ptr = post_shift;
3278 *lgup_ptr = lgup;
3279 if (n < HOST_BITS_PER_WIDE_INT)
3280 {
3281 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3282 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3283 return mhigh_lo >= mask;
3284 }
3285 else
3286 {
3287 *multiplier_ptr = GEN_INT (mhigh_lo);
3288 return mhigh_hi;
3289 }
3290 }
3291
3292 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3293 congruent to 1 (mod 2**N). */
3294
3295 static unsigned HOST_WIDE_INT
3296 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3297 {
3298 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3299
3300 /* The algorithm notes that the choice y = x satisfies
3301 x*y == 1 mod 2^3, since x is assumed odd.
3302 Each iteration doubles the number of bits of significance in y. */
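/* Worked example for x == 3, n == 8: y starts as 3 (3 * 3 == 9 == 1 mod 8).
   The first iteration gives y = 3 * (2 - 9) & 0xff == 235, correct mod 2**6;
   the second gives y = 235 * (2 - 705) & 0xff == 171, and indeed
   3 * 171 == 513 == 2 * 256 + 1.  */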
3303
3304 unsigned HOST_WIDE_INT mask;
3305 unsigned HOST_WIDE_INT y = x;
3306 int nbit = 3;
3307
3308 mask = (n == HOST_BITS_PER_WIDE_INT
3309 ? ~(unsigned HOST_WIDE_INT) 0
3310 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3311
3312 while (nbit < n)
3313 {
3314 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3315 nbit *= 2;
3316 }
3317 return y;
3318 }
3319
3320 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3321 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3322 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3323 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3324 become signed.
3325
3326 The result is put in TARGET if that is convenient.
3327
3328 MODE is the mode of operation. */
3329
3330 rtx
3331 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3332 rtx op1, rtx target, int unsignedp)
3333 {
3334 rtx tem;
3335 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3336
3337 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3338 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3339 NULL_RTX, 0);
3340 tem = expand_and (mode, tem, op1, NULL_RTX);
3341 adj_operand
3342 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3343 adj_operand);
3344
3345 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3346 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3347 NULL_RTX, 0);
3348 tem = expand_and (mode, tem, op0, NULL_RTX);
3349 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3350 target);
3351
3352 return target;
3353 }
3354
3355 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3356
3357 static rtx
3358 extract_high_half (enum machine_mode mode, rtx op)
3359 {
3360 enum machine_mode wider_mode;
3361
3362 if (mode == word_mode)
3363 return gen_highpart (mode, op);
3364
3365 wider_mode = GET_MODE_WIDER_MODE (mode);
3366 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3367 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3368 return convert_modes (mode, wider_mode, op, 0);
3369 }
3370
3371 /* Like expand_mult_highpart, but only consider using a multiplication
3372 optab. OP1 is an rtx for the constant operand. */
3373
3374 static rtx
3375 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3376 rtx target, int unsignedp, int max_cost)
3377 {
3378 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3379 enum machine_mode wider_mode;
3380 optab moptab;
3381 rtx tem;
3382 int size;
3383
3384 wider_mode = GET_MODE_WIDER_MODE (mode);
3385 size = GET_MODE_BITSIZE (mode);
3386
3387 /* Firstly, try using a multiplication insn that only generates the needed
3388 high part of the product, and in the sign flavor of unsignedp. */
3389 if (mul_highpart_cost[mode] < max_cost)
3390 {
3391 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3392 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3393 unsignedp, OPTAB_DIRECT);
3394 if (tem)
3395 return tem;
3396 }
3397
3398 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3399 Need to adjust the result after the multiplication. */
3400 if (size - 1 < BITS_PER_WORD
3401 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3402 + 4 * add_cost[mode] < max_cost))
3403 {
3404 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3405 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3406 unsignedp, OPTAB_DIRECT);
3407 if (tem)
3408 /* We used the wrong signedness. Adjust the result. */
3409 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3410 tem, unsignedp);
3411 }
3412
3413 /* Try widening multiplication. */
3414 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3415 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3416 && mul_widen_cost[wider_mode] < max_cost)
3417 {
3418 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3419 unsignedp, OPTAB_WIDEN);
3420 if (tem)
3421 return extract_high_half (mode, tem);
3422 }
3423
3424 /* Try widening the mode and performing a non-widening multiplication. */
3425 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3426 && size - 1 < BITS_PER_WORD
3427 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3428 {
3429 rtx insns, wop0, wop1;
3430
3431 /* We need to widen the operands, for example to ensure the
3432 constant multiplier is correctly sign or zero extended.
3433 Use a sequence to clean up any instructions emitted by
3434 the conversions if things don't work out. */
3435 start_sequence ();
3436 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3437 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3438 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3439 unsignedp, OPTAB_WIDEN);
3440 insns = get_insns ();
3441 end_sequence ();
3442
3443 if (tem)
3444 {
3445 emit_insn (insns);
3446 return extract_high_half (mode, tem);
3447 }
3448 }
3449
3450 /* Try widening multiplication of opposite signedness, and adjust. */
3451 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3452 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3453 && size - 1 < BITS_PER_WORD
3454 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3455 + 4 * add_cost[mode] < max_cost))
3456 {
3457 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3458 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3459 if (tem != 0)
3460 {
3461 tem = extract_high_half (mode, tem);
3462 /* We used the wrong signedness. Adjust the result. */
3463 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3464 target, unsignedp);
3465 }
3466 }
3467
3468 return 0;
3469 }
3470
3471 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3472 putting the high half of the result in TARGET if that is convenient,
3473 and returning where the result is. If the operation cannot be performed,
3474 0 is returned.
3475
3476 MODE is the mode of operation and result.
3477
3478 UNSIGNEDP nonzero means unsigned multiply.
3479
3480 MAX_COST is the total allowed cost for the expanded RTL. */
3481
3482 static rtx
3483 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3484 rtx target, int unsignedp, int max_cost)
3485 {
3486 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3487 unsigned HOST_WIDE_INT cnst1;
3488 int extra_cost;
3489 bool sign_adjust = false;
3490 enum mult_variant variant;
3491 struct algorithm alg;
3492 rtx tem;
3493
3494 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3495 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3496
3497 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3498
3499 /* We can't optimize modes wider than BITS_PER_WORD.
3500 ??? We might be able to perform double-word arithmetic if
3501 mode == word_mode, however all the cost calculations in
3502 synth_mult etc. assume single-word operations. */
3503 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3504 return expand_mult_highpart_optab (mode, op0, op1, target,
3505 unsignedp, max_cost);
3506
3507 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3508
3509 /* Check whether we try to multiply by a negative constant. */
3510 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3511 {
3512 sign_adjust = true;
3513 extra_cost += add_cost[mode];
3514 }
3515
3516 /* See whether shift/add multiplication is cheap enough. */
3517 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3518 max_cost - extra_cost))
3519 {
3520 /* See whether the specialized multiplication optabs are
3521 cheaper than the shift/add version. */
3522 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3523 alg.cost.cost + extra_cost);
3524 if (tem)
3525 return tem;
3526
3527 tem = convert_to_mode (wider_mode, op0, unsignedp);
3528 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3529 tem = extract_high_half (mode, tem);
3530
3531 /* Adjust result for signedness. */
3532 if (sign_adjust)
3533 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3534
3535 return tem;
3536 }
3537 return expand_mult_highpart_optab (mode, op0, op1, target,
3538 unsignedp, max_cost);
3539 }
3540
3541
3542 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3543
3544 static rtx
3545 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3546 {
3547 unsigned HOST_WIDE_INT masklow, maskhigh;
3548 rtx result, temp, shift, label;
3549 int logd;
3550
3551 logd = floor_log2 (d);
3552 result = gen_reg_rtx (mode);
3553
3554 /* Avoid conditional branches when they're expensive. */
3555 if (BRANCH_COST >= 2
3556 && !optimize_size)
3557 {
3558 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3559 mode, 0, -1);
3560 if (signmask)
3561 {
3562 signmask = force_reg (mode, signmask);
3563 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3564 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3565
3566 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3567 which instruction sequence to use. If logical right shifts
3568 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3569 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3570
3571 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3572 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3573 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3574 {
3575 temp = expand_binop (mode, xor_optab, op0, signmask,
3576 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3577 temp = expand_binop (mode, sub_optab, temp, signmask,
3578 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3579 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3581 temp = expand_binop (mode, xor_optab, temp, signmask,
3582 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3583 temp = expand_binop (mode, sub_optab, temp, signmask,
3584 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3585 }
3586 else
3587 {
3588 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3589 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3590 signmask = force_reg (mode, signmask);
3591
3592 temp = expand_binop (mode, add_optab, op0, signmask,
3593 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3594 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3595 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3596 temp = expand_binop (mode, sub_optab, temp, signmask,
3597 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3598 }
3599 return temp;
3600 }
3601 }
3602
3603 /* Mask contains the mode's signbit and the significant bits of the
3604 modulus. By including the signbit in the operation, many targets
3605 can avoid an explicit compare operation in the following comparison
3606 against zero. */
3607
3608 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3609 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3610 {
3611 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3612 maskhigh = -1;
3613 }
3614 else
3615 maskhigh = (HOST_WIDE_INT) -1
3616 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3617
3618 temp = expand_binop (mode, and_optab, op0,
3619 immed_double_const (masklow, maskhigh, mode),
3620 result, 1, OPTAB_LIB_WIDEN);
3621 if (temp != result)
3622 emit_move_insn (result, temp);
3623
3624 label = gen_label_rtx ();
3625 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3626
3627 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3628 0, OPTAB_LIB_WIDEN);
3629 masklow = (HOST_WIDE_INT) -1 << logd;
3630 maskhigh = -1;
3631 temp = expand_binop (mode, ior_optab, temp,
3632 immed_double_const (masklow, maskhigh, mode),
3633 result, 1, OPTAB_LIB_WIDEN);
3634 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3635 0, OPTAB_LIB_WIDEN);
3636 if (temp != result)
3637 emit_move_insn (result, temp);
3638 emit_label (label);
3639 return result;
3640 }
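/* Illustrative sketch only (hypothetical helper, not referenced by this
   file): the first branch-free sequence emitted above, written out for a
   32-bit int with an arithmetic right shift and D = 1 << LOGD.  It yields
   the truncating remainder X % D, ignoring the INT_MIN corner case.  */

static int
smod_pow2_example (int x, int logd)
{
  int d = 1 << logd;
  int s = x >> 31;                       /* all ones when x is negative */
  return ((((x ^ s) - s) & (d - 1)) ^ s) - s;
}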
3641
3642 /* Expand signed division of OP0 by a power of two D in mode MODE.
3643 This routine is only called for positive values of D. */
3644
3645 static rtx
3646 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3647 {
3648 rtx temp, label;
3649 tree shift;
3650 int logd;
3651
3652 logd = floor_log2 (d);
3653 shift = build_int_cst (NULL_TREE, logd);
3654
3655 if (d == 2 && BRANCH_COST >= 1)
3656 {
3657 temp = gen_reg_rtx (mode);
3658 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3659 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3660 0, OPTAB_LIB_WIDEN);
3661 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3662 }
3663
3664 #ifdef HAVE_conditional_move
3665 if (BRANCH_COST >= 2)
3666 {
3667 rtx temp2;
3668
3669 /* ??? emit_conditional_move forces a stack adjustment via
3670 compare_from_rtx so, if the sequence is discarded, it will
3671 be lost. Do it now instead. */
3672 do_pending_stack_adjust ();
3673
3674 start_sequence ();
3675 temp2 = copy_to_mode_reg (mode, op0);
3676 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3677 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3678 temp = force_reg (mode, temp);
3679
3680 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3681 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3682 mode, temp, temp2, mode, 0);
3683 if (temp2)
3684 {
3685 rtx seq = get_insns ();
3686 end_sequence ();
3687 emit_insn (seq);
3688 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3689 }
3690 end_sequence ();
3691 }
3692 #endif
3693
3694 if (BRANCH_COST >= 2)
3695 {
3696 int ushift = GET_MODE_BITSIZE (mode) - logd;
3697
3698 temp = gen_reg_rtx (mode);
3699 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3700 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3701 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3702 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3703 else
3704 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3705 build_int_cst (NULL_TREE, ushift),
3706 NULL_RTX, 1);
3707 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3708 0, OPTAB_LIB_WIDEN);
3709 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3710 }
3711
3712 label = gen_label_rtx ();
3713 temp = copy_to_mode_reg (mode, op0);
3714 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3715 expand_inc (temp, GEN_INT (d - 1));
3716 emit_label (label);
3717 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3718 }
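/* Illustrative sketch only (hypothetical helper, not referenced by this
   file): the bias-and-shift identity that the branch-free variants above
   implement, for a 32-bit int with an arithmetic right shift.  */

static int
sdiv_pow2_example (int x, int logd)
{
  int bias = (x >> 31) & ((1 << logd) - 1);   /* d - 1 when x is negative */
  return (x + bias) >> logd;                  /* truncating x / (1 << logd) */
}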
3719 \f
3720 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3721 if that is convenient, and returning where the result is.
3722 You may request either the quotient or the remainder as the result;
3723 specify REM_FLAG nonzero to get the remainder.
3724
3725 CODE is the expression code for which kind of division this is;
3726 it controls how rounding is done. MODE is the machine mode to use.
3727 UNSIGNEDP nonzero means do unsigned division. */
3728
3729 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3730 and then correct it by or'ing in missing high bits
3731 if result of ANDI is nonzero.
3732 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3733 This could optimize to a bfexts instruction.
3734 But C doesn't use these operations, so their optimizations are
3735 left for later. */
3736 /* ??? For modulo, we don't actually need the highpart of the first product,
3737 the low part will do nicely. And for small divisors, the second multiply
3738 can also be a low-part only multiply or even be completely left out.
3739 E.g. to calculate the remainder of a division by 3 with a 32 bit
3740 multiply, multiply with 0x55555556 and extract the upper two bits;
3741 the result is exact for inputs up to 0x1fffffff.
3742 The input range can be reduced by using cross-sum rules.
3743 For odd divisors >= 3, the following table gives right shift counts
3744 so that if a number is shifted by an integer multiple of the given
3745 amount, the remainder stays the same:
3746 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3747 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3748 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3749 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3750 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3751
3752 Cross-sum rules for even numbers can be derived by leaving as many bits
3753 to the right alone as the divisor has zeros to the right.
3754 E.g. if x is an unsigned 32 bit number:
3755 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3756 */
3757
3758 rtx
3759 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3760 rtx op0, rtx op1, rtx target, int unsignedp)
3761 {
3762 enum machine_mode compute_mode;
3763 rtx tquotient;
3764 rtx quotient = 0, remainder = 0;
3765 rtx last;
3766 int size;
3767 rtx insn, set;
3768 optab optab1, optab2;
3769 int op1_is_constant, op1_is_pow2 = 0;
3770 int max_cost, extra_cost;
3771 static HOST_WIDE_INT last_div_const = 0;
3772 static HOST_WIDE_INT ext_op1;
3773
3774 op1_is_constant = GET_CODE (op1) == CONST_INT;
3775 if (op1_is_constant)
3776 {
3777 ext_op1 = INTVAL (op1);
3778 if (unsignedp)
3779 ext_op1 &= GET_MODE_MASK (mode);
3780 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3781 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3782 }
3783
3784 /*
3785 This is the structure of expand_divmod:
3786
3787 First comes code to fix up the operands so we can perform the operations
3788 correctly and efficiently.
3789
3790 Second comes a switch statement with code specific for each rounding mode.
3791 For some special operands this code emits all RTL for the desired
3792 operation, for other cases, it generates only a quotient and stores it in
3793 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3794 to indicate that it has not done anything.
3795
3796 Last comes code that finishes the operation. If QUOTIENT is set and
3797 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3798 QUOTIENT is not set, it is computed using trunc rounding.
3799
3800 We try to generate special code for division and remainder when OP1 is a
3801 constant. If |OP1| = 2**n we can use shifts and some other fast
3802 operations. For other values of OP1, we compute a carefully selected
3803 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3804 by m.
3805
3806 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3807 half of the product. Different strategies for generating the product are
3808 implemented in expand_mult_highpart.
3809
3810 If what we actually want is the remainder, we generate that by another
3811 by-constant multiplication and a subtraction. */
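/* As a concrete instance of the multiply-by-reciprocal strategy, assume a
   32-bit mode and an unsigned division by 7: choose_multiplier yields the
   multiplier 0x24924925 with a nonzero return value and POST_SHIFT = 3,
   so the code emitted below computes
     t1 = highpart (op0 * 0x24924925)
     q  = (((op0 - t1) >> 1) + t1) >> 2
   e.g. op0 = 100: t1 = 14 and q = ((86 >> 1) + 14) >> 2 = 14 = 100/7.  */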
3812
3813 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3814 code below will malfunction if we are, so check here and handle
3815 the special case if so. */
3816 if (op1 == const1_rtx)
3817 return rem_flag ? const0_rtx : op0;
3818
3819 /* When dividing by -1, we could get an overflow.
3820 negv_optab can handle overflows. */
3821 if (! unsignedp && op1 == constm1_rtx)
3822 {
3823 if (rem_flag)
3824 return const0_rtx;
3825 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3826 ? negv_optab : neg_optab, op0, target, 0);
3827 }
3828
3829 if (target
3830 /* Don't use the function value register as a target
3831 since we have to read it as well as write it,
3832 and function-inlining gets confused by this. */
3833 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3834 /* Don't clobber an operand while doing a multi-step calculation. */
3835 || ((rem_flag || op1_is_constant)
3836 && (reg_mentioned_p (target, op0)
3837 || (MEM_P (op0) && MEM_P (target))))
3838 || reg_mentioned_p (target, op1)
3839 || (MEM_P (op1) && MEM_P (target))))
3840 target = 0;
3841
3842 /* Get the mode in which to perform this computation. Normally it will
3843 be MODE, but sometimes we can't do the desired operation in MODE.
3844 If so, pick a wider mode in which we can do the operation. Convert
3845 to that mode at the start to avoid repeated conversions.
3846
3847 First see what operations we need. These depend on the expression
3848 we are evaluating. (We assume that divxx3 insns exist under the
3849 same conditions that modxx3 insns do, and that these insns don't normally
3850 fail. If these assumptions are not correct, we may generate less
3851 efficient code in some cases.)
3852
3853 Then see if we find a mode in which we can open-code that operation
3854 (either a division, modulus, or shift). Finally, check for the smallest
3855 mode for which we can do the operation with a library call. */
3856
3857 /* We might want to refine this now that we have division-by-constant
3858 optimization. Since expand_mult_highpart tries so many variants, it is
3859 not straightforward to generalize this. Maybe we should make an array
3860 of possible modes in init_expmed? Save this for GCC 2.7. */
3861
3862 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3863 ? (unsignedp ? lshr_optab : ashr_optab)
3864 : (unsignedp ? udiv_optab : sdiv_optab));
3865 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3866 ? optab1
3867 : (unsignedp ? udivmod_optab : sdivmod_optab));
3868
3869 for (compute_mode = mode; compute_mode != VOIDmode;
3870 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3871 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3872 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3873 break;
3874
3875 if (compute_mode == VOIDmode)
3876 for (compute_mode = mode; compute_mode != VOIDmode;
3877 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3878 if (optab1->handlers[compute_mode].libfunc
3879 || optab2->handlers[compute_mode].libfunc)
3880 break;
3881
3882 /* If we still couldn't find a mode, use MODE, but expand_binop will
3883 probably die. */
3884 if (compute_mode == VOIDmode)
3885 compute_mode = mode;
3886
3887 if (target && GET_MODE (target) == compute_mode)
3888 tquotient = target;
3889 else
3890 tquotient = gen_reg_rtx (compute_mode);
3891
3892 size = GET_MODE_BITSIZE (compute_mode);
3893 #if 0
3894 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3895 (mode), and thereby get better code when OP1 is a constant. Do that
3896 later. It will require going over all usages of SIZE below. */
3897 size = GET_MODE_BITSIZE (mode);
3898 #endif
3899
3900 /* Only deduct something for a REM if the last divide done was
3901 for a different constant. Then set the constant of the last
3902 divide. */
3903 max_cost = div_cost[compute_mode]
3904 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3905 && INTVAL (op1) == last_div_const)
3906 ? mul_cost[compute_mode] + add_cost[compute_mode]
3907 : 0);
3908
3909 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3910
3911 /* Now convert to the best mode to use. */
3912 if (compute_mode != mode)
3913 {
3914 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3915 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3916
3917 /* convert_modes may have placed op1 into a register, so we
3918 must recompute the following. */
3919 op1_is_constant = GET_CODE (op1) == CONST_INT;
3920 op1_is_pow2 = (op1_is_constant
3921 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3922 || (! unsignedp
3923 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3924 }
3925
3926 /* If one of the operands is a volatile MEM, copy it into a register. */
3927
3928 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3929 op0 = force_reg (compute_mode, op0);
3930 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3931 op1 = force_reg (compute_mode, op1);
3932
3933 /* If we need the remainder or if OP1 is constant, we need to
3934 put OP0 in a register in case it has any queued subexpressions. */
3935 if (rem_flag || op1_is_constant)
3936 op0 = force_reg (compute_mode, op0);
3937
3938 last = get_last_insn ();
3939
3940 /* Promote floor rounding to trunc rounding for unsigned operations. */
3941 if (unsignedp)
3942 {
3943 if (code == FLOOR_DIV_EXPR)
3944 code = TRUNC_DIV_EXPR;
3945 if (code == FLOOR_MOD_EXPR)
3946 code = TRUNC_MOD_EXPR;
3947 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3948 code = TRUNC_DIV_EXPR;
3949 }
3950
3951 if (op1 != const0_rtx)
3952 switch (code)
3953 {
3954 case TRUNC_MOD_EXPR:
3955 case TRUNC_DIV_EXPR:
3956 if (op1_is_constant)
3957 {
3958 if (unsignedp)
3959 {
3960 unsigned HOST_WIDE_INT mh;
3961 int pre_shift, post_shift;
3962 int dummy;
3963 rtx ml;
3964 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3965 & GET_MODE_MASK (compute_mode));
3966
3967 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3968 {
3969 pre_shift = floor_log2 (d);
3970 if (rem_flag)
3971 {
3972 remainder
3973 = expand_binop (compute_mode, and_optab, op0,
3974 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3975 remainder, 1,
3976 OPTAB_LIB_WIDEN);
3977 if (remainder)
3978 return gen_lowpart (mode, remainder);
3979 }
3980 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3981 build_int_cst (NULL_TREE,
3982 pre_shift),
3983 tquotient, 1);
3984 }
3985 else if (size <= HOST_BITS_PER_WIDE_INT)
3986 {
3987 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3988 {
3989 /* Most significant bit of divisor is set; emit an scc
3990 insn. */
3991 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3992 compute_mode, 1, 1);
3993 if (quotient == 0)
3994 goto fail1;
3995 }
3996 else
3997 {
3998 /* Find a suitable multiplier and right shift count
3999 instead of multiplying with D. */
4000
4001 mh = choose_multiplier (d, size, size,
4002 &ml, &post_shift, &dummy);
4003
4004 /* If the suggested multiplier is more than SIZE bits,
4005 we can do better for even divisors, using an
4006 initial right shift. */
4007 if (mh != 0 && (d & 1) == 0)
4008 {
4009 pre_shift = floor_log2 (d & -d);
4010 mh = choose_multiplier (d >> pre_shift, size,
4011 size - pre_shift,
4012 &ml, &post_shift, &dummy);
4013 gcc_assert (!mh);
4014 }
4015 else
4016 pre_shift = 0;
4017
4018 if (mh != 0)
4019 {
4020 rtx t1, t2, t3, t4;
4021
4022 if (post_shift - 1 >= BITS_PER_WORD)
4023 goto fail1;
4024
4025 extra_cost
4026 = (shift_cost[compute_mode][post_shift - 1]
4027 + shift_cost[compute_mode][1]
4028 + 2 * add_cost[compute_mode]);
4029 t1 = expand_mult_highpart (compute_mode, op0, ml,
4030 NULL_RTX, 1,
4031 max_cost - extra_cost);
4032 if (t1 == 0)
4033 goto fail1;
4034 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4035 op0, t1),
4036 NULL_RTX);
4037 t3 = expand_shift
4038 (RSHIFT_EXPR, compute_mode, t2,
4039 build_int_cst (NULL_TREE, 1),
4040 NULL_RTX,1);
4041 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4042 t1, t3),
4043 NULL_RTX);
4044 quotient = expand_shift
4045 (RSHIFT_EXPR, compute_mode, t4,
4046 build_int_cst (NULL_TREE, post_shift - 1),
4047 tquotient, 1);
4048 }
4049 else
4050 {
4051 rtx t1, t2;
4052
4053 if (pre_shift >= BITS_PER_WORD
4054 || post_shift >= BITS_PER_WORD)
4055 goto fail1;
4056
4057 t1 = expand_shift
4058 (RSHIFT_EXPR, compute_mode, op0,
4059 build_int_cst (NULL_TREE, pre_shift),
4060 NULL_RTX, 1);
4061 extra_cost
4062 = (shift_cost[compute_mode][pre_shift]
4063 + shift_cost[compute_mode][post_shift]);
4064 t2 = expand_mult_highpart (compute_mode, t1, ml,
4065 NULL_RTX, 1,
4066 max_cost - extra_cost);
4067 if (t2 == 0)
4068 goto fail1;
4069 quotient = expand_shift
4070 (RSHIFT_EXPR, compute_mode, t2,
4071 build_int_cst (NULL_TREE, post_shift),
4072 tquotient, 1);
4073 }
4074 }
4075 }
4076 else /* Mode too wide to use tricky code */
4077 break;
4078
4079 insn = get_last_insn ();
4080 if (insn != last
4081 && (set = single_set (insn)) != 0
4082 && SET_DEST (set) == quotient)
4083 set_unique_reg_note (insn,
4084 REG_EQUAL,
4085 gen_rtx_UDIV (compute_mode, op0, op1));
4086 }
4087 else /* TRUNC_DIV, signed */
4088 {
4089 unsigned HOST_WIDE_INT ml;
4090 int lgup, post_shift;
4091 rtx mlr;
4092 HOST_WIDE_INT d = INTVAL (op1);
4093 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4094
4095 /* n rem d = n rem -d */
4096 if (rem_flag && d < 0)
4097 {
4098 d = abs_d;
4099 op1 = gen_int_mode (abs_d, compute_mode);
4100 }
4101
4102 if (d == 1)
4103 quotient = op0;
4104 else if (d == -1)
4105 quotient = expand_unop (compute_mode, neg_optab, op0,
4106 tquotient, 0);
4107 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4108 {
4109 /* This case is not handled correctly below. */
4110 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4111 compute_mode, 1, 1);
4112 if (quotient == 0)
4113 goto fail1;
4114 }
4115 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4116 && (rem_flag ? smod_pow2_cheap[compute_mode]
4117 : sdiv_pow2_cheap[compute_mode])
4118 /* We assume that the cheap metric is true if the
4119 optab has an expander for this mode. */
4120 && (((rem_flag ? smod_optab : sdiv_optab)
4121 ->handlers[compute_mode].insn_code
4122 != CODE_FOR_nothing)
4123 || (sdivmod_optab->handlers[compute_mode]
4124 .insn_code != CODE_FOR_nothing)))
4125 ;
4126 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4127 {
4128 if (rem_flag)
4129 {
4130 remainder = expand_smod_pow2 (compute_mode, op0, d);
4131 if (remainder)
4132 return gen_lowpart (mode, remainder);
4133 }
4134
4135 if (sdiv_pow2_cheap[compute_mode]
4136 && ((sdiv_optab->handlers[compute_mode].insn_code
4137 != CODE_FOR_nothing)
4138 || (sdivmod_optab->handlers[compute_mode].insn_code
4139 != CODE_FOR_nothing)))
4140 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4141 compute_mode, op0,
4142 gen_int_mode (abs_d,
4143 compute_mode),
4144 NULL_RTX, 0);
4145 else
4146 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4147
4148 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4149 negate the quotient. */
4150 if (d < 0)
4151 {
4152 insn = get_last_insn ();
4153 if (insn != last
4154 && (set = single_set (insn)) != 0
4155 && SET_DEST (set) == quotient
4156 && abs_d < ((unsigned HOST_WIDE_INT) 1
4157 << (HOST_BITS_PER_WIDE_INT - 1)))
4158 set_unique_reg_note (insn,
4159 REG_EQUAL,
4160 gen_rtx_DIV (compute_mode,
4161 op0,
4162 GEN_INT
4163 (trunc_int_for_mode
4164 (abs_d,
4165 compute_mode))));
4166
4167 quotient = expand_unop (compute_mode, neg_optab,
4168 quotient, quotient, 0);
4169 }
4170 }
4171 else if (size <= HOST_BITS_PER_WIDE_INT)
4172 {
4173 choose_multiplier (abs_d, size, size - 1,
4174 &mlr, &post_shift, &lgup);
4175 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4176 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4177 {
4178 rtx t1, t2, t3;
4179
4180 if (post_shift >= BITS_PER_WORD
4181 || size - 1 >= BITS_PER_WORD)
4182 goto fail1;
4183
4184 extra_cost = (shift_cost[compute_mode][post_shift]
4185 + shift_cost[compute_mode][size - 1]
4186 + add_cost[compute_mode]);
4187 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4188 NULL_RTX, 0,
4189 max_cost - extra_cost);
4190 if (t1 == 0)
4191 goto fail1;
4192 t2 = expand_shift
4193 (RSHIFT_EXPR, compute_mode, t1,
4194 build_int_cst (NULL_TREE, post_shift),
4195 NULL_RTX, 0);
4196 t3 = expand_shift
4197 (RSHIFT_EXPR, compute_mode, op0,
4198 build_int_cst (NULL_TREE, size - 1),
4199 NULL_RTX, 0);
4200 if (d < 0)
4201 quotient
4202 = force_operand (gen_rtx_MINUS (compute_mode,
4203 t3, t2),
4204 tquotient);
4205 else
4206 quotient
4207 = force_operand (gen_rtx_MINUS (compute_mode,
4208 t2, t3),
4209 tquotient);
4210 }
4211 else
4212 {
4213 rtx t1, t2, t3, t4;
4214
4215 if (post_shift >= BITS_PER_WORD
4216 || size - 1 >= BITS_PER_WORD)
4217 goto fail1;
4218
4219 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4220 mlr = gen_int_mode (ml, compute_mode);
4221 extra_cost = (shift_cost[compute_mode][post_shift]
4222 + shift_cost[compute_mode][size - 1]
4223 + 2 * add_cost[compute_mode]);
4224 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4225 NULL_RTX, 0,
4226 max_cost - extra_cost);
4227 if (t1 == 0)
4228 goto fail1;
4229 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4230 t1, op0),
4231 NULL_RTX);
4232 t3 = expand_shift
4233 (RSHIFT_EXPR, compute_mode, t2,
4234 build_int_cst (NULL_TREE, post_shift),
4235 NULL_RTX, 0);
4236 t4 = expand_shift
4237 (RSHIFT_EXPR, compute_mode, op0,
4238 build_int_cst (NULL_TREE, size - 1),
4239 NULL_RTX, 0);
4240 if (d < 0)
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t4, t3),
4244 tquotient);
4245 else
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t3, t4),
4249 tquotient);
4250 }
4251 }
4252 else /* Mode too wide to use tricky code */
4253 break;
4254
4255 insn = get_last_insn ();
4256 if (insn != last
4257 && (set = single_set (insn)) != 0
4258 && SET_DEST (set) == quotient)
4259 set_unique_reg_note (insn,
4260 REG_EQUAL,
4261 gen_rtx_DIV (compute_mode, op0, op1));
4262 }
4263 break;
4264 }
4265 fail1:
4266 delete_insns_since (last);
4267 break;
4268
4269 case FLOOR_DIV_EXPR:
4270 case FLOOR_MOD_EXPR:
4271 /* We will come here only for signed operations. */
4272 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4273 {
4274 unsigned HOST_WIDE_INT mh;
4275 int pre_shift, lgup, post_shift;
4276 HOST_WIDE_INT d = INTVAL (op1);
4277 rtx ml;
4278
4279 if (d > 0)
4280 {
4281 /* We could just as easily deal with negative constants here,
4282 but it does not seem worth the trouble for GCC 2.6. */
4283 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4284 {
4285 pre_shift = floor_log2 (d);
4286 if (rem_flag)
4287 {
4288 remainder = expand_binop (compute_mode, and_optab, op0,
4289 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4290 remainder, 0, OPTAB_LIB_WIDEN);
4291 if (remainder)
4292 return gen_lowpart (mode, remainder);
4293 }
4294 quotient = expand_shift
4295 (RSHIFT_EXPR, compute_mode, op0,
4296 build_int_cst (NULL_TREE, pre_shift),
4297 tquotient, 0);
4298 }
4299 else
4300 {
4301 rtx t1, t2, t3, t4;
4302
4303 mh = choose_multiplier (d, size, size - 1,
4304 &ml, &post_shift, &lgup);
4305 gcc_assert (!mh);
4306
4307 if (post_shift < BITS_PER_WORD
4308 && size - 1 < BITS_PER_WORD)
4309 {
4310 t1 = expand_shift
4311 (RSHIFT_EXPR, compute_mode, op0,
4312 build_int_cst (NULL_TREE, size - 1),
4313 NULL_RTX, 0);
4314 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4315 NULL_RTX, 0, OPTAB_WIDEN);
4316 extra_cost = (shift_cost[compute_mode][post_shift]
4317 + shift_cost[compute_mode][size - 1]
4318 + 2 * add_cost[compute_mode]);
4319 t3 = expand_mult_highpart (compute_mode, t2, ml,
4320 NULL_RTX, 1,
4321 max_cost - extra_cost);
4322 if (t3 != 0)
4323 {
4324 t4 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, t3,
4326 build_int_cst (NULL_TREE, post_shift),
4327 NULL_RTX, 1);
4328 quotient = expand_binop (compute_mode, xor_optab,
4329 t4, t1, tquotient, 0,
4330 OPTAB_WIDEN);
4331 }
4332 }
4333 }
4334 }
4335 else
4336 {
4337 rtx nsign, t1, t2, t3, t4;
4338 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4339 op0, constm1_rtx), NULL_RTX);
4340 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4341 0, OPTAB_WIDEN);
4342 nsign = expand_shift
4343 (RSHIFT_EXPR, compute_mode, t2,
4344 build_int_cst (NULL_TREE, size - 1),
4345 NULL_RTX, 0);
4346 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4347 NULL_RTX);
4348 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4349 NULL_RTX, 0);
4350 if (t4)
4351 {
4352 rtx t5;
4353 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4354 NULL_RTX, 0);
4355 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4356 t4, t5),
4357 tquotient);
4358 }
4359 }
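/* For a negative constant divisor the sequence above computes
   floor (op0 / op1) as
     trunc ((op0 > 0 ? op0 - 1 : op0) / op1) - (op0 > 0 ? 1 : 0),
   with nsign being -1 exactly when op0 <= 0.  E.g. op0 = 7, op1 = -2
   gives trunc (6 / -2) - 1 = -4 = floor (-3.5), while op0 = -7 gives
   trunc (-7 / -2) = 3.  */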
4360 }
4361
4362 if (quotient != 0)
4363 break;
4364 delete_insns_since (last);
4365
4366 /* Try using an instruction that produces both the quotient and
4367 remainder, using truncation. We can easily compensate the quotient
4368 or remainder to get floor rounding, once we have the remainder.
4369 Notice that we also compute the final remainder value here,
4370 and return the result right away. */
4371 if (target == 0 || GET_MODE (target) != compute_mode)
4372 target = gen_reg_rtx (compute_mode);
4373
4374 if (rem_flag)
4375 {
4376 remainder
4377 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4378 quotient = gen_reg_rtx (compute_mode);
4379 }
4380 else
4381 {
4382 quotient
4383 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4384 remainder = gen_reg_rtx (compute_mode);
4385 }
4386
4387 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4388 quotient, remainder, 0))
4389 {
4390 /* This could be computed with a branch-less sequence.
4391 Save that for later. */
4392 rtx tem;
4393 rtx label = gen_label_rtx ();
4394 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4395 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4396 NULL_RTX, 0, OPTAB_WIDEN);
4397 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4398 expand_dec (quotient, const1_rtx);
4399 expand_inc (remainder, op1);
4400 emit_label (label);
4401 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4402 }
4403
4404 /* No luck with division elimination or divmod. Have to do it
4405 by conditionally adjusting op0 *and* the result. */
4406 {
4407 rtx label1, label2, label3, label4, label5;
4408 rtx adjusted_op0;
4409 rtx tem;
4410
4411 quotient = gen_reg_rtx (compute_mode);
4412 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4413 label1 = gen_label_rtx ();
4414 label2 = gen_label_rtx ();
4415 label3 = gen_label_rtx ();
4416 label4 = gen_label_rtx ();
4417 label5 = gen_label_rtx ();
4418 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4419 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4420 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4421 quotient, 0, OPTAB_LIB_WIDEN);
4422 if (tem != quotient)
4423 emit_move_insn (quotient, tem);
4424 emit_jump_insn (gen_jump (label5));
4425 emit_barrier ();
4426 emit_label (label1);
4427 expand_inc (adjusted_op0, const1_rtx);
4428 emit_jump_insn (gen_jump (label4));
4429 emit_barrier ();
4430 emit_label (label2);
4431 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4432 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4433 quotient, 0, OPTAB_LIB_WIDEN);
4434 if (tem != quotient)
4435 emit_move_insn (quotient, tem);
4436 emit_jump_insn (gen_jump (label5));
4437 emit_barrier ();
4438 emit_label (label3);
4439 expand_dec (adjusted_op0, const1_rtx);
4440 emit_label (label4);
4441 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4442 quotient, 0, OPTAB_LIB_WIDEN);
4443 if (tem != quotient)
4444 emit_move_insn (quotient, tem);
4445 expand_dec (quotient, const1_rtx);
4446 emit_label (label5);
4447 }
4448 break;
4449
4450 case CEIL_DIV_EXPR:
4451 case CEIL_MOD_EXPR:
4452 if (unsignedp)
4453 {
4454 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4455 {
4456 rtx t1, t2, t3;
4457 unsigned HOST_WIDE_INT d = INTVAL (op1);
4458 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4459 build_int_cst (NULL_TREE, floor_log2 (d)),
4460 tquotient, 1);
4461 t2 = expand_binop (compute_mode, and_optab, op0,
4462 GEN_INT (d - 1),
4463 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4464 t3 = gen_reg_rtx (compute_mode);
4465 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4466 compute_mode, 1, 1);
4467 if (t3 == 0)
4468 {
4469 rtx lab;
4470 lab = gen_label_rtx ();
4471 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4472 expand_inc (t1, const1_rtx);
4473 emit_label (lab);
4474 quotient = t1;
4475 }
4476 else
4477 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4478 t1, t3),
4479 tquotient);
4480 break;
4481 }
4482
4483 /* Try using an instruction that produces both the quotient and
4484 remainder, using truncation. We can easily compensate the
4485 quotient or remainder to get ceiling rounding, once we have the
4486 remainder. Notice that we also compute the final remainder
4487 value here, and return the result right away. */
4488 if (target == 0 || GET_MODE (target) != compute_mode)
4489 target = gen_reg_rtx (compute_mode);
4490
4491 if (rem_flag)
4492 {
4493 remainder = (REG_P (target)
4494 ? target : gen_reg_rtx (compute_mode));
4495 quotient = gen_reg_rtx (compute_mode);
4496 }
4497 else
4498 {
4499 quotient = (REG_P (target)
4500 ? target : gen_reg_rtx (compute_mode));
4501 remainder = gen_reg_rtx (compute_mode);
4502 }
4503
4504 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4505 remainder, 1))
4506 {
4507 /* This could be computed with a branch-less sequence.
4508 Save that for later. */
4509 rtx label = gen_label_rtx ();
4510 do_cmp_and_jump (remainder, const0_rtx, EQ,
4511 compute_mode, label);
4512 expand_inc (quotient, const1_rtx);
4513 expand_dec (remainder, op1);
4514 emit_label (label);
4515 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4516 }
4517
4518 /* No luck with division elimination or divmod. Have to do it
4519 by conditionally adjusting op0 *and* the result. */
4520 {
4521 rtx label1, label2;
4522 rtx adjusted_op0, tem;
4523
4524 quotient = gen_reg_rtx (compute_mode);
4525 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4526 label1 = gen_label_rtx ();
4527 label2 = gen_label_rtx ();
4528 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4529 compute_mode, label1);
4530 emit_move_insn (quotient, const0_rtx);
4531 emit_jump_insn (gen_jump (label2));
4532 emit_barrier ();
4533 emit_label (label1);
4534 expand_dec (adjusted_op0, const1_rtx);
4535 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4536 quotient, 1, OPTAB_LIB_WIDEN);
4537 if (tem != quotient)
4538 emit_move_insn (quotient, tem);
4539 expand_inc (quotient, const1_rtx);
4540 emit_label (label2);
4541 }
4542 }
4543 else /* signed */
4544 {
4545 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4546 && INTVAL (op1) >= 0)
4547 {
4548 /* This is extremely similar to the code for the unsigned case
4549 above. For 2.7 we should merge these variants, but for
4550 2.6.1 I don't want to touch the code for unsigned since that
4551 get used in C. The signed case will only be used by other
4552 languages (Ada). */
4553
4554 rtx t1, t2, t3;
4555 unsigned HOST_WIDE_INT d = INTVAL (op1);
4556 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4557 build_int_cst (NULL_TREE, floor_log2 (d)),
4558 tquotient, 0);
4559 t2 = expand_binop (compute_mode, and_optab, op0,
4560 GEN_INT (d - 1),
4561 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4562 t3 = gen_reg_rtx (compute_mode);
4563 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4564 compute_mode, 1, 1);
4565 if (t3 == 0)
4566 {
4567 rtx lab;
4568 lab = gen_label_rtx ();
4569 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4570 expand_inc (t1, const1_rtx);
4571 emit_label (lab);
4572 quotient = t1;
4573 }
4574 else
4575 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4576 t1, t3),
4577 tquotient);
4578 break;
4579 }
4580
4581 /* Try using an instruction that produces both the quotient and
4582 remainder, using truncation. We can easily compensate the
4583 quotient or remainder to get ceiling rounding, once we have the
4584 remainder. Notice that we also compute the final remainder
4585 value here, and return the result right away. */
4586 if (target == 0 || GET_MODE (target) != compute_mode)
4587 target = gen_reg_rtx (compute_mode);
4588 if (rem_flag)
4589 {
4590 remainder = (REG_P (target)
4591 ? target : gen_reg_rtx (compute_mode));
4592 quotient = gen_reg_rtx (compute_mode);
4593 }
4594 else
4595 {
4596 quotient = (REG_P (target)
4597 ? target : gen_reg_rtx (compute_mode));
4598 remainder = gen_reg_rtx (compute_mode);
4599 }
4600
4601 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4602 remainder, 0))
4603 {
4604 /* This could be computed with a branch-less sequence.
4605 Save that for later. */
4606 rtx tem;
4607 rtx label = gen_label_rtx ();
4608 do_cmp_and_jump (remainder, const0_rtx, EQ,
4609 compute_mode, label);
4610 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4611 NULL_RTX, 0, OPTAB_WIDEN);
4612 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4613 expand_inc (quotient, const1_rtx);
4614 expand_dec (remainder, op1);
4615 emit_label (label);
4616 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4617 }
4618
4619 /* No luck with division elimination or divmod. Have to do it
4620 by conditionally adjusting op0 *and* the result. */
4621 {
4622 rtx label1, label2, label3, label4, label5;
4623 rtx adjusted_op0;
4624 rtx tem;
4625
4626 quotient = gen_reg_rtx (compute_mode);
4627 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4628 label1 = gen_label_rtx ();
4629 label2 = gen_label_rtx ();
4630 label3 = gen_label_rtx ();
4631 label4 = gen_label_rtx ();
4632 label5 = gen_label_rtx ();
4633 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4634 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4635 compute_mode, label1);
4636 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4637 quotient, 0, OPTAB_LIB_WIDEN);
4638 if (tem != quotient)
4639 emit_move_insn (quotient, tem);
4640 emit_jump_insn (gen_jump (label5));
4641 emit_barrier ();
4642 emit_label (label1);
4643 expand_dec (adjusted_op0, const1_rtx);
4644 emit_jump_insn (gen_jump (label4));
4645 emit_barrier ();
4646 emit_label (label2);
4647 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4648 compute_mode, label3);
4649 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4650 quotient, 0, OPTAB_LIB_WIDEN);
4651 if (tem != quotient)
4652 emit_move_insn (quotient, tem);
4653 emit_jump_insn (gen_jump (label5));
4654 emit_barrier ();
4655 emit_label (label3);
4656 expand_inc (adjusted_op0, const1_rtx);
4657 emit_label (label4);
4658 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4659 quotient, 0, OPTAB_LIB_WIDEN);
4660 if (tem != quotient)
4661 emit_move_insn (quotient, tem);
4662 expand_inc (quotient, const1_rtx);
4663 emit_label (label5);
4664 }
4665 }
4666 break;
4667
4668 case EXACT_DIV_EXPR:
4669 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4670 {
4671 HOST_WIDE_INT d = INTVAL (op1);
4672 unsigned HOST_WIDE_INT ml;
4673 int pre_shift;
4674 rtx t1;
4675
4676 pre_shift = floor_log2 (d & -d);
4677 ml = invert_mod2n (d >> pre_shift, size);
4678 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4679 build_int_cst (NULL_TREE, pre_shift),
4680 NULL_RTX, unsignedp);
4681 quotient = expand_mult (compute_mode, t1,
4682 gen_int_mode (ml, compute_mode),
4683 NULL_RTX, 1);
4684
4685 insn = get_last_insn ();
4686 set_unique_reg_note (insn,
4687 REG_EQUAL,
4688 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4689 compute_mode,
4690 op0, op1));
4691 }
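/* The exact-division case relies on D, once its trailing zero bits are
   shifted out, having a multiplicative inverse mod 2^SIZE.  E.g. for
   SIZE = 32 and D = 7, invert_mod2n yields 0xB6DB6DB7, and for an exact
   input such as OP0 = 21: (21 * 0xB6DB6DB7) mod 2^32 = 3 = 21 / 7.  */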
4692 break;
4693
4694 case ROUND_DIV_EXPR:
4695 case ROUND_MOD_EXPR:
4696 if (unsignedp)
4697 {
4698 rtx tem;
4699 rtx label;
4700 label = gen_label_rtx ();
4701 quotient = gen_reg_rtx (compute_mode);
4702 remainder = gen_reg_rtx (compute_mode);
4703 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4704 {
4705 rtx tem;
4706 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4707 quotient, 1, OPTAB_LIB_WIDEN);
4708 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4709 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4710 remainder, 1, OPTAB_LIB_WIDEN);
4711 }
4712 tem = plus_constant (op1, -1);
4713 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4714 build_int_cst (NULL_TREE, 1),
4715 NULL_RTX, 1);
4716 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4717 expand_inc (quotient, const1_rtx);
4718 expand_dec (remainder, op1);
4719 emit_label (label);
4720 }
4721 else
4722 {
4723 rtx abs_rem, abs_op1, tem, mask;
4724 rtx label;
4725 label = gen_label_rtx ();
4726 quotient = gen_reg_rtx (compute_mode);
4727 remainder = gen_reg_rtx (compute_mode);
4728 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4729 {
4730 rtx tem;
4731 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4732 quotient, 0, OPTAB_LIB_WIDEN);
4733 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4734 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4735 remainder, 0, OPTAB_LIB_WIDEN);
4736 }
4737 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4738 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4739 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4740 build_int_cst (NULL_TREE, 1),
4741 NULL_RTX, 1);
4742 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4743 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4744 NULL_RTX, 0, OPTAB_WIDEN);
4745 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4746 build_int_cst (NULL_TREE, size - 1),
4747 NULL_RTX, 0);
4748 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4749 NULL_RTX, 0, OPTAB_WIDEN);
4750 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4751 NULL_RTX, 0, OPTAB_WIDEN);
4752 expand_inc (quotient, tem);
4753 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4754 NULL_RTX, 0, OPTAB_WIDEN);
4755 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4756 NULL_RTX, 0, OPTAB_WIDEN);
4757 expand_dec (remainder, tem);
4758 emit_label (label);
4759 }
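/* In the signed sequence above, MASK = (op0 ^ op1) >> (SIZE - 1) is -1
   exactly when the operands have opposite signs, so (MASK ^ 1) - MASK is
   +1 or -1 and (MASK ^ op1) - MASK is op1 or -op1.  Whenever
   2*|remainder| >= |op1| the quotient is therefore moved one step away
   from zero and the remainder corrected by the matching multiple of op1,
   giving round-to-nearest.  E.g. op0 = -8, op1 = 3: truncation gives -2
   rem -2; the adjustment yields quotient -3, remainder 1.  */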
4760 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4761
4762 default:
4763 gcc_unreachable ();
4764 }
4765
4766 if (quotient == 0)
4767 {
4768 if (target && GET_MODE (target) != compute_mode)
4769 target = 0;
4770
4771 if (rem_flag)
4772 {
4773 /* Try to produce the remainder without producing the quotient.
4774 If we seem to have a divmod pattern that does not require widening,
4775 don't try widening here. We should really have a WIDEN argument
4776 to expand_twoval_binop, since what we'd really like to do here is
4777 1) try a mod insn in compute_mode
4778 2) try a divmod insn in compute_mode
4779 3) try a div insn in compute_mode and multiply-subtract to get
4780 remainder
4781 4) try the same things with widening allowed. */
4782 remainder
4783 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4784 op0, op1, target,
4785 unsignedp,
4786 ((optab2->handlers[compute_mode].insn_code
4787 != CODE_FOR_nothing)
4788 ? OPTAB_DIRECT : OPTAB_WIDEN));
4789 if (remainder == 0)
4790 {
4791 /* No luck there. Can we do remainder and divide at once
4792 without a library call? */
4793 remainder = gen_reg_rtx (compute_mode);
4794 if (! expand_twoval_binop ((unsignedp
4795 ? udivmod_optab
4796 : sdivmod_optab),
4797 op0, op1,
4798 NULL_RTX, remainder, unsignedp))
4799 remainder = 0;
4800 }
4801
4802 if (remainder)
4803 return gen_lowpart (mode, remainder);
4804 }
4805
4806 /* Produce the quotient. Try a quotient insn, but not a library call.
4807 If we have a divmod in this mode, use it in preference to widening
4808 the div (for this test we assume it will not fail). Note that optab2
4809 is set to the one of the two optabs that the call below will use. */
4810 quotient
4811 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4812 op0, op1, rem_flag ? NULL_RTX : target,
4813 unsignedp,
4814 ((optab2->handlers[compute_mode].insn_code
4815 != CODE_FOR_nothing)
4816 ? OPTAB_DIRECT : OPTAB_WIDEN));
4817
4818 if (quotient == 0)
4819 {
4820 /* No luck there. Try a quotient-and-remainder insn,
4821 keeping the quotient alone. */
4822 quotient = gen_reg_rtx (compute_mode);
4823 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4824 op0, op1,
4825 quotient, NULL_RTX, unsignedp))
4826 {
4827 quotient = 0;
4828 if (! rem_flag)
4829 /* Still no luck. If we are not computing the remainder,
4830 use a library call for the quotient. */
4831 quotient = sign_expand_binop (compute_mode,
4832 udiv_optab, sdiv_optab,
4833 op0, op1, target,
4834 unsignedp, OPTAB_LIB_WIDEN);
4835 }
4836 }
4837 }
4838
4839 if (rem_flag)
4840 {
4841 if (target && GET_MODE (target) != compute_mode)
4842 target = 0;
4843
4844 if (quotient == 0)
4845 {
4846 /* No divide instruction either. Use library for remainder. */
4847 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4848 op0, op1, target,
4849 unsignedp, OPTAB_LIB_WIDEN);
4850 /* No remainder function. Try a quotient-and-remainder
4851 function, keeping the remainder. */
4852 if (!remainder)
4853 {
4854 remainder = gen_reg_rtx (compute_mode);
4855 if (!expand_twoval_binop_libfunc
4856 (unsignedp ? udivmod_optab : sdivmod_optab,
4857 op0, op1,
4858 NULL_RTX, remainder,
4859 unsignedp ? UMOD : MOD))
4860 remainder = NULL_RTX;
4861 }
4862 }
4863 else
4864 {
4865 /* We divided. Now finish doing X - Y * (X / Y). */
4866 remainder = expand_mult (compute_mode, quotient, op1,
4867 NULL_RTX, unsignedp);
4868 remainder = expand_binop (compute_mode, sub_optab, op0,
4869 remainder, target, unsignedp,
4870 OPTAB_LIB_WIDEN);
4871 }
4872 }
4873
4874 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4875 }
4876 \f
4877 /* Return a tree node with data type TYPE, describing the value of X.
4878 Usually this is a VAR_DECL, if there is no obvious better choice.
4879 X may be an expression; however, we only support those expressions
4880 generated by loop.c. */
4881
4882 tree
4883 make_tree (tree type, rtx x)
4884 {
4885 tree t;
4886
4887 switch (GET_CODE (x))
4888 {
4889 case CONST_INT:
4890 {
4891 HOST_WIDE_INT hi = 0;
4892
4893 if (INTVAL (x) < 0
4894 && !(TYPE_UNSIGNED (type)
4895 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4896 < HOST_BITS_PER_WIDE_INT)))
4897 hi = -1;
4898
4899 t = build_int_cst_wide (type, INTVAL (x), hi);
4900
4901 return t;
4902 }
4903
4904 case CONST_DOUBLE:
4905 if (GET_MODE (x) == VOIDmode)
4906 t = build_int_cst_wide (type,
4907 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4908 else
4909 {
4910 REAL_VALUE_TYPE d;
4911
4912 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4913 t = build_real (type, d);
4914 }
4915
4916 return t;
4917
4918 case CONST_VECTOR:
4919 {
4920 int i, units;
4921 rtx elt;
4922 tree t = NULL_TREE;
4923
4924 units = CONST_VECTOR_NUNITS (x);
4925
4926 /* Build a tree with vector elements. */
4927 for (i = units - 1; i >= 0; --i)
4928 {
4929 elt = CONST_VECTOR_ELT (x, i);
4930 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4931 }
4932
4933 return build_vector (type, t);
4934 }
4935
4936 case PLUS:
4937 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4938 make_tree (type, XEXP (x, 1)));
4939
4940 case MINUS:
4941 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4942 make_tree (type, XEXP (x, 1)));
4943
4944 case NEG:
4945 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4946
4947 case MULT:
4948 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4949 make_tree (type, XEXP (x, 1)));
4950
4951 case ASHIFT:
4952 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4953 make_tree (type, XEXP (x, 1)));
4954
4955 case LSHIFTRT:
4956 t = lang_hooks.types.unsigned_type (type);
4957 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4958 make_tree (t, XEXP (x, 0)),
4959 make_tree (type, XEXP (x, 1))));
4960
4961 case ASHIFTRT:
4962 t = lang_hooks.types.signed_type (type);
4963 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4964 make_tree (t, XEXP (x, 0)),
4965 make_tree (type, XEXP (x, 1))));
4966
4967 case DIV:
4968 if (TREE_CODE (type) != REAL_TYPE)
4969 t = lang_hooks.types.signed_type (type);
4970 else
4971 t = type;
4972
4973 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4974 make_tree (t, XEXP (x, 0)),
4975 make_tree (t, XEXP (x, 1))));
4976 case UDIV:
4977 t = lang_hooks.types.unsigned_type (type);
4978 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4979 make_tree (t, XEXP (x, 0)),
4980 make_tree (t, XEXP (x, 1))));
4981
4982 case SIGN_EXTEND:
4983 case ZERO_EXTEND:
4984 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4985 GET_CODE (x) == ZERO_EXTEND);
4986 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4987
4988 default:
4989 t = build_decl (VAR_DECL, NULL_TREE, type);
4990
4991 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4992 ptr_mode. So convert. */
4993 if (POINTER_TYPE_P (type))
4994 x = convert_memory_address (TYPE_MODE (type), x);
4995
4996 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4997 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4998 t->decl_with_rtl.rtl = x;
4999
5000 return t;
5001 }
5002 }
5003
5004 /* Check whether the multiplication X * MULT + ADD overflows.
5005 X, MULT and ADD must be CONST_*.
5006 MODE is the machine mode for the computation.
5007 X and MULT must have mode MODE. ADD may have a different mode.
5008 So can X (defaults to same as MODE).
5009 UNSIGNEDP is nonzero to do unsigned multiplication. */
5010
5011 bool
5012 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
5013 enum machine_mode mode, int unsignedp)
5014 {
5015 tree type, mult_type, add_type, result;
5016
5017 type = lang_hooks.types.type_for_mode (mode, unsignedp);
5018
5019 /* In order to get a proper overflow indication from an unsigned
5020 type, we have to pretend that it's a sizetype. */
5021 mult_type = type;
5022 if (unsignedp)
5023 {
5024 /* FIXME: It would be nice if we could step directly from this
5025 type to its sizetype equivalent. */
5026 mult_type = build_distinct_type_copy (type);
5027 TYPE_IS_SIZETYPE (mult_type) = 1;
5028 }
5029
5030 add_type = (GET_MODE (add) == VOIDmode ? mult_type
5031 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
5032
5033 result = fold_build2 (PLUS_EXPR, mult_type,
5034 fold_build2 (MULT_EXPR, mult_type,
5035 make_tree (mult_type, x),
5036 make_tree (mult_type, mult)),
5037 make_tree (add_type, add));
5038
5039 return TREE_CONSTANT_OVERFLOW (result);
5040 }
5041
5042 /* Return an rtx representing the value of X * MULT + ADD.
5043 TARGET is a suggestion for where to store the result (an rtx).
5044 MODE is the machine mode for the computation.
5045 X and MULT must have mode MODE. ADD may have a different mode.
5046 So can X (defaults to same as MODE).
5047 UNSIGNEDP is nonzero to do unsigned multiplication.
5048 This may emit insns. */
5049
5050 rtx
5051 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5052 int unsignedp)
5053 {
5054 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5055 tree add_type = (GET_MODE (add) == VOIDmode
5056 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5057 unsignedp));
5058 tree result = fold_build2 (PLUS_EXPR, type,
5059 fold_build2 (MULT_EXPR, type,
5060 make_tree (type, x),
5061 make_tree (type, mult)),
5062 make_tree (add_type, add));
5063
5064 return expand_expr (result, target, VOIDmode, 0);
5065 }
5066 \f
5067 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5068 and returning TARGET.
5069
5070 If TARGET is 0, a pseudo-register or constant is returned. */
5071
5072 rtx
5073 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5074 {
5075 rtx tem = 0;
5076
5077 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5078 tem = simplify_binary_operation (AND, mode, op0, op1);
5079 if (tem == 0)
5080 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5081
5082 if (target == 0)
5083 target = tem;
5084 else if (tem != target)
5085 emit_move_insn (target, tem);
5086 return target;
5087 }
5088 \f
5089 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5090 and storing in TARGET. Normally return TARGET.
5091 Return 0 if that cannot be done.
5092
5093 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5094 it is VOIDmode, they cannot both be CONST_INT.
5095
5096 UNSIGNEDP is for the case where we have to widen the operands
5097 to perform the operation. It says to use zero-extension.
5098
5099 NORMALIZEP is 1 if we should convert the result to be either zero
5100 or one. NORMALIZEP is -1 if we should convert the result to be
5101 either zero or -1. If NORMALIZEP is zero, the result will be left
5102 "raw" out of the scc insn. */
5103
5104 rtx
5105 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5106 enum machine_mode mode, int unsignedp, int normalizep)
5107 {
5108 rtx subtarget;
5109 enum insn_code icode;
5110 enum machine_mode compare_mode;
5111 enum machine_mode target_mode = GET_MODE (target);
5112 rtx tem;
5113 rtx last = get_last_insn ();
5114 rtx pattern, comparison;
5115
5116 if (unsignedp)
5117 code = unsigned_condition (code);
5118
5119 /* If one operand is constant, make it the second one. Only do this
5120 if the other operand is not constant as well. */
5121
5122 if (swap_commutative_operands_p (op0, op1))
5123 {
5124 tem = op0;
5125 op0 = op1;
5126 op1 = tem;
5127 code = swap_condition (code);
5128 }
5129
5130 if (mode == VOIDmode)
5131 mode = GET_MODE (op0);
5132
5133 /* For some comparisons with 1 and -1, we can convert this to
5134 comparisons with zero. This will often produce more opportunities for
5135 store-flag insns. */
5136
5137 switch (code)
5138 {
5139 case LT:
5140 if (op1 == const1_rtx)
5141 op1 = const0_rtx, code = LE;
5142 break;
5143 case LE:
5144 if (op1 == constm1_rtx)
5145 op1 = const0_rtx, code = LT;
5146 break;
5147 case GE:
5148 if (op1 == const1_rtx)
5149 op1 = const0_rtx, code = GT;
5150 break;
5151 case GT:
5152 if (op1 == constm1_rtx)
5153 op1 = const0_rtx, code = GE;
5154 break;
5155 case GEU:
5156 if (op1 == const1_rtx)
5157 op1 = const0_rtx, code = NE;
5158 break;
5159 case LTU:
5160 if (op1 == const1_rtx)
5161 op1 = const0_rtx, code = EQ;
5162 break;
5163 default:
5164 break;
5165 }
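/* The rewrites above rely on integer ordering: A < 1 is A <= 0,
   A <= -1 is A < 0, A >= 1 is A > 0, A > -1 is A >= 0, and for
   unsigned operands A >= 1 is just A != 0 while A < 1 is A == 0.  */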
5166
5167 /* If we are comparing a double-word integer with zero or -1, we can
5168 convert the comparison into one involving a single word. */
5169 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5170 && GET_MODE_CLASS (mode) == MODE_INT
5171 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5172 {
5173 if ((code == EQ || code == NE)
5174 && (op1 == const0_rtx || op1 == constm1_rtx))
5175 {
5176 rtx op00, op01, op0both;
5177
5178 /* Do a logical OR or AND of the two words and compare the result. */
5179 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5180 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5181 op0both = expand_binop (word_mode,
5182 op1 == const0_rtx ? ior_optab : and_optab,
5183 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5184
5185 if (op0both != 0)
5186 return emit_store_flag (target, code, op0both, op1, word_mode,
5187 unsignedp, normalizep);
5188 }
5189 else if ((code == LT || code == GE) && op1 == const0_rtx)
5190 {
5191 rtx op0h;
5192
5193 /* If testing the sign bit, can just test on high word. */
5194 op0h = simplify_gen_subreg (word_mode, op0, mode,
5195 subreg_highpart_offset (word_mode, mode));
5196 return emit_store_flag (target, code, op0h, op1, word_mode,
5197 unsignedp, normalizep);
5198 }
5199 }
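/* E.g. on a 32-bit target a DImode X == 0 becomes (XLOW | XHIGH) == 0,
   X == -1 becomes (XLOW & XHIGH) == -1, and X < 0 or X >= 0 just tests
   the sign of the high word (writing XLOW/XHIGH for the two word
   halves); each case is handled by a recursive word_mode call.  */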
5200
5201 /* From now on, we won't change CODE, so set ICODE now. */
5202 icode = setcc_gen_code[(int) code];
5203
5204 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5205 complement of A (for GE) and shifting the sign bit to the low bit. */
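/* For a 32-bit A and NORMALIZEP == 1 that is (unsigned) A >> 31 for
   A < 0 and (unsigned) ~A >> 31 for A >= 0; with NORMALIZEP == -1 an
   arithmetic shift produces 0 or -1 directly.  */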
5206 if (op1 == const0_rtx && (code == LT || code == GE)
5207 && GET_MODE_CLASS (mode) == MODE_INT
5208 && (normalizep || STORE_FLAG_VALUE == 1
5209 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5210 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5211 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5212 {
5213 subtarget = target;
5214
5215 /* If the result is to be wider than OP0, it is best to convert it
5216 first. If it is to be narrower, it is *incorrect* to convert it
5217 first. */
5218 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5219 {
5220 op0 = convert_modes (target_mode, mode, op0, 0);
5221 mode = target_mode;
5222 }
5223
5224 if (target_mode != mode)
5225 subtarget = 0;
5226
5227 if (code == GE)
5228 op0 = expand_unop (mode, one_cmpl_optab, op0,
5229 ((STORE_FLAG_VALUE == 1 || normalizep)
5230 ? 0 : subtarget), 0);
5231
5232 if (STORE_FLAG_VALUE == 1 || normalizep)
5233 /* If we are supposed to produce a 0/1 value, we want to do
5234 a logical shift from the sign bit to the low-order bit; for
5235 a -1/0 value, we do an arithmetic shift. */
5236 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5237 size_int (GET_MODE_BITSIZE (mode) - 1),
5238 subtarget, normalizep != -1);
5239
5240 if (mode != target_mode)
5241 op0 = convert_modes (target_mode, mode, op0, 0);
5242
5243 return op0;
5244 }
5245
5246 if (icode != CODE_FOR_nothing)
5247 {
5248 insn_operand_predicate_fn pred;
5249
5250 /* We think we may be able to do this with a scc insn. Emit the
5251 comparison and then the scc insn. */
5252
5253 do_pending_stack_adjust ();
5254 last = get_last_insn ();
5255
5256 comparison
5257 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5258 if (CONSTANT_P (comparison))
5259 {
5260 switch (GET_CODE (comparison))
5261 {
5262 case CONST_INT:
5263 if (comparison == const0_rtx)
5264 return const0_rtx;
5265 break;
5266
5267 #ifdef FLOAT_STORE_FLAG_VALUE
5268 case CONST_DOUBLE:
5269 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5270 return const0_rtx;
5271 break;
5272 #endif
5273 default:
5274 gcc_unreachable ();
5275 }
5276
5277 if (normalizep == 1)
5278 return const1_rtx;
5279 if (normalizep == -1)
5280 return constm1_rtx;
5281 return const_true_rtx;
5282 }
5283
5284 /* The code of COMPARISON may not match CODE if compare_from_rtx
5285 decided to swap its operands and reverse the original code.
5286
5287 We know that compare_from_rtx returns either a CONST_INT or
5288 a new comparison code, so it is safe to just extract the
5289 code from COMPARISON. */
5290 code = GET_CODE (comparison);
5291
5292 /* Get a reference to the target in the proper mode for this insn. */
5293 compare_mode = insn_data[(int) icode].operand[0].mode;
5294 subtarget = target;
5295 pred = insn_data[(int) icode].operand[0].predicate;
5296 if (optimize || ! (*pred) (subtarget, compare_mode))
5297 subtarget = gen_reg_rtx (compare_mode);
5298
5299 pattern = GEN_FCN (icode) (subtarget);
5300 if (pattern)
5301 {
5302 emit_insn (pattern);
5303
5304 /* If we are converting to a wider mode, first convert to
5305 TARGET_MODE, then normalize. This produces better combining
5306 opportunities on machines that have a SIGN_EXTRACT when we are
5307 testing a single bit. This mostly benefits the 68k.
5308
5309 If STORE_FLAG_VALUE does not have the sign bit set when
5310 interpreted in COMPARE_MODE, we can do this conversion as
5311 unsigned, which is usually more efficient. */
5312 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5313 {
5314 convert_move (target, subtarget,
5315 (GET_MODE_BITSIZE (compare_mode)
5316 <= HOST_BITS_PER_WIDE_INT)
5317 && 0 == (STORE_FLAG_VALUE
5318 & ((HOST_WIDE_INT) 1
5319 << (GET_MODE_BITSIZE (compare_mode) -1))));
5320 op0 = target;
5321 compare_mode = target_mode;
5322 }
5323 else
5324 op0 = subtarget;
5325
5326 /* If we want to keep subexpressions around, don't reuse our
5327 last target. */
5328
5329 if (optimize)
5330 subtarget = 0;
5331
5332 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
5333 we don't have to do anything. */
5334 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5335 ;
5336 /* STORE_FLAG_VALUE might be the most negative number, so write
5337 the comparison this way to avoid a compile-time warning. */
5338 else if (- normalizep == STORE_FLAG_VALUE)
5339 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5340
5341 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5342 makes it hard to use a value of just the sign bit due to
5343 ANSI integer constant typing rules. */
5344 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5345 && (STORE_FLAG_VALUE
5346 & ((HOST_WIDE_INT) 1
5347 << (GET_MODE_BITSIZE (compare_mode) - 1))))
5348 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5349 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5350 subtarget, normalizep == 1);
5351 else
5352 {
5353 gcc_assert (STORE_FLAG_VALUE & 1);
5354
5355 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5356 if (normalizep == -1)
5357 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5358 }
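/* For instance, if the target's scc pattern stores -1 for true
   (STORE_FLAG_VALUE == -1) and NORMALIZEP is 1, the negation above maps
   -1/0 onto 1/0; if STORE_FLAG_VALUE is just the sign bit, the shift
   above moves it down to give 0/1 (or 0/-1 for NORMALIZEP == -1).  */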
5359
5360 /* If we were converting to a smaller mode, do the
5361 conversion now. */
5362 if (target_mode != compare_mode)
5363 {
5364 convert_move (target, op0, 0);
5365 return target;
5366 }
5367 else
5368 return op0;
5369 }
5370 }
5371
5372 delete_insns_since (last);
5373
5374 /* If optimizing, use different pseudo registers for each insn, instead
5375 of reusing the same pseudo. This leads to better CSE, but slows
5376 down the compiler, since there are more pseudos. */
5377 subtarget = (!optimize
5378 && (target_mode == mode)) ? target : NULL_RTX;
5379
5380 /* If we reached here, we can't do this with a scc insn. However, there
5381 are some comparisons that can be done directly. For example, if
5382 this is an equality comparison of integers, we can try to exclusive-or
5383 (or subtract) the two operands and use a recursive call to try the
5384 comparison with zero. Don't do any of these cases if branches are
5385 very cheap. */
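/* E.g. A == B is retried as (A ^ B) == 0 (or (A - B) == 0 when no xor
   pattern is available), which the zero-comparison paths below may then
   handle without a branch.  */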
5386
5387 if (BRANCH_COST > 0
5388 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5389 && op1 != const0_rtx)
5390 {
5391 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5392 OPTAB_WIDEN);
5393
5394 if (tem == 0)
5395 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5396 OPTAB_WIDEN);
5397 if (tem != 0)
5398 tem = emit_store_flag (target, code, tem, const0_rtx,
5399 mode, unsignedp, normalizep);
5400 if (tem == 0)
5401 delete_insns_since (last);
5402 return tem;
5403 }
5404
5405 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5406 the constant zero. Reject all other comparisons at this point. Only
5407 do LE and GT if branches are expensive since they are expensive on
5408 2-operand machines. */
5409
5410 if (BRANCH_COST == 0
5411 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5412 || (code != EQ && code != NE
5413 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5414 return 0;
5415
5416 /* See what we need to return. We can only return a 1, -1, or the
5417 sign bit. */
5418
5419 if (normalizep == 0)
5420 {
5421 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5422 normalizep = STORE_FLAG_VALUE;
5423
5424 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5425 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5426 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5427 ;
5428 else
5429 return 0;
5430 }
5431
5432 /* Try to put the result of the comparison in the sign bit. Assume we can't
5433 do the necessary operation below. */
5434
5435 tem = 0;
5436
5437 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5438 the sign bit set. */
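/* Worked 8-bit example: A == 0 gives 0 | -1 = -1 (sign bit set),
   A == 5 gives 5 | 4 = 5 (clear), and any negative A already has the
   sign bit set, so it survives the OR.  */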
5439
5440 if (code == LE)
5441 {
5442 /* This is destructive, so SUBTARGET can't be OP0. */
5443 if (rtx_equal_p (subtarget, op0))
5444 subtarget = 0;
5445
5446 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5447 OPTAB_WIDEN);
5448 if (tem)
5449 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5450 OPTAB_WIDEN);
5451 }
5452
5453 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5454 number of bits in the mode of OP0, minus one. */
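/* Worked 8-bit example (BITS == 7): A == 5 gives (5 >> 7) - 5 = -5
   (sign bit set), A == 0 gives 0 - 0 = 0 (clear), and A == -3 gives
   -1 - (-3) = 2 (clear), so the sign bit is set exactly when A > 0.  */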
5455
5456 if (code == GT)
5457 {
5458 if (rtx_equal_p (subtarget, op0))
5459 subtarget = 0;
5460
5461 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5462 size_int (GET_MODE_BITSIZE (mode) - 1),
5463 subtarget, 0);
5464 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5465 OPTAB_WIDEN);
5466 }
5467
5468 if (code == EQ || code == NE)
5469 {
5470 /* For EQ or NE, one way to do the comparison is to apply an operation
5471 that converts the operand into a positive number if it is nonzero
5472 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5473 for NE we negate. This puts the result in the sign bit. Then we
5474 normalize with a shift, if needed.
5475
5476 Two operations that can do the above actions are ABS and FFS, so try
5477 them. If that doesn't work, and MODE is smaller than a full word,
5478 we can use zero-extension to the wider mode (an unsigned conversion)
5479 as the operation. */
5480
5481 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5482 that is compensated by the subsequent overflow when subtracting
5483 one / negating. */
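/* Worked 8-bit example: for NE, -abs (5) = -5 (sign set, A != 0) and
   -abs (0) = 0 (clear); for EQ, abs (0) - 1 = -1 (sign set) while
   abs (5) - 1 = 4 (clear), and even A == -128, where abs wraps to -128,
   ends up at 127 after the subtraction, leaving the sign bit clear.  */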
5484
5485 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5486 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5487 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5488 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5489 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5490 {
5491 tem = convert_modes (word_mode, mode, op0, 1);
5492 mode = word_mode;
5493 }
5494
5495 if (tem != 0)
5496 {
5497 if (code == EQ)
5498 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5499 0, OPTAB_WIDEN);
5500 else
5501 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5502 }
5503
5504 /* If we couldn't do it that way, for NE we can "or" the two's complement
5505 of the value with itself. For EQ, we take the one's complement of
5506 that "or", which is an extra insn, so we only handle EQ if branches
5507 are expensive. */
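/* Worked 8-bit example: A == 5 gives -5 | 5 = -1 (sign set, A != 0),
   A == 0 gives 0 | 0 = 0 (clear), and A == -128 gives -128 | -128 = -128
   (sign set); complementing that result flips the test to A == 0.  */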
5508
5509 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5510 {
5511 if (rtx_equal_p (subtarget, op0))
5512 subtarget = 0;
5513
5514 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5515 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5516 OPTAB_WIDEN);
5517
5518 if (tem && code == EQ)
5519 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5520 }
5521 }
5522
5523 if (tem && normalizep)
5524 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5525 size_int (GET_MODE_BITSIZE (mode) - 1),
5526 subtarget, normalizep == 1);
5527
5528 if (tem)
5529 {
5530 if (GET_MODE (tem) != target_mode)
5531 {
5532 convert_move (target, tem, 0);
5533 tem = target;
5534 }
5535 else if (!subtarget)
5536 {
5537 emit_move_insn (target, tem);
5538 tem = target;
5539 }
5540 }
5541 else
5542 delete_insns_since (last);
5543
5544 return tem;
5545 }
5546
5547 /* Like emit_store_flag, but always succeeds. */
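/* If the store-flag attempt fails, the fallback below emits, in effect:
     target = 1;  if (op0 CODE op1) goto label;  target = 0;  label:
   i.e. a branchy 0/1 materialization of the comparison.  */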
5548
5549 rtx
5550 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5551 enum machine_mode mode, int unsignedp, int normalizep)
5552 {
5553 rtx tem, label;
5554
5555 /* First see if emit_store_flag can do the job. */
5556 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5557 if (tem != 0)
5558 return tem;
5559
5560 if (normalizep == 0)
5561 normalizep = 1;
5562
5563 /* If this failed, we have to do this with set/compare/jump/set code. */
5564
5565 if (!REG_P (target)
5566 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5567 target = gen_reg_rtx (GET_MODE (target));
5568
5569 emit_move_insn (target, const1_rtx);
5570 label = gen_label_rtx ();
5571 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5572 NULL_RTX, label);
5573
5574 emit_move_insn (target, const0_rtx);
5575 emit_label (label);
5576
5577 return target;
5578 }
5579 \f
5580 /* Perform possibly multi-word comparison and conditional jump to LABEL
5581 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5582
5583 The algorithm is based on the code in expr.c:do_jump.
5584
5585 Note that this does not perform a general comparison. Only
5586 variants generated within expmed.c are correctly handled, others
5587 could be handled if needed. */
5588
5589 static void
5590 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5591 rtx label)
5592 {
5593 /* If this mode is an integer too wide to compare properly,
5594 compare word by word. Rely on cse to optimize constant cases. */
5595
5596 if (GET_MODE_CLASS (mode) == MODE_INT
5597 && ! can_compare_p (op, mode, ccp_jump))
5598 {
5599 rtx label2 = gen_label_rtx ();
5600
5601 switch (op)
5602 {
5603 case LTU:
5604 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5605 break;
5606
5607 case LEU:
5608 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5609 break;
5610
5611 case LT:
5612 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5613 break;
5614
5615 case GT:
5616 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5617 break;
5618
5619 case GE:
5620 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5621 break;
5622
5623 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5624 those are the only equality comparisons we do. */
5625 case EQ:
5626 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5627 do_jump_by_parts_equality_rtx (arg1, label2, label);
5628 break;
5629
5630 case NE:
5631 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5632 do_jump_by_parts_equality_rtx (arg1, label, label2);
5633 break;
5634
5635 default:
5636 gcc_unreachable ();
5637 }
5638
5639 emit_label (label2);
5640 }
5641 else
5642 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5643 }