gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
5 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41
42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43 unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT, rtx);
47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx, int);
51 static rtx mask_rtx (enum machine_mode, int, int, int);
52 static rtx lshift_value (enum machine_mode, rtx, int, int);
53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT, int);
55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
58
  59 /* Test whether a value is zero or a power of two.  */
60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
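/* For example, x == 8 (binary 1000) gives 8 & 7 == 0 and x == 0 gives
   0 & -1 == 0, so both pass; x == 12 (binary 1100) gives 12 & 11 == 8,
   so it fails.  Clearing the lowest set bit this way yields zero exactly
   when at most one bit was set.  */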
61
62 /* Nonzero means divides or modulus operations are relatively cheap for
63 powers of two, so don't use branches; emit the operation instead.
64 Usually, this will mean that the MD file will emit non-branch
65 sequences. */
66
67 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
68 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
69
70 #ifndef SLOW_UNALIGNED_ACCESS
71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
72 #endif
73
74 /* For compilers that support multiple targets with different word sizes,
75 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
76 is the H8/300(H) compiler. */
77
78 #ifndef MAX_BITS_PER_WORD
79 #define MAX_BITS_PER_WORD BITS_PER_WORD
80 #endif
81
82 /* Reduce conditional compilation elsewhere. */
83 #ifndef HAVE_insv
84 #define HAVE_insv 0
85 #define CODE_FOR_insv CODE_FOR_nothing
86 #define gen_insv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extv
89 #define HAVE_extv 0
90 #define CODE_FOR_extv CODE_FOR_nothing
91 #define gen_extv(a,b,c,d) NULL_RTX
92 #endif
93 #ifndef HAVE_extzv
94 #define HAVE_extzv 0
95 #define CODE_FOR_extzv CODE_FOR_nothing
96 #define gen_extzv(a,b,c,d) NULL_RTX
97 #endif
98
99 /* Cost of various pieces of RTL. Note that some of these are indexed by
100 shift count and some by mode. */
101 static int zero_cost;
102 static int add_cost[NUM_MACHINE_MODES];
103 static int neg_cost[NUM_MACHINE_MODES];
104 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107 static int mul_cost[NUM_MACHINE_MODES];
108 static int sdiv_cost[NUM_MACHINE_MODES];
109 static int udiv_cost[NUM_MACHINE_MODES];
110 static int mul_widen_cost[NUM_MACHINE_MODES];
111 static int mul_highpart_cost[NUM_MACHINE_MODES];
112
113 void
114 init_expmed (void)
115 {
116 struct
117 {
118 struct rtx_def reg; rtunion reg_fld[2];
119 struct rtx_def plus; rtunion plus_fld1;
120 struct rtx_def neg;
121 struct rtx_def mult; rtunion mult_fld1;
122 struct rtx_def sdiv; rtunion sdiv_fld1;
123 struct rtx_def udiv; rtunion udiv_fld1;
124 struct rtx_def zext;
125 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
126 struct rtx_def smod_32; rtunion smod_32_fld1;
127 struct rtx_def wide_mult; rtunion wide_mult_fld1;
128 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
129 struct rtx_def wide_trunc;
130 struct rtx_def shift; rtunion shift_fld1;
131 struct rtx_def shift_mult; rtunion shift_mult_fld1;
132 struct rtx_def shift_add; rtunion shift_add_fld1;
133 struct rtx_def shift_sub; rtunion shift_sub_fld1;
134 } all;
135
136 rtx pow2[MAX_BITS_PER_WORD];
137 rtx cint[MAX_BITS_PER_WORD];
138 int m, n;
139 enum machine_mode mode, wider_mode;
140
141 zero_cost = rtx_cost (const0_rtx, 0);
142
143 for (m = 1; m < MAX_BITS_PER_WORD; m++)
144 {
145 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
146 cint[m] = GEN_INT (m);
147 }
148
149 memset (&all, 0, sizeof all);
150
151 PUT_CODE (&all.reg, REG);
152 /* Avoid using hard regs in ways which may be unsupported. */
153 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
154
155 PUT_CODE (&all.plus, PLUS);
156 XEXP (&all.plus, 0) = &all.reg;
157 XEXP (&all.plus, 1) = &all.reg;
158
159 PUT_CODE (&all.neg, NEG);
160 XEXP (&all.neg, 0) = &all.reg;
161
162 PUT_CODE (&all.mult, MULT);
163 XEXP (&all.mult, 0) = &all.reg;
164 XEXP (&all.mult, 1) = &all.reg;
165
166 PUT_CODE (&all.sdiv, DIV);
167 XEXP (&all.sdiv, 0) = &all.reg;
168 XEXP (&all.sdiv, 1) = &all.reg;
169
170 PUT_CODE (&all.udiv, UDIV);
171 XEXP (&all.udiv, 0) = &all.reg;
172 XEXP (&all.udiv, 1) = &all.reg;
173
174 PUT_CODE (&all.sdiv_32, DIV);
175 XEXP (&all.sdiv_32, 0) = &all.reg;
176 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
177
178 PUT_CODE (&all.smod_32, MOD);
179 XEXP (&all.smod_32, 0) = &all.reg;
180 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
181
182 PUT_CODE (&all.zext, ZERO_EXTEND);
183 XEXP (&all.zext, 0) = &all.reg;
184
185 PUT_CODE (&all.wide_mult, MULT);
186 XEXP (&all.wide_mult, 0) = &all.zext;
187 XEXP (&all.wide_mult, 1) = &all.zext;
188
189 PUT_CODE (&all.wide_lshr, LSHIFTRT);
190 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
191
192 PUT_CODE (&all.wide_trunc, TRUNCATE);
193 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
194
195 PUT_CODE (&all.shift, ASHIFT);
196 XEXP (&all.shift, 0) = &all.reg;
197
198 PUT_CODE (&all.shift_mult, MULT);
199 XEXP (&all.shift_mult, 0) = &all.reg;
200
201 PUT_CODE (&all.shift_add, PLUS);
202 XEXP (&all.shift_add, 0) = &all.shift_mult;
203 XEXP (&all.shift_add, 1) = &all.reg;
204
205 PUT_CODE (&all.shift_sub, MINUS);
206 XEXP (&all.shift_sub, 0) = &all.shift_mult;
207 XEXP (&all.shift_sub, 1) = &all.reg;
208
209 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
210 mode != VOIDmode;
211 mode = GET_MODE_WIDER_MODE (mode))
212 {
213 PUT_MODE (&all.reg, mode);
214 PUT_MODE (&all.plus, mode);
215 PUT_MODE (&all.neg, mode);
216 PUT_MODE (&all.mult, mode);
217 PUT_MODE (&all.sdiv, mode);
218 PUT_MODE (&all.udiv, mode);
219 PUT_MODE (&all.sdiv_32, mode);
220 PUT_MODE (&all.smod_32, mode);
221 PUT_MODE (&all.wide_trunc, mode);
222 PUT_MODE (&all.shift, mode);
223 PUT_MODE (&all.shift_mult, mode);
224 PUT_MODE (&all.shift_add, mode);
225 PUT_MODE (&all.shift_sub, mode);
226
227 add_cost[mode] = rtx_cost (&all.plus, SET);
228 neg_cost[mode] = rtx_cost (&all.neg, SET);
229 mul_cost[mode] = rtx_cost (&all.mult, SET);
230 sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
231 udiv_cost[mode] = rtx_cost (&all.udiv, SET);
232
233 sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
234 <= 2 * add_cost[mode]);
235 smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
236 <= 4 * add_cost[mode]);
237
238 wider_mode = GET_MODE_WIDER_MODE (mode);
239 if (wider_mode != VOIDmode)
240 {
241 PUT_MODE (&all.zext, wider_mode);
242 PUT_MODE (&all.wide_mult, wider_mode);
243 PUT_MODE (&all.wide_lshr, wider_mode);
244 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
245
246 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
247 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
248 }
249
250 shift_cost[mode][0] = 0;
251 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
252
253 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
254 for (m = 1; m < n; m++)
255 {
256 XEXP (&all.shift, 1) = cint[m];
257 XEXP (&all.shift_mult, 1) = pow2[m];
258
259 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
260 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
261 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
262 }
263 }
264 }
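/* The tables filled in above let later code choose the cheaper of two
   equivalent RTL sequences.  As a rough illustration, for an integer mode
   M: x * 9 can be rewritten as (x << 3) + x, so a multiply is only worth
   keeping when mul_cost[M] is below shiftadd_cost[M][3]; similarly,
   sdiv_pow2_cheap[M] records whether a direct x / 32 costs no more than
   two additions, in which case the branchy shift-based expansion of
   division by a power of two can be skipped.  */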
265
266 /* Return an rtx representing minus the value of X.
267 MODE is the intended mode of the result,
268 useful if X is a CONST_INT. */
269
270 rtx
271 negate_rtx (enum machine_mode mode, rtx x)
272 {
273 rtx result = simplify_unary_operation (NEG, mode, x, mode);
274
275 if (result == 0)
276 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
277
278 return result;
279 }
280
281 /* Report on the availability of insv/extv/extzv and the desired mode
282 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
283 is false; else the mode of the specified operand. If OPNO is -1,
284 all the caller cares about is whether the insn is available. */
285 enum machine_mode
286 mode_for_extraction (enum extraction_pattern pattern, int opno)
287 {
288 const struct insn_data *data;
289
290 switch (pattern)
291 {
292 case EP_insv:
293 if (HAVE_insv)
294 {
295 data = &insn_data[CODE_FOR_insv];
296 break;
297 }
298 return MAX_MACHINE_MODE;
299
300 case EP_extv:
301 if (HAVE_extv)
302 {
303 data = &insn_data[CODE_FOR_extv];
304 break;
305 }
306 return MAX_MACHINE_MODE;
307
308 case EP_extzv:
309 if (HAVE_extzv)
310 {
311 data = &insn_data[CODE_FOR_extzv];
312 break;
313 }
314 return MAX_MACHINE_MODE;
315
316 default:
317 gcc_unreachable ();
318 }
319
320 if (opno == -1)
321 return VOIDmode;
322
323 /* Everyone who uses this function used to follow it with
324 if (result == VOIDmode) result = word_mode; */
325 if (data->operand[opno].mode == VOIDmode)
326 return word_mode;
327 return data->operand[opno].mode;
328 }
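/* For example, store_bit_field_1 below asks for operand 3 of the insv
   pattern to learn which mode the inserted value must have, and
   extract_bit_field_1 asks for operand 0 of ext(z)v to learn the mode
   in which the extracted field is delivered.  */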
329
330 /* Return true if X, of mode MODE, matches the predicate for operand
331 OPNO of instruction ICODE. Allow volatile memories, regardless of
332 the ambient volatile_ok setting. */
333
334 static bool
335 check_predicate_volatile_ok (enum insn_code icode, int opno,
336 rtx x, enum machine_mode mode)
337 {
338 bool save_volatile_ok, result;
339
340 save_volatile_ok = volatile_ok;
341 result = insn_data[(int) icode].operand[opno].predicate (x, mode);
342 volatile_ok = save_volatile_ok;
343 return result;
344 }
345 \f
346 /* A subroutine of store_bit_field, with the same arguments. Return true
347 if the operation could be implemented.
348
349 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
350 no other way of implementing the operation. If FALLBACK_P is false,
351 return false instead. */
352
353 static bool
354 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
355 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
356 rtx value, bool fallback_p)
357 {
358 unsigned int unit
359 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
360 unsigned HOST_WIDE_INT offset, bitpos;
361 rtx op0 = str_rtx;
362 int byte_offset;
363 rtx orig_value;
364
365 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
366
367 while (GET_CODE (op0) == SUBREG)
368 {
369 /* The following line once was done only if WORDS_BIG_ENDIAN,
370 but I think that is a mistake. WORDS_BIG_ENDIAN is
371 meaningful at a much higher level; when structures are copied
372 between memory and regs, the higher-numbered regs
373 always get higher addresses. */
374 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
375 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
376
377 byte_offset = 0;
378
379 /* Paradoxical subregs need special handling on big endian machines. */
380 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
381 {
382 int difference = inner_mode_size - outer_mode_size;
383
384 if (WORDS_BIG_ENDIAN)
385 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
386 if (BYTES_BIG_ENDIAN)
387 byte_offset += difference % UNITS_PER_WORD;
388 }
389 else
390 byte_offset = SUBREG_BYTE (op0);
391
392 bitnum += byte_offset * BITS_PER_UNIT;
393 op0 = SUBREG_REG (op0);
394 }
395
396 /* No action is needed if the target is a register and if the field
397 lies completely outside that register. This can occur if the source
398 code contains an out-of-bounds access to a small array. */
399 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
400 return true;
401
402 /* Use vec_set patterns for inserting parts of vectors whenever
403 available. */
404 if (VECTOR_MODE_P (GET_MODE (op0))
405 && !MEM_P (op0)
406 && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
407 != CODE_FOR_nothing)
408 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
409 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
410 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
411 {
412 enum machine_mode outermode = GET_MODE (op0);
413 enum machine_mode innermode = GET_MODE_INNER (outermode);
414 int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
415 int pos = bitnum / GET_MODE_BITSIZE (innermode);
416 rtx rtxpos = GEN_INT (pos);
417 rtx src = value;
418 rtx dest = op0;
419 rtx pat, seq;
420 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
421 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
422 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
423
424 start_sequence ();
425
426 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
427 src = copy_to_mode_reg (mode1, src);
428
429 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
430 rtxpos = copy_to_mode_reg (mode1, rtxpos);
431
432 /* We could handle this, but we should always be called with a pseudo
433 for our targets and all insns should take them as outputs. */
434 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
435 && (*insn_data[icode].operand[1].predicate) (src, mode1)
436 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
437 pat = GEN_FCN (icode) (dest, src, rtxpos);
438 seq = get_insns ();
439 end_sequence ();
440 if (pat)
441 {
442 emit_insn (seq);
443 emit_insn (pat);
444 return true;
445 }
446 }
447
448 /* If the target is a register, overwriting the entire object, or storing
449 a full-word or multi-word field can be done with just a SUBREG.
450
451 If the target is memory, storing any naturally aligned field can be
452 done with a simple store. For targets that support fast unaligned
453 memory, any naturally sized, unit aligned field can be done directly. */
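  /* For example, on a target with 32-bit words, storing a 32-bit SImode
     value at bit 32 of a DImode pseudo reduces to a move into the subreg
     at byte offset 4, and storing a naturally aligned SImode field in
     memory is a plain SImode store.  */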
454
455 offset = bitnum / unit;
456 bitpos = bitnum % unit;
457 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
458 + (offset * UNITS_PER_WORD);
459
460 if (bitpos == 0
461 && bitsize == GET_MODE_BITSIZE (fieldmode)
462 && (!MEM_P (op0)
463 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
464 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
465 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
466 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
467 || (offset * BITS_PER_UNIT % bitsize == 0
468 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
469 {
470 if (MEM_P (op0))
471 op0 = adjust_address (op0, fieldmode, offset);
472 else if (GET_MODE (op0) != fieldmode)
473 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
474 byte_offset);
475 emit_move_insn (op0, value);
476 return true;
477 }
478
479 /* Make sure we are playing with integral modes. Pun with subregs
480 if we aren't. This must come after the entire register case above,
481 since that case is valid for any mode. The following cases are only
482 valid for integral modes. */
483 {
484 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
485 if (imode != GET_MODE (op0))
486 {
487 if (MEM_P (op0))
488 op0 = adjust_address (op0, imode, 0);
489 else
490 {
491 gcc_assert (imode != BLKmode);
492 op0 = gen_lowpart (imode, op0);
493 }
494 }
495 }
496
497 /* We may be accessing data outside the field, which means
498 we can alias adjacent data. */
499 if (MEM_P (op0))
500 {
501 op0 = shallow_copy_rtx (op0);
502 set_mem_alias_set (op0, 0);
503 set_mem_expr (op0, 0);
504 }
505
506 /* If OP0 is a register, BITPOS must count within a word.
507 But as we have it, it counts within whatever size OP0 now has.
508 On a bigendian machine, these are not the same, so convert. */
509 if (BYTES_BIG_ENDIAN
510 && !MEM_P (op0)
511 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
512 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
513
514 /* Storing an lsb-aligned field in a register
515 can be done with a movestrict instruction. */
516
517 if (!MEM_P (op0)
518 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
519 && bitsize == GET_MODE_BITSIZE (fieldmode)
520 && (optab_handler (movstrict_optab, fieldmode)->insn_code
521 != CODE_FOR_nothing))
522 {
523 int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
524 rtx insn;
525 rtx start = get_last_insn ();
526
527 /* Get appropriate low part of the value being stored. */
528 if (GET_CODE (value) == CONST_INT || REG_P (value))
529 value = gen_lowpart (fieldmode, value);
530 else if (!(GET_CODE (value) == SYMBOL_REF
531 || GET_CODE (value) == LABEL_REF
532 || GET_CODE (value) == CONST))
533 value = convert_to_mode (fieldmode, value, 0);
534
535 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
536 value = copy_to_mode_reg (fieldmode, value);
537
538 if (GET_CODE (op0) == SUBREG)
539 {
540 /* Else we've got some float mode source being extracted into
541 a different float mode destination -- this combination of
542 subregs results in Severe Tire Damage. */
543 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
544 || GET_MODE_CLASS (fieldmode) == MODE_INT
545 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
546 op0 = SUBREG_REG (op0);
547 }
548
549 insn = (GEN_FCN (icode)
550 (gen_rtx_SUBREG (fieldmode, op0,
551 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
552 + (offset * UNITS_PER_WORD)),
553 value));
554 if (insn)
555 {
556 emit_insn (insn);
557 return true;
558 }
559 delete_insns_since (start);
560 }
561
562 /* Handle fields bigger than a word. */
563
564 if (bitsize > BITS_PER_WORD)
565 {
566 /* Here we transfer the words of the field
567 in the order least significant first.
568 This is because the most significant word is the one which may
569 be less than full.
570 However, only do that if the value is not BLKmode. */
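      /* For example, with 32-bit words and WORDS_BIG_ENDIAN, a 40-bit
	 field needs nwords == 2: iteration 0 stores the 32 least
	 significant bits of VALUE at bit_offset 8, and iteration 1
	 stores the remaining 8 bits at bit_offset 0.  */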
571
572 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
573 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
574 unsigned int i;
575 rtx last;
576
577 /* This is the mode we must force value to, so that there will be enough
578 subwords to extract. Note that fieldmode will often (always?) be
579 VOIDmode, because that is what store_field uses to indicate that this
580 is a bit field, but passing VOIDmode to operand_subword_force
581 is not allowed. */
582 fieldmode = GET_MODE (value);
583 if (fieldmode == VOIDmode)
584 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
585
586 last = get_last_insn ();
587 for (i = 0; i < nwords; i++)
588 {
589 /* If I is 0, use the low-order word in both field and target;
590 if I is 1, use the next to lowest word; and so on. */
591 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
592 unsigned int bit_offset = (backwards
593 ? MAX ((int) bitsize - ((int) i + 1)
594 * BITS_PER_WORD,
595 0)
596 : (int) i * BITS_PER_WORD);
597 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
598
599 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
600 bitsize - i * BITS_PER_WORD),
601 bitnum + bit_offset, word_mode,
602 value_word, fallback_p))
603 {
604 delete_insns_since (last);
605 return false;
606 }
607 }
608 return true;
609 }
610
 611   /* From here on we can assume that the field to be stored in fits
 612      within a single word, since it is shorter than a word.  */
613
614 /* OFFSET is the number of words or bytes (UNIT says which)
615 from STR_RTX to the first word or byte containing part of the field. */
616
617 if (!MEM_P (op0))
618 {
619 if (offset != 0
620 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
621 {
622 if (!REG_P (op0))
623 {
624 /* Since this is a destination (lvalue), we can't copy
625 it to a pseudo. We can remove a SUBREG that does not
626 change the size of the operand. Such a SUBREG may
627 have been added above. */
628 gcc_assert (GET_CODE (op0) == SUBREG
629 && (GET_MODE_SIZE (GET_MODE (op0))
630 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
631 op0 = SUBREG_REG (op0);
632 }
633 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
634 op0, (offset * UNITS_PER_WORD));
635 }
636 offset = 0;
637 }
638
639 /* If VALUE has a floating-point or complex mode, access it as an
640 integer of the corresponding size. This can occur on a machine
641 with 64 bit registers that uses SFmode for float. It can also
642 occur for unaligned float or complex fields. */
643 orig_value = value;
644 if (GET_MODE (value) != VOIDmode
645 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
646 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
647 {
648 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
649 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
650 }
651
652 /* Now OFFSET is nonzero only if OP0 is memory
653 and is therefore always measured in bytes. */
654
655 if (HAVE_insv
656 && GET_MODE (value) != BLKmode
657 && bitsize > 0
658 && GET_MODE_BITSIZE (op_mode) >= bitsize
659 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
660 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
661 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
662 VOIDmode)
663 && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
664 {
665 int xbitpos = bitpos;
666 rtx value1;
667 rtx xop0 = op0;
668 rtx last = get_last_insn ();
669 rtx pat;
670
671 /* Add OFFSET into OP0's address. */
672 if (MEM_P (xop0))
673 xop0 = adjust_address (xop0, byte_mode, offset);
674
675 /* If xop0 is a register, we need it in OP_MODE
676 to make it acceptable to the format of insv. */
677 if (GET_CODE (xop0) == SUBREG)
678 /* We can't just change the mode, because this might clobber op0,
679 and we will need the original value of op0 if insv fails. */
680 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
681 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
682 xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
683
684 /* On big-endian machines, we count bits from the most significant.
685 If the bit field insn does not, we must invert. */
686
687 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
688 xbitpos = unit - bitsize - xbitpos;
689
690 /* We have been counting XBITPOS within UNIT.
691 Count instead within the size of the register. */
692 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
693 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
694
695 unit = GET_MODE_BITSIZE (op_mode);
696
697 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
698 value1 = value;
699 if (GET_MODE (value) != op_mode)
700 {
701 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
702 {
703 /* Optimization: Don't bother really extending VALUE
704 if it has all the bits we will actually use. However,
705 if we must narrow it, be sure we do it correctly. */
706
707 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
708 {
709 rtx tmp;
710
711 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
712 if (! tmp)
713 tmp = simplify_gen_subreg (op_mode,
714 force_reg (GET_MODE (value),
715 value1),
716 GET_MODE (value), 0);
717 value1 = tmp;
718 }
719 else
720 value1 = gen_lowpart (op_mode, value1);
721 }
722 else if (GET_CODE (value) == CONST_INT)
723 value1 = gen_int_mode (INTVAL (value), op_mode);
724 else
725 /* Parse phase is supposed to make VALUE's data type
726 match that of the component reference, which is a type
727 at least as wide as the field; so VALUE should have
728 a mode that corresponds to that type. */
729 gcc_assert (CONSTANT_P (value));
730 }
731
732 /* If this machine's insv insists on a register,
733 get VALUE1 into a register. */
734 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
735 (value1, op_mode)))
736 value1 = force_reg (op_mode, value1);
737
738 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
739 if (pat)
740 {
741 emit_insn (pat);
742 return true;
743 }
744 delete_insns_since (last);
745 }
746
747 /* If OP0 is a memory, try copying it to a register and seeing if a
748 cheap register alternative is available. */
749 if (HAVE_insv && MEM_P (op0))
750 {
751 enum machine_mode bestmode;
752
753 /* Get the mode to use for inserting into this field. If OP0 is
754 BLKmode, get the smallest mode consistent with the alignment. If
755 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
756 mode. Otherwise, use the smallest mode containing the field. */
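      /* For example, to write a 16-bit field that lies within one aligned
	 SImode word of a BLKmode structure with 32-bit alignment:
	 get_best_mode returns SImode, the containing word is copied into
	 a pseudo, the field is inserted there by the recursive call, and
	 the word is stored back.  */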
757
758 if (GET_MODE (op0) == BLKmode
759 || (op_mode != MAX_MACHINE_MODE
760 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
761 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
762 (op_mode == MAX_MACHINE_MODE
763 ? VOIDmode : op_mode),
764 MEM_VOLATILE_P (op0));
765 else
766 bestmode = GET_MODE (op0);
767
768 if (bestmode != VOIDmode
769 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
770 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
771 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
772 {
773 rtx last, tempreg, xop0;
774 unsigned HOST_WIDE_INT xoffset, xbitpos;
775
776 last = get_last_insn ();
777
778 /* Adjust address to point to the containing unit of
779 that mode. Compute the offset as a multiple of this unit,
780 counting in bytes. */
781 unit = GET_MODE_BITSIZE (bestmode);
782 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
783 xbitpos = bitnum % unit;
784 xop0 = adjust_address (op0, bestmode, xoffset);
785
786 /* Fetch that unit, store the bitfield in it, then store
787 the unit. */
788 tempreg = copy_to_reg (xop0);
789 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
790 fieldmode, orig_value, false))
791 {
792 emit_move_insn (xop0, tempreg);
793 return true;
794 }
795 delete_insns_since (last);
796 }
797 }
798
799 if (!fallback_p)
800 return false;
801
802 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
803 return true;
804 }
805
806 /* Generate code to store value from rtx VALUE
807 into a bit-field within structure STR_RTX
808 containing BITSIZE bits starting at bit BITNUM.
809 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
810
811 void
812 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
813 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
814 rtx value)
815 {
816 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
817 gcc_unreachable ();
818 }
819 \f
820 /* Use shifts and boolean operations to store VALUE
821 into a bit field of width BITSIZE
 822    in the memory location specified by OP0, offset by OFFSET bytes.
823 (OFFSET must be 0 if OP0 is a register.)
824 The field starts at position BITPOS within the byte.
825 (If OP0 is a register, it may be a full word or a narrower mode,
826 but BITPOS still counts within a full word,
827 which is significant on bigendian machines.) */
828
829 static void
830 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
831 unsigned HOST_WIDE_INT bitsize,
832 unsigned HOST_WIDE_INT bitpos, rtx value)
833 {
834 enum machine_mode mode;
835 unsigned int total_bits = BITS_PER_WORD;
836 rtx temp;
837 int all_zero = 0;
838 int all_one = 0;
839
840 /* There is a case not handled here:
841 a structure with a known alignment of just a halfword
842 and a field split across two aligned halfwords within the structure.
843 Or likewise a structure with a known alignment of just a byte
844 and a field split across two bytes.
845 Such cases are not supposed to be able to occur. */
846
847 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
848 {
849 gcc_assert (!offset);
850 /* Special treatment for a bit field split across two registers. */
851 if (bitsize + bitpos > BITS_PER_WORD)
852 {
853 store_split_bit_field (op0, bitsize, bitpos, value);
854 return;
855 }
856 }
857 else
858 {
859 /* Get the proper mode to use for this field. We want a mode that
860 includes the entire field. If such a mode would be larger than
861 a word, we won't be doing the extraction the normal way.
862 We don't want a mode bigger than the destination. */
863
864 mode = GET_MODE (op0);
865 if (GET_MODE_BITSIZE (mode) == 0
866 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
867 mode = word_mode;
868 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
869 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
870
871 if (mode == VOIDmode)
872 {
873 /* The only way this should occur is if the field spans word
874 boundaries. */
875 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
876 value);
877 return;
878 }
879
880 total_bits = GET_MODE_BITSIZE (mode);
881
882 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
883 be in the range 0 to total_bits-1, and put any excess bytes in
884 OFFSET. */
885 if (bitpos >= total_bits)
886 {
887 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
888 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
889 * BITS_PER_UNIT);
890 }
891
892 /* Get ref to an aligned byte, halfword, or word containing the field.
893 Adjust BITPOS to be position within a word,
894 and OFFSET to be the offset of that word.
895 Then alter OP0 to refer to that word. */
896 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
897 offset -= (offset % (total_bits / BITS_PER_UNIT));
898 op0 = adjust_address (op0, mode, offset);
899 }
900
901 mode = GET_MODE (op0);
902
903 /* Now MODE is either some integral mode for a MEM as OP0,
904 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
905 The bit field is contained entirely within OP0.
906 BITPOS is the starting bit number within OP0.
907 (OP0's mode may actually be narrower than MODE.) */
908
909 if (BYTES_BIG_ENDIAN)
910 /* BITPOS is the distance between our msb
911 and that of the containing datum.
912 Convert it to the distance from the lsb. */
913 bitpos = total_bits - bitsize - bitpos;
914
915 /* Now BITPOS is always the distance between our lsb
916 and that of OP0. */
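  /* For example, storing the constant 5 into a 3-bit field at BITPOS 4 of
     a QImode MEM: VALUE becomes 5 << 4 == 0x50, the chosen bits of OP0 are
     cleared by ANDing with 0x8f (the complemented mask from mask_rtx),
     and 0x50 is then IORed in.  */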
917
918 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
919 we must first convert its mode to MODE. */
920
921 if (GET_CODE (value) == CONST_INT)
922 {
923 HOST_WIDE_INT v = INTVAL (value);
924
925 if (bitsize < HOST_BITS_PER_WIDE_INT)
926 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
927
928 if (v == 0)
929 all_zero = 1;
930 else if ((bitsize < HOST_BITS_PER_WIDE_INT
931 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
932 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
933 all_one = 1;
934
935 value = lshift_value (mode, value, bitpos, bitsize);
936 }
937 else
938 {
939 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
940 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
941
942 if (GET_MODE (value) != mode)
943 {
944 if ((REG_P (value) || GET_CODE (value) == SUBREG)
945 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
946 value = gen_lowpart (mode, value);
947 else
948 value = convert_to_mode (mode, value, 1);
949 }
950
951 if (must_and)
952 value = expand_binop (mode, and_optab, value,
953 mask_rtx (mode, 0, bitsize, 0),
954 NULL_RTX, 1, OPTAB_LIB_WIDEN);
955 if (bitpos > 0)
956 value = expand_shift (LSHIFT_EXPR, mode, value,
957 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
958 }
959
960 /* Now clear the chosen bits in OP0,
961 except that if VALUE is -1 we need not bother. */
962 /* We keep the intermediates in registers to allow CSE to combine
963 consecutive bitfield assignments. */
964
965 temp = force_reg (mode, op0);
966
967 if (! all_one)
968 {
969 temp = expand_binop (mode, and_optab, temp,
970 mask_rtx (mode, bitpos, bitsize, 1),
971 NULL_RTX, 1, OPTAB_LIB_WIDEN);
972 temp = force_reg (mode, temp);
973 }
974
975 /* Now logical-or VALUE into OP0, unless it is zero. */
976
977 if (! all_zero)
978 {
979 temp = expand_binop (mode, ior_optab, temp, value,
980 NULL_RTX, 1, OPTAB_LIB_WIDEN);
981 temp = force_reg (mode, temp);
982 }
983
984 if (op0 != temp)
985 {
986 op0 = copy_rtx (op0);
987 emit_move_insn (op0, temp);
988 }
989 }
990 \f
991 /* Store a bit field that is split across multiple accessible memory objects.
992
993 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
994 BITSIZE is the field width; BITPOS the position of its first bit
995 (within the word).
996 VALUE is the value to store.
997
998 This does not yet handle fields wider than BITS_PER_WORD. */
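/* For example, on a 32-bit target a 12-bit field at BITPOS 26 in a REG is
   stored in two pieces: 6 bits at position 26 within the first word, then
   6 bits at position 0 within the following word.  */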
999
1000 static void
1001 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1002 unsigned HOST_WIDE_INT bitpos, rtx value)
1003 {
1004 unsigned int unit;
1005 unsigned int bitsdone = 0;
1006
1007   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1008 much at a time. */
1009 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1010 unit = BITS_PER_WORD;
1011 else
1012 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1013
1014 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1015 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1016 that VALUE might be a floating-point constant. */
1017 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1018 {
1019 rtx word = gen_lowpart_common (word_mode, value);
1020
1021 if (word && (value != word))
1022 value = word;
1023 else
1024 value = gen_lowpart_common (word_mode,
1025 force_reg (GET_MODE (value) != VOIDmode
1026 ? GET_MODE (value)
1027 : word_mode, value));
1028 }
1029
1030 while (bitsdone < bitsize)
1031 {
1032 unsigned HOST_WIDE_INT thissize;
1033 rtx part, word;
1034 unsigned HOST_WIDE_INT thispos;
1035 unsigned HOST_WIDE_INT offset;
1036
1037 offset = (bitpos + bitsdone) / unit;
1038 thispos = (bitpos + bitsdone) % unit;
1039
1040 /* THISSIZE must not overrun a word boundary. Otherwise,
1041 store_fixed_bit_field will call us again, and we will mutually
1042 recurse forever. */
1043 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1044 thissize = MIN (thissize, unit - thispos);
1045
1046 if (BYTES_BIG_ENDIAN)
1047 {
1048 int total_bits;
1049
1050 /* We must do an endian conversion exactly the same way as it is
1051 done in extract_bit_field, so that the two calls to
1052 extract_fixed_bit_field will have comparable arguments. */
1053 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1054 total_bits = BITS_PER_WORD;
1055 else
1056 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1057
1058 /* Fetch successively less significant portions. */
1059 if (GET_CODE (value) == CONST_INT)
1060 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1061 >> (bitsize - bitsdone - thissize))
1062 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1063 else
1064 /* The args are chosen so that the last part includes the
1065 lsb. Give extract_bit_field the value it needs (with
1066 endianness compensation) to fetch the piece we want. */
1067 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1068 total_bits - bitsize + bitsdone,
1069 NULL_RTX, 1);
1070 }
1071 else
1072 {
1073 /* Fetch successively more significant portions. */
1074 if (GET_CODE (value) == CONST_INT)
1075 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1076 >> bitsdone)
1077 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1078 else
1079 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1080 bitsdone, NULL_RTX, 1);
1081 }
1082
1083 /* If OP0 is a register, then handle OFFSET here.
1084
1085 When handling multiword bitfields, extract_bit_field may pass
1086 down a word_mode SUBREG of a larger REG for a bitfield that actually
1087 crosses a word boundary. Thus, for a SUBREG, we must find
1088 the current word starting from the base register. */
1089 if (GET_CODE (op0) == SUBREG)
1090 {
1091 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1092 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1093 GET_MODE (SUBREG_REG (op0)));
1094 offset = 0;
1095 }
1096 else if (REG_P (op0))
1097 {
1098 word = operand_subword_force (op0, offset, GET_MODE (op0));
1099 offset = 0;
1100 }
1101 else
1102 word = op0;
1103
1104 /* OFFSET is in UNITs, and UNIT is in bits.
1105 store_fixed_bit_field wants offset in bytes. */
1106 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1107 thispos, part);
1108 bitsdone += thissize;
1109 }
1110 }
1111 \f
1112 /* A subroutine of extract_bit_field_1 that converts return value X
1113 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1114 to extract_bit_field. */
1115
1116 static rtx
1117 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1118 enum machine_mode tmode, bool unsignedp)
1119 {
1120 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1121 return x;
1122
1123 /* If the x mode is not a scalar integral, first convert to the
1124 integer mode of that size and then access it as a floating-point
1125 value via a SUBREG. */
1126 if (!SCALAR_INT_MODE_P (tmode))
1127 {
1128 enum machine_mode smode;
1129
1130 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1131 x = convert_to_mode (smode, x, unsignedp);
1132 x = force_reg (smode, x);
1133 return gen_lowpart (tmode, x);
1134 }
1135
1136 return convert_to_mode (tmode, x, unsignedp);
1137 }
1138
1139 /* A subroutine of extract_bit_field, with the same arguments.
1140 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1141 if we can find no other means of implementing the operation.
1142    If FALLBACK_P is false, return NULL instead.  */
1143
1144 static rtx
1145 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1146 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1147 enum machine_mode mode, enum machine_mode tmode,
1148 bool fallback_p)
1149 {
1150 unsigned int unit
1151 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1152 unsigned HOST_WIDE_INT offset, bitpos;
1153 rtx op0 = str_rtx;
1154 enum machine_mode int_mode;
1155 enum machine_mode ext_mode;
1156 enum machine_mode mode1;
1157 enum insn_code icode;
1158 int byte_offset;
1159
1160 if (tmode == VOIDmode)
1161 tmode = mode;
1162
1163 while (GET_CODE (op0) == SUBREG)
1164 {
1165 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1166 op0 = SUBREG_REG (op0);
1167 }
1168
1169 /* If we have an out-of-bounds access to a register, just return an
1170 uninitialized register of the required mode. This can occur if the
1171 source code contains an out-of-bounds access to a small array. */
1172 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1173 return gen_reg_rtx (tmode);
1174
1175 if (REG_P (op0)
1176 && mode == GET_MODE (op0)
1177 && bitnum == 0
1178 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1179 {
1180 /* We're trying to extract a full register from itself. */
1181 return op0;
1182 }
1183
1184 /* See if we can get a better vector mode before extracting. */
1185 if (VECTOR_MODE_P (GET_MODE (op0))
1186 && !MEM_P (op0)
1187 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1188 {
1189 enum machine_mode new_mode;
1190 int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1191
1192 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1193 new_mode = MIN_MODE_VECTOR_FLOAT;
1194 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1195 new_mode = MIN_MODE_VECTOR_FRACT;
1196 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1197 new_mode = MIN_MODE_VECTOR_UFRACT;
1198 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1199 new_mode = MIN_MODE_VECTOR_ACCUM;
1200 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1201 new_mode = MIN_MODE_VECTOR_UACCUM;
1202 else
1203 new_mode = MIN_MODE_VECTOR_INT;
1204
1205 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1206 if (GET_MODE_NUNITS (new_mode) == nunits
1207 && GET_MODE_INNER (new_mode) == tmode
1208 && targetm.vector_mode_supported_p (new_mode))
1209 break;
1210 if (new_mode != VOIDmode)
1211 op0 = gen_lowpart (new_mode, op0);
1212 }
1213
1214 /* Use vec_extract patterns for extracting parts of vectors whenever
1215 available. */
1216 if (VECTOR_MODE_P (GET_MODE (op0))
1217 && !MEM_P (op0)
1218 && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1219 != CODE_FOR_nothing)
1220 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1221 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1222 {
1223 enum machine_mode outermode = GET_MODE (op0);
1224 enum machine_mode innermode = GET_MODE_INNER (outermode);
1225 int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1226 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1227 rtx rtxpos = GEN_INT (pos);
1228 rtx src = op0;
1229 rtx dest = NULL, pat, seq;
1230 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1231 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1232 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1233
1234 if (innermode == tmode || innermode == mode)
1235 dest = target;
1236
1237 if (!dest)
1238 dest = gen_reg_rtx (innermode);
1239
1240 start_sequence ();
1241
1242 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1243 dest = copy_to_mode_reg (mode0, dest);
1244
1245 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1246 src = copy_to_mode_reg (mode1, src);
1247
1248 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1249 rtxpos = copy_to_mode_reg (mode1, rtxpos);
1250
1251 /* We could handle this, but we should always be called with a pseudo
1252 for our targets and all insns should take them as outputs. */
1253 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1254 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1255 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1256
1257 pat = GEN_FCN (icode) (dest, src, rtxpos);
1258 seq = get_insns ();
1259 end_sequence ();
1260 if (pat)
1261 {
1262 emit_insn (seq);
1263 emit_insn (pat);
1264 if (mode0 != mode)
1265 return gen_lowpart (tmode, dest);
1266 return dest;
1267 }
1268 }
1269
1270 /* Make sure we are playing with integral modes. Pun with subregs
1271 if we aren't. */
1272 {
1273 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1274 if (imode != GET_MODE (op0))
1275 {
1276 if (MEM_P (op0))
1277 op0 = adjust_address (op0, imode, 0);
1278 else
1279 {
1280 gcc_assert (imode != BLKmode);
1281 op0 = gen_lowpart (imode, op0);
1282
1283 /* If we got a SUBREG, force it into a register since we
1284 aren't going to be able to do another SUBREG on it. */
1285 if (GET_CODE (op0) == SUBREG)
1286 op0 = force_reg (imode, op0);
1287 }
1288 }
1289 }
1290
1291 /* We may be accessing data outside the field, which means
1292 we can alias adjacent data. */
1293 if (MEM_P (op0))
1294 {
1295 op0 = shallow_copy_rtx (op0);
1296 set_mem_alias_set (op0, 0);
1297 set_mem_expr (op0, 0);
1298 }
1299
1300 /* Extraction of a full-word or multi-word value from a structure
1301 in a register or aligned memory can be done with just a SUBREG.
1302 A subword value in the least significant part of a register
1303 can also be extracted with a SUBREG. For this, we need the
1304 byte offset of the value in op0. */
1305
1306 bitpos = bitnum % unit;
1307 offset = bitnum / unit;
1308 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1309
1310 /* If OP0 is a register, BITPOS must count within a word.
1311 But as we have it, it counts within whatever size OP0 now has.
1312 On a bigendian machine, these are not the same, so convert. */
1313 if (BYTES_BIG_ENDIAN
1314 && !MEM_P (op0)
1315 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1316 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1317
1318 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1319 If that's wrong, the solution is to test for it and set TARGET to 0
1320 if needed. */
1321
1322 /* Only scalar integer modes can be converted via subregs. There is an
1323 additional problem for FP modes here in that they can have a precision
1324 which is different from the size. mode_for_size uses precision, but
1325 we want a mode based on the size, so we must avoid calling it for FP
1326 modes. */
1327 mode1 = (SCALAR_INT_MODE_P (tmode)
1328 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1329 : mode);
1330
1331 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1332 && bitpos % BITS_PER_WORD == 0)
1333 || (mode1 != BLKmode
1334 /* ??? The big endian test here is wrong. This is correct
1335 if the value is in a register, and if mode_for_size is not
1336 the same mode as op0. This causes us to get unnecessarily
1337 inefficient code from the Thumb port when -mbig-endian. */
1338 && (BYTES_BIG_ENDIAN
1339 ? bitpos + bitsize == BITS_PER_WORD
1340 : bitpos == 0)))
1341 && ((!MEM_P (op0)
1342 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1343 GET_MODE_BITSIZE (GET_MODE (op0)))
1344 && GET_MODE_SIZE (mode1) != 0
1345 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1346 || (MEM_P (op0)
1347 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1348 || (offset * BITS_PER_UNIT % bitsize == 0
1349 && MEM_ALIGN (op0) % bitsize == 0)))))
1350 {
1351 if (MEM_P (op0))
1352 op0 = adjust_address (op0, mode1, offset);
1353 else if (mode1 != GET_MODE (op0))
1354 {
1355 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1356 byte_offset);
1357 if (sub == NULL)
1358 goto no_subreg_mode_swap;
1359 op0 = sub;
1360 }
1361 if (mode1 != mode)
1362 return convert_to_mode (tmode, op0, unsignedp);
1363 return op0;
1364 }
1365 no_subreg_mode_swap:
1366
1367 /* Handle fields bigger than a word. */
1368
1369 if (bitsize > BITS_PER_WORD)
1370 {
1371 /* Here we transfer the words of the field
1372 in the order least significant first.
1373 This is because the most significant word is the one which may
1374 be less than full. */
1375
1376 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1377 unsigned int i;
1378
1379 if (target == 0 || !REG_P (target))
1380 target = gen_reg_rtx (mode);
1381
1382 /* Indicate for flow that the entire target reg is being set. */
1383 emit_clobber (target);
1384
1385 for (i = 0; i < nwords; i++)
1386 {
1387 /* If I is 0, use the low-order word in both field and target;
1388 if I is 1, use the next to lowest word; and so on. */
1389 /* Word number in TARGET to use. */
1390 unsigned int wordnum
1391 = (WORDS_BIG_ENDIAN
1392 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1393 : i);
1394 /* Offset from start of field in OP0. */
1395 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1396 ? MAX (0, ((int) bitsize - ((int) i + 1)
1397 * (int) BITS_PER_WORD))
1398 : (int) i * BITS_PER_WORD);
1399 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1400 rtx result_part
1401 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1402 bitsize - i * BITS_PER_WORD),
1403 bitnum + bit_offset, 1, target_part, mode,
1404 word_mode);
1405
1406 gcc_assert (target_part);
1407
1408 if (result_part != target_part)
1409 emit_move_insn (target_part, result_part);
1410 }
1411
1412 if (unsignedp)
1413 {
1414 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1415 need to be zero'd out. */
1416 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1417 {
1418 unsigned int i, total_words;
1419
1420 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1421 for (i = nwords; i < total_words; i++)
1422 emit_move_insn
1423 (operand_subword (target,
1424 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1425 1, VOIDmode),
1426 const0_rtx);
1427 }
1428 return target;
1429 }
1430
1431 /* Signed bit field: sign-extend with two arithmetic shifts. */
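      /* For example, a signed 40-bit field extracted into a 64-bit MODE is
	 shifted left by 24 and then right (arithmetically) by 24, which
	 replicates bit 39 of the field into bits 40..63.  */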
1432 target = expand_shift (LSHIFT_EXPR, mode, target,
1433 build_int_cst (NULL_TREE,
1434 GET_MODE_BITSIZE (mode) - bitsize),
1435 NULL_RTX, 0);
1436 return expand_shift (RSHIFT_EXPR, mode, target,
1437 build_int_cst (NULL_TREE,
1438 GET_MODE_BITSIZE (mode) - bitsize),
1439 NULL_RTX, 0);
1440 }
1441
1442 /* From here on we know the desired field is smaller than a word. */
1443
1444 /* Check if there is a correspondingly-sized integer field, so we can
1445 safely extract it as one size of integer, if necessary; then
1446 truncate or extend to the size that is wanted; then use SUBREGs or
1447 convert_to_mode to get one of the modes we really wanted. */
1448
1449 int_mode = int_mode_for_mode (tmode);
1450 if (int_mode == BLKmode)
1451 int_mode = int_mode_for_mode (mode);
1452 /* Should probably push op0 out to memory and then do a load. */
1453 gcc_assert (int_mode != BLKmode);
1454
1455 /* OFFSET is the number of words or bytes (UNIT says which)
1456 from STR_RTX to the first word or byte containing part of the field. */
1457 if (!MEM_P (op0))
1458 {
1459 if (offset != 0
1460 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1461 {
1462 if (!REG_P (op0))
1463 op0 = copy_to_reg (op0);
1464 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1465 op0, (offset * UNITS_PER_WORD));
1466 }
1467 offset = 0;
1468 }
1469
1470 /* Now OFFSET is nonzero only for memory operands. */
1471 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1472 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1473 if (ext_mode != MAX_MACHINE_MODE
1474 && bitsize > 0
1475 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1476 /* If op0 is a register, we need it in EXT_MODE to make it
1477 acceptable to the format of ext(z)v. */
1478 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1479 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1480 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1481 && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1482 {
1483 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1484 rtx bitsize_rtx, bitpos_rtx;
1485 rtx last = get_last_insn ();
1486 rtx xop0 = op0;
1487 rtx xtarget = target;
1488 rtx xspec_target = target;
1489 rtx xspec_target_subreg = 0;
1490 rtx pat;
1491
1492 /* If op0 is a register, we need it in EXT_MODE to make it
1493 acceptable to the format of ext(z)v. */
1494 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1495 xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1496 if (MEM_P (xop0))
1497 /* Get ref to first byte containing part of the field. */
1498 xop0 = adjust_address (xop0, byte_mode, xoffset);
1499
1500 /* On big-endian machines, we count bits from the most significant.
1501 If the bit field insn does not, we must invert. */
1502 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1503 xbitpos = unit - bitsize - xbitpos;
1504
1505 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1506 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1507 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1508
1509 unit = GET_MODE_BITSIZE (ext_mode);
1510
1511 if (xtarget == 0)
1512 xtarget = xspec_target = gen_reg_rtx (tmode);
1513
1514 if (GET_MODE (xtarget) != ext_mode)
1515 {
1516 if (REG_P (xtarget))
1517 {
1518 xtarget = gen_lowpart (ext_mode, xtarget);
1519 if (GET_MODE_SIZE (ext_mode)
1520 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1521 xspec_target_subreg = xtarget;
1522 }
1523 else
1524 xtarget = gen_reg_rtx (ext_mode);
1525 }
1526
1527 /* If this machine's ext(z)v insists on a register target,
1528 make sure we have one. */
1529 if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1530 xtarget = gen_reg_rtx (ext_mode);
1531
1532 bitsize_rtx = GEN_INT (bitsize);
1533 bitpos_rtx = GEN_INT (xbitpos);
1534
1535 pat = (unsignedp
1536 ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1537 : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1538 if (pat)
1539 {
1540 emit_insn (pat);
1541 if (xtarget == xspec_target)
1542 return xtarget;
1543 if (xtarget == xspec_target_subreg)
1544 return xspec_target;
1545 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1546 }
1547 delete_insns_since (last);
1548 }
1549
1550 /* If OP0 is a memory, try copying it to a register and seeing if a
1551 cheap register alternative is available. */
1552 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1553 {
1554 enum machine_mode bestmode;
1555
1556 /* Get the mode to use for inserting into this field. If
1557 OP0 is BLKmode, get the smallest mode consistent with the
1558 alignment. If OP0 is a non-BLKmode object that is no
1559 wider than EXT_MODE, use its mode. Otherwise, use the
1560 smallest mode containing the field. */
1561
1562 if (GET_MODE (op0) == BLKmode
1563 || (ext_mode != MAX_MACHINE_MODE
1564 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1565 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1566 (ext_mode == MAX_MACHINE_MODE
1567 ? VOIDmode : ext_mode),
1568 MEM_VOLATILE_P (op0));
1569 else
1570 bestmode = GET_MODE (op0);
1571
1572 if (bestmode != VOIDmode
1573 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1574 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1575 {
1576 unsigned HOST_WIDE_INT xoffset, xbitpos;
1577
1578 /* Compute the offset as a multiple of this unit,
1579 counting in bytes. */
1580 unit = GET_MODE_BITSIZE (bestmode);
1581 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1582 xbitpos = bitnum % unit;
1583
1584 /* Make sure the register is big enough for the whole field. */
1585 if (xoffset * BITS_PER_UNIT + unit
1586 >= offset * BITS_PER_UNIT + bitsize)
1587 {
1588 rtx last, result, xop0;
1589
1590 last = get_last_insn ();
1591
1592 /* Fetch it to a register in that size. */
1593 xop0 = adjust_address (op0, bestmode, xoffset);
1594 xop0 = force_reg (bestmode, xop0);
1595 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1596 unsignedp, target,
1597 mode, tmode, false);
1598 if (result)
1599 return result;
1600
1601 delete_insns_since (last);
1602 }
1603 }
1604 }
1605
1606 if (!fallback_p)
1607 return NULL;
1608
1609 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1610 bitpos, target, unsignedp);
1611 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1612 }
1613
1614 /* Generate code to extract a bit-field from STR_RTX
1615 containing BITSIZE bits, starting at BITNUM,
1616 and put it in TARGET if possible (if TARGET is nonzero).
1617 Regardless of TARGET, we return the rtx for where the value is placed.
1618
1619 STR_RTX is the structure containing the byte (a REG or MEM).
1620 UNSIGNEDP is nonzero if this is an unsigned bit field.
1621 MODE is the natural mode of the field value once extracted.
1622 TMODE is the mode the caller would like the value to have;
1623 but the value may be returned with type MODE instead.
1624
1625 If a TARGET is specified and we can store in it at no extra cost,
1626 we do so, and return TARGET.
1627 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1628 if they are equally easy. */
1629
1630 rtx
1631 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1632 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1633 enum machine_mode mode, enum machine_mode tmode)
1634 {
1635 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1636 target, mode, tmode, true);
1637 }
1638 \f
1639 /* Extract a bit field using shifts and boolean operations.
1640 Returns an rtx to represent the value.
1641 OP0 addresses a register (word) or memory (byte).
1642 BITPOS says which bit within the word or byte the bit field starts in.
1643 OFFSET says how many bytes farther the bit field starts;
1644 it is 0 if OP0 is a register.
1645 BITSIZE says how many bits long the bit field is.
1646 (If OP0 is a register, it may be narrower than a full word,
1647 but BITPOS still counts within a full word,
1648 which is significant on bigendian machines.)
1649
1650 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1651 If TARGET is nonzero, attempts to store the value there
1652 and return TARGET, but this is not guaranteed.
1653 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1654
1655 static rtx
1656 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1657 unsigned HOST_WIDE_INT offset,
1658 unsigned HOST_WIDE_INT bitsize,
1659 unsigned HOST_WIDE_INT bitpos, rtx target,
1660 int unsignedp)
1661 {
1662 unsigned int total_bits = BITS_PER_WORD;
1663 enum machine_mode mode;
1664
1665 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1666 {
1667 /* Special treatment for a bit field split across two registers. */
1668 if (bitsize + bitpos > BITS_PER_WORD)
1669 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1670 }
1671 else
1672 {
1673 /* Get the proper mode to use for this field. We want a mode that
1674 includes the entire field. If such a mode would be larger than
1675 a word, we won't be doing the extraction the normal way. */
1676
1677 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1678 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1679
1680 if (mode == VOIDmode)
1681 /* The only way this should occur is if the field spans word
1682 boundaries. */
1683 return extract_split_bit_field (op0, bitsize,
1684 bitpos + offset * BITS_PER_UNIT,
1685 unsignedp);
1686
1687 total_bits = GET_MODE_BITSIZE (mode);
1688
1689 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1690 be in the range 0 to total_bits-1, and put any excess bytes in
1691 OFFSET. */
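/* For instance, with total_bits == 32 a request for bitpos == 70 becomes
offset += 8 (bytes) and bitpos == 6, re-addressing the field relative to
the word that actually contains it. */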
1692 if (bitpos >= total_bits)
1693 {
1694 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1695 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1696 * BITS_PER_UNIT);
1697 }
1698
1699 /* Get ref to an aligned byte, halfword, or word containing the field.
1700 Adjust BITPOS to be position within a word,
1701 and OFFSET to be the offset of that word.
1702 Then alter OP0 to refer to that word. */
1703 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1704 offset -= (offset % (total_bits / BITS_PER_UNIT));
1705 op0 = adjust_address (op0, mode, offset);
1706 }
1707
1708 mode = GET_MODE (op0);
1709
1710 if (BYTES_BIG_ENDIAN)
1711 /* BITPOS is the distance between our msb and that of OP0.
1712 Convert it to the distance from the lsb. */
1713 bitpos = total_bits - bitsize - bitpos;
1714
1715 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1716 We have reduced the big-endian case to the little-endian case. */
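/* For example, with total_bits == 32 an 8-bit field at big-endian
bitpos 4 becomes bitpos 32 - 8 - 4 == 20 counted from the lsb. */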
1717
1718 if (unsignedp)
1719 {
1720 if (bitpos)
1721 {
1722 /* If the field does not already start at the lsb,
1723 shift it so it does. */
1724 tree amount = build_int_cst (NULL_TREE, bitpos);
1725 /* Maybe propagate the target for the shift. */
1726 /* But not if we will return it--could confuse integrate.c. */
1727 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1728 if (tmode != mode) subtarget = 0;
1729 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1730 }
1731 /* Convert the value to the desired mode. */
1732 if (mode != tmode)
1733 op0 = convert_to_mode (tmode, op0, 1);
1734
1735 /* Unless the msb of the field used to be the msb when we shifted,
1736 mask out the upper bits. */
1737
1738 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1739 return expand_binop (GET_MODE (op0), and_optab, op0,
1740 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1741 target, 1, OPTAB_LIB_WIDEN);
1742 return op0;
1743 }
1744
1745 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1746 then arithmetic-shift its lsb to the lsb of the word. */
1747 op0 = force_reg (mode, op0);
1748 if (mode != tmode)
1749 target = 0;
1750
1751 /* Find the narrowest integer mode that contains the field. */
1752
1753 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1754 mode = GET_MODE_WIDER_MODE (mode))
1755 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1756 {
1757 op0 = convert_to_mode (mode, op0, 0);
1758 break;
1759 }
1760
1761 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1762 {
1763 tree amount
1764 = build_int_cst (NULL_TREE,
1765 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1766 /* Maybe propagate the target for the shift. */
1767 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1768 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1769 }
1770
1771 return expand_shift (RSHIFT_EXPR, mode, op0,
1772 build_int_cst (NULL_TREE,
1773 GET_MODE_BITSIZE (mode) - bitsize),
1774 target, 0);
1775 }
1776 \f
1777 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1778 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1779 complement of that if COMPLEMENT. The mask is truncated if
1780 necessary to the width of mode MODE. The mask is zero-extended if
1781 BITSIZE+BITPOS is too small for MODE. */
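/* For example, mask_rtx (SImode, 4, 8, 0) yields 0x00000ff0 (eight ones
above four zeros); with COMPLEMENT nonzero it would yield 0xfffff00f. */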
1782
1783 static rtx
1784 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1785 {
1786 HOST_WIDE_INT masklow, maskhigh;
1787
1788 if (bitsize == 0)
1789 masklow = 0;
1790 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1791 masklow = (HOST_WIDE_INT) -1 << bitpos;
1792 else
1793 masklow = 0;
1794
1795 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1796 masklow &= ((unsigned HOST_WIDE_INT) -1
1797 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1798
1799 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1800 maskhigh = -1;
1801 else
1802 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1803
1804 if (bitsize == 0)
1805 maskhigh = 0;
1806 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1807 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1808 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1809 else
1810 maskhigh = 0;
1811
1812 if (complement)
1813 {
1814 maskhigh = ~maskhigh;
1815 masklow = ~masklow;
1816 }
1817
1818 return immed_double_const (masklow, maskhigh, mode);
1819 }
1820
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1822 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
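/* For example, lshift_value (SImode, GEN_INT (0x2b), 8, 4) keeps only the
low four bits of the value (0xb) and shifts them left by eight, giving
the constant 0xb00. */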
1823
1824 static rtx
1825 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1826 {
1827 unsigned HOST_WIDE_INT v = INTVAL (value);
1828 HOST_WIDE_INT low, high;
1829
1830 if (bitsize < HOST_BITS_PER_WIDE_INT)
1831 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1832
1833 if (bitpos < HOST_BITS_PER_WIDE_INT)
1834 {
1835 low = v << bitpos;
1836 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1837 }
1838 else
1839 {
1840 low = 0;
1841 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1842 }
1843
1844 return immed_double_const (low, high, mode);
1845 }
1846 \f
1847 /* Extract a bit field that is split across two words
1848 and return an RTX for the result.
1849
1850 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1851 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1852 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1853
1854 static rtx
1855 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1856 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1857 {
1858 unsigned int unit;
1859 unsigned int bitsdone = 0;
1860 rtx result = NULL_RTX;
1861 int first = 1;
1862
1863 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1864 much at a time. */
1865 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1866 unit = BITS_PER_WORD;
1867 else
1868 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1869
1870 while (bitsdone < bitsize)
1871 {
1872 unsigned HOST_WIDE_INT thissize;
1873 rtx part, word;
1874 unsigned HOST_WIDE_INT thispos;
1875 unsigned HOST_WIDE_INT offset;
1876
1877 offset = (bitpos + bitsdone) / unit;
1878 thispos = (bitpos + bitsdone) % unit;
1879
1880 /* THISSIZE must not overrun a word boundary. Otherwise,
1881 extract_fixed_bit_field will call us again, and we will mutually
1882 recurse forever. */
1883 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1884 thissize = MIN (thissize, unit - thispos);
1885
1886 /* If OP0 is a register, then handle OFFSET here.
1887
1888 When handling multiword bitfields, extract_bit_field may pass
1889 down a word_mode SUBREG of a larger REG for a bitfield that actually
1890 crosses a word boundary. Thus, for a SUBREG, we must find
1891 the current word starting from the base register. */
1892 if (GET_CODE (op0) == SUBREG)
1893 {
1894 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1895 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1896 GET_MODE (SUBREG_REG (op0)));
1897 offset = 0;
1898 }
1899 else if (REG_P (op0))
1900 {
1901 word = operand_subword_force (op0, offset, GET_MODE (op0));
1902 offset = 0;
1903 }
1904 else
1905 word = op0;
1906
1907 /* Extract the parts in bit-counting order,
1908 whose meaning is determined by BYTES_PER_UNIT.
1909 OFFSET is in UNITs, and UNIT is in bits.
1910 extract_fixed_bit_field wants offset in bytes. */
1911 part = extract_fixed_bit_field (word_mode, word,
1912 offset * unit / BITS_PER_UNIT,
1913 thissize, thispos, 0, 1);
1914 bitsdone += thissize;
1915
1916 /* Shift this part into place for the result. */
1917 if (BYTES_BIG_ENDIAN)
1918 {
1919 if (bitsize != bitsdone)
1920 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1921 build_int_cst (NULL_TREE, bitsize - bitsdone),
1922 0, 1);
1923 }
1924 else
1925 {
1926 if (bitsdone != thissize)
1927 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1928 build_int_cst (NULL_TREE,
1929 bitsdone - thissize), 0, 1);
1930 }
1931
1932 if (first)
1933 result = part;
1934 else
1935 /* Combine the parts with bitwise or. This works
1936 because we extracted each part as an unsigned bit field. */
1937 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1938 OPTAB_LIB_WIDEN);
1939
1940 first = 0;
1941 }
1942
1943 /* Unsigned bit field: we are done. */
1944 if (unsignedp)
1945 return result;
1946 /* Signed bit field: sign-extend with two arithmetic shifts. */
1947 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1948 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1949 NULL_RTX, 0);
1950 return expand_shift (RSHIFT_EXPR, word_mode, result,
1951 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1952 NULL_RTX, 0);
1953 }
1954 \f
1955 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1956 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1957 MODE, fill the upper bits with zeros. Fail if the layout of either
1958 mode is unknown (as for CC modes) or if the extraction would involve
1959 unprofitable mode punning. Return the value on success, otherwise
1960 return null.
1961
1962 This is different from gen_lowpart* in these respects:
1963
1964 - the returned value must always be considered an rvalue
1965
1966 - when MODE is wider than SRC_MODE, the extraction involves
1967 a zero extension
1968
1969 - when MODE is smaller than SRC_MODE, the extraction involves
1970 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1971
1972 In other words, this routine performs a computation, whereas the
1973 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1974 operations. */
1975
1976 rtx
1977 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1978 {
1979 enum machine_mode int_mode, src_int_mode;
1980
1981 if (mode == src_mode)
1982 return src;
1983
1984 if (CONSTANT_P (src))
1985 return simplify_gen_subreg (mode, src, src_mode,
1986 subreg_lowpart_offset (mode, src_mode));
1987
1988 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1989 return NULL_RTX;
1990
1991 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1992 && MODES_TIEABLE_P (mode, src_mode))
1993 {
1994 rtx x = gen_lowpart_common (mode, src);
1995 if (x)
1996 return x;
1997 }
1998
1999 src_int_mode = int_mode_for_mode (src_mode);
2000 int_mode = int_mode_for_mode (mode);
2001 if (src_int_mode == BLKmode || int_mode == BLKmode)
2002 return NULL_RTX;
2003
2004 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2005 return NULL_RTX;
2006 if (!MODES_TIEABLE_P (int_mode, mode))
2007 return NULL_RTX;
2008
2009 src = gen_lowpart (src_int_mode, src);
2010 src = convert_modes (int_mode, src_int_mode, src, true);
2011 src = gen_lowpart (mode, src);
2012 return src;
2013 }
2014 \f
2015 /* Add INC into TARGET. */
2016
2017 void
2018 expand_inc (rtx target, rtx inc)
2019 {
2020 rtx value = expand_binop (GET_MODE (target), add_optab,
2021 target, inc,
2022 target, 0, OPTAB_LIB_WIDEN);
2023 if (value != target)
2024 emit_move_insn (target, value);
2025 }
2026
2027 /* Subtract DEC from TARGET. */
2028
2029 void
2030 expand_dec (rtx target, rtx dec)
2031 {
2032 rtx value = expand_binop (GET_MODE (target), sub_optab,
2033 target, dec,
2034 target, 0, OPTAB_LIB_WIDEN);
2035 if (value != target)
2036 emit_move_insn (target, value);
2037 }
2038 \f
2039 /* Output a shift instruction for expression code CODE,
2040 with SHIFTED being the rtx for the value to shift,
2041 and AMOUNT the tree for the amount to shift by.
2042 Store the result in the rtx TARGET, if that is convenient.
2043 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2044 Return the rtx for where the value is. */
2045
2046 rtx
2047 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2048 tree amount, rtx target, int unsignedp)
2049 {
2050 rtx op1, temp = 0;
2051 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2052 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2053 optab lshift_optab = ashl_optab;
2054 optab rshift_arith_optab = ashr_optab;
2055 optab rshift_uns_optab = lshr_optab;
2056 optab lrotate_optab = rotl_optab;
2057 optab rrotate_optab = rotr_optab;
2058 enum machine_mode op1_mode;
2059 int attempt;
2060
2061 op1 = expand_normal (amount);
2062 op1_mode = GET_MODE (op1);
2063
2064 /* Determine whether the shift/rotate amount is a vector or scalar. If the
2065 shift amount is a vector, use the vector/vector shift patterns. */
2066 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2067 {
2068 lshift_optab = vashl_optab;
2069 rshift_arith_optab = vashr_optab;
2070 rshift_uns_optab = vlshr_optab;
2071 lrotate_optab = vrotl_optab;
2072 rrotate_optab = vrotr_optab;
2073 }
2074
2075 /* Previously, we detected shift counts computed by NEGATE_EXPR
2076 and shifted in the other direction, but that does not work
2077 on all machines. */
2078
2079 if (SHIFT_COUNT_TRUNCATED)
2080 {
2081 if (GET_CODE (op1) == CONST_INT
2082 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2083 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2084 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2085 % GET_MODE_BITSIZE (mode));
2086 else if (GET_CODE (op1) == SUBREG
2087 && subreg_lowpart_p (op1))
2088 op1 = SUBREG_REG (op1);
2089 }
2090
2091 if (op1 == const0_rtx)
2092 return shifted;
2093
2094 /* Check whether it's cheaper to implement a left shift by a constant
2095 bit count by a sequence of additions. */
2096 if (code == LSHIFT_EXPR
2097 && GET_CODE (op1) == CONST_INT
2098 && INTVAL (op1) > 0
2099 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2100 && INTVAL (op1) < MAX_BITS_PER_WORD
2101 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2102 && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2103 {
2104 int i;
2105 for (i = 0; i < INTVAL (op1); i++)
2106 {
2107 temp = force_reg (mode, shifted);
2108 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2109 unsignedp, OPTAB_LIB_WIDEN);
2110 }
2111 return shifted;
2112 }
2113
2114 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2115 {
2116 enum optab_methods methods;
2117
2118 if (attempt == 0)
2119 methods = OPTAB_DIRECT;
2120 else if (attempt == 1)
2121 methods = OPTAB_WIDEN;
2122 else
2123 methods = OPTAB_LIB_WIDEN;
2124
2125 if (rotate)
2126 {
2127 /* Widening does not work for rotation. */
2128 if (methods == OPTAB_WIDEN)
2129 continue;
2130 else if (methods == OPTAB_LIB_WIDEN)
2131 {
2132 /* If we have been unable to open-code this by a rotation,
2133 do it as the IOR of two shifts. I.e., to rotate A
2134 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2135 where C is the bitsize of A.
2136
2137 It is theoretically possible that the target machine might
2138 not be able to perform either shift and hence we would
2139 be making two libcalls rather than just the one for the
2140 shift (similarly if IOR could not be done). We will allow
2141 this extremely unlikely lossage to avoid complicating the
2142 code below. */
2143
2144 rtx subtarget = target == shifted ? 0 : target;
2145 tree new_amount, other_amount;
2146 rtx temp1;
2147 tree type = TREE_TYPE (amount);
2148 if (GET_MODE (op1) != TYPE_MODE (type)
2149 && GET_MODE (op1) != VOIDmode)
2150 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2151 new_amount = make_tree (type, op1);
2152 other_amount
2153 = fold_build2 (MINUS_EXPR, type,
2154 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2155 new_amount);
2156
2157 shifted = force_reg (mode, shifted);
2158
2159 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2160 mode, shifted, new_amount, 0, 1);
2161 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2162 mode, shifted, other_amount, subtarget, 1);
2163 return expand_binop (mode, ior_optab, temp, temp1, target,
2164 unsignedp, methods);
2165 }
2166
2167 temp = expand_binop (mode,
2168 left ? lrotate_optab : rrotate_optab,
2169 shifted, op1, target, unsignedp, methods);
2170 }
2171 else if (unsignedp)
2172 temp = expand_binop (mode,
2173 left ? lshift_optab : rshift_uns_optab,
2174 shifted, op1, target, unsignedp, methods);
2175
2176 /* Do arithmetic shifts.
2177 Also, if we are going to widen the operand, we can just as well
2178 use an arithmetic right-shift instead of a logical one. */
2179 if (temp == 0 && ! rotate
2180 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2181 {
2182 enum optab_methods methods1 = methods;
2183
2184 /* If trying to widen a log shift to an arithmetic shift,
2185 don't accept an arithmetic shift of the same size. */
2186 if (unsignedp)
2187 methods1 = OPTAB_MUST_WIDEN;
2188
2189 /* Arithmetic shift */
2190
2191 temp = expand_binop (mode,
2192 left ? lshift_optab : rshift_arith_optab,
2193 shifted, op1, target, unsignedp, methods1);
2194 }
2195
2196 /* We used to try extzv here for logical right shifts, but that was
2197 only useful for one machine, the VAX, and caused poor code
2198 generation there for lshrdi3, so the code was deleted and a
2199 define_expand for lshrsi3 was added to vax.md. */
2200 }
2201
2202 gcc_assert (temp);
2203 return temp;
2204 }
2205 \f
2206 enum alg_code {
2207 alg_unknown,
2208 alg_zero,
2209 alg_m, alg_shift,
2210 alg_add_t_m2,
2211 alg_sub_t_m2,
2212 alg_add_factor,
2213 alg_sub_factor,
2214 alg_add_t2_m,
2215 alg_sub_t2_m,
2216 alg_impossible
2217 };
2218
2219 /* This structure holds the "cost" of a multiply sequence. The
2220 "cost" field holds the total rtx_cost of every operator in the
2221 synthetic multiplication sequence, hence cost(a op b) is defined
2222 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2223 The "latency" field holds the minimum possible latency of the
2224 synthetic multiply, on a hypothetical infinitely parallel CPU.
2225 This is the critical path, or the maximum height, of the expression
2226 tree which is the sum of rtx_costs on the most expensive path from
2227 any leaf to the root. Hence latency(a op b) is defined as zero for
2228 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2229
2230 struct mult_cost {
2231 short cost; /* Total rtx_cost of the multiplication sequence. */
2232 short latency; /* The latency of the multiplication sequence. */
2233 };
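/* For example, if a multiplication is synthesized as (x << 3) + (x << 1)
and, for illustration, each shift and the add have rtx_cost 1, then the
total cost is 3 but the latency is only 2: the two shifts are independent,
so the critical path is one shift followed by the add. */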
2234
2235 /* This macro is used to compare a pointer to a mult_cost against a
2236 single integer "rtx_cost" value. This is equivalent to the macro
2237 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2238 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2239 || ((X)->cost == (Y) && (X)->latency < (Y)))
2240
2241 /* This macro is used to compare two pointers to mult_costs against
2242 each other. The macro returns true if X is cheaper than Y.
2243 Currently, the cheaper of two mult_costs is the one with the
2244 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2245 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2246 || ((X)->cost == (Y)->cost \
2247 && (X)->latency < (Y)->latency))
2248
2249 /* This structure records a sequence of operations.
2250 `ops' is the number of operations recorded.
2251 `cost' is their total cost.
2252 The operations are stored in `op' and the corresponding
2253 logarithms of the integer coefficients in `log'.
2254
2255 These are the operations:
2256 alg_zero total := 0;
2257 alg_m total := multiplicand;
2258 alg_shift total := total * coeff
2259 alg_add_t_m2 total := total + multiplicand * coeff;
2260 alg_sub_t_m2 total := total - multiplicand * coeff;
2261 alg_add_factor total := total * coeff + total;
2262 alg_sub_factor total := total * coeff - total;
2263 alg_add_t2_m total := total * coeff + multiplicand;
2264 alg_sub_t2_m total := total * coeff - multiplicand;
2265
2266 The first operand must be either alg_zero or alg_m. */
2267
2268 struct algorithm
2269 {
2270 struct mult_cost cost;
2271 short ops;
2272 /* The sizes of the OP and LOG fields are not directly related to the
2273 word size, but the worst-case algorithm arises if we have few
2274 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2275 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2276 in total wordsize operations. */
2277 enum alg_code op[MAX_BITS_PER_WORD];
2278 char log[MAX_BITS_PER_WORD];
2279 };
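/* As an illustration of this encoding, multiplying by 10 might be recorded
as three operations: op[0] = alg_m (total := multiplicand),
op[1] = alg_add_t2_m with log[1] = 2 (total := total*4 + multiplicand, i.e. 5x),
op[2] = alg_shift with log[2] = 1 (total := total*2, i.e. 10x). */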
2280
2281 /* The entry for our multiplication cache/hash table. */
2282 struct alg_hash_entry {
2283 /* The number we are multiplying by. */
2284 unsigned HOST_WIDE_INT t;
2285
2286 /* The mode in which we are multiplying something by T. */
2287 enum machine_mode mode;
2288
2289 /* The best multiplication algorithm for t. */
2290 enum alg_code alg;
2291
2292 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2293 Otherwise, the cost within which multiplication by T is
2294 impossible. */
2295 struct mult_cost cost;
2296 };
2297
2298 /* The number of cache/hash entries. */
2299 #if HOST_BITS_PER_WIDE_INT == 64
2300 #define NUM_ALG_HASH_ENTRIES 1031
2301 #else
2302 #define NUM_ALG_HASH_ENTRIES 307
2303 #endif
2304
2305 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2306 actually a hash table. If we have a collision, the older
2307 entry is kicked out. */
2308 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2309
2310 /* Indicates the type of fixup needed after a constant multiplication.
2311 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2312 the result should be negated, and ADD_VARIANT means that the
2313 multiplicand should be added to the result. */
2314 enum mult_variant {basic_variant, negate_variant, add_variant};
2315
2316 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2317 const struct mult_cost *, enum machine_mode mode);
2318 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2319 struct algorithm *, enum mult_variant *, int);
2320 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2321 const struct algorithm *, enum mult_variant);
2322 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2323 int, rtx *, int *, int *);
2324 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2325 static rtx extract_high_half (enum machine_mode, rtx);
2326 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2327 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2328 int, int);
2329 /* Compute and return the best algorithm for multiplying by T.
2330 The algorithm must cost less than COST_LIMIT.
2331 If retval.cost >= COST_LIMIT, no algorithm was found and all
2332 other fields of the returned struct are undefined.
2333 MODE is the machine mode of the multiplication. */
2334
2335 static void
2336 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2337 const struct mult_cost *cost_limit, enum machine_mode mode)
2338 {
2339 int m;
2340 struct algorithm *alg_in, *best_alg;
2341 struct mult_cost best_cost;
2342 struct mult_cost new_limit;
2343 int op_cost, op_latency;
2344 unsigned HOST_WIDE_INT q;
2345 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2346 int hash_index;
2347 bool cache_hit = false;
2348 enum alg_code cache_alg = alg_zero;
2349
2350 /* Indicate that no algorithm is yet found. If no algorithm
2351 is found, this value will be returned and indicates failure. */
2352 alg_out->cost.cost = cost_limit->cost + 1;
2353 alg_out->cost.latency = cost_limit->latency + 1;
2354
2355 if (cost_limit->cost < 0
2356 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2357 return;
2358
2359 /* Restrict the bits of "t" to the multiplication's mode. */
2360 t &= GET_MODE_MASK (mode);
2361
2362 /* t == 1 can be done in zero cost. */
2363 if (t == 1)
2364 {
2365 alg_out->ops = 1;
2366 alg_out->cost.cost = 0;
2367 alg_out->cost.latency = 0;
2368 alg_out->op[0] = alg_m;
2369 return;
2370 }
2371
2372 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2373 fail now. */
2374 if (t == 0)
2375 {
2376 if (MULT_COST_LESS (cost_limit, zero_cost))
2377 return;
2378 else
2379 {
2380 alg_out->ops = 1;
2381 alg_out->cost.cost = zero_cost;
2382 alg_out->cost.latency = zero_cost;
2383 alg_out->op[0] = alg_zero;
2384 return;
2385 }
2386 }
2387
2388 /* We'll be needing a couple extra algorithm structures now. */
2389
2390 alg_in = XALLOCA (struct algorithm);
2391 best_alg = XALLOCA (struct algorithm);
2392 best_cost = *cost_limit;
2393
2394 /* Compute the hash index. */
2395 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2396
2397 /* See if we already know what to do for T. */
2398 if (alg_hash[hash_index].t == t
2399 && alg_hash[hash_index].mode == mode
2400 && alg_hash[hash_index].alg != alg_unknown)
2401 {
2402 cache_alg = alg_hash[hash_index].alg;
2403
2404 if (cache_alg == alg_impossible)
2405 {
2406 /* The cache tells us that it's impossible to synthesize
2407 multiplication by T within alg_hash[hash_index].cost. */
2408 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2409 /* COST_LIMIT is at least as restrictive as the one
2410 recorded in the hash table, in which case we have no
2411 hope of synthesizing a multiplication. Just
2412 return. */
2413 return;
2414
2415 /* If we get here, COST_LIMIT is less restrictive than the
2416 one recorded in the hash table, so we may be able to
2417 synthesize a multiplication. Proceed as if we didn't
2418 have the cache entry. */
2419 }
2420 else
2421 {
2422 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2423 /* The cached algorithm shows that this multiplication
2424 requires more cost than COST_LIMIT. Just return. This
2425 way, we don't clobber this cache entry with
2426 alg_impossible but retain useful information. */
2427 return;
2428
2429 cache_hit = true;
2430
2431 switch (cache_alg)
2432 {
2433 case alg_shift:
2434 goto do_alg_shift;
2435
2436 case alg_add_t_m2:
2437 case alg_sub_t_m2:
2438 goto do_alg_addsub_t_m2;
2439
2440 case alg_add_factor:
2441 case alg_sub_factor:
2442 goto do_alg_addsub_factor;
2443
2444 case alg_add_t2_m:
2445 goto do_alg_add_t2_m;
2446
2447 case alg_sub_t2_m:
2448 goto do_alg_sub_t2_m;
2449
2450 default:
2451 gcc_unreachable ();
2452 }
2453 }
2454 }
2455
2456 /* If we have a group of zero bits at the low-order part of T, try
2457 multiplying by the remaining bits and then doing a shift. */
2458
2459 if ((t & 1) == 0)
2460 {
2461 do_alg_shift:
2462 m = floor_log2 (t & -t); /* m = number of low zero bits */
2463 if (m < maxm)
2464 {
2465 q = t >> m;
2466 /* The function expand_shift will choose between a shift and
2467 a sequence of additions, so the observed cost is given as
2468 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2469 op_cost = m * add_cost[mode];
2470 if (shift_cost[mode][m] < op_cost)
2471 op_cost = shift_cost[mode][m];
2472 new_limit.cost = best_cost.cost - op_cost;
2473 new_limit.latency = best_cost.latency - op_cost;
2474 synth_mult (alg_in, q, &new_limit, mode);
2475
2476 alg_in->cost.cost += op_cost;
2477 alg_in->cost.latency += op_cost;
2478 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2479 {
2480 struct algorithm *x;
2481 best_cost = alg_in->cost;
2482 x = alg_in, alg_in = best_alg, best_alg = x;
2483 best_alg->log[best_alg->ops] = m;
2484 best_alg->op[best_alg->ops] = alg_shift;
2485 }
2486 }
2487 if (cache_hit)
2488 goto done;
2489 }
2490
2491 /* If we have an odd number, add or subtract one. */
2492 if ((t & 1) != 0)
2493 {
2494 unsigned HOST_WIDE_INT w;
2495
2496 do_alg_addsub_t_m2:
2497 for (w = 1; (w & t) != 0; w <<= 1)
2498 ;
2499 /* If T was -1, then W will be zero after the loop. This is another
2500 case where T ends with ...111. Handling this by multiplying by (T + 1)
2501 and subtracting 1 produces slightly better code and makes algorithm
2502 selection much faster than treating it like the ...0111 case
2503 below. */
2504 if (w == 0
2505 || (w > 2
2506 /* Reject the case where t is 3.
2507 Thus we prefer addition in that case. */
2508 && t != 3))
2509 {
2510 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2511
2512 op_cost = add_cost[mode];
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, t + 1, &new_limit, mode);
2516
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 {
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = 0;
2525 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2526 }
2527 }
2528 else
2529 {
2530 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2531
2532 op_cost = add_cost[mode];
2533 new_limit.cost = best_cost.cost - op_cost;
2534 new_limit.latency = best_cost.latency - op_cost;
2535 synth_mult (alg_in, t - 1, &new_limit, mode);
2536
2537 alg_in->cost.cost += op_cost;
2538 alg_in->cost.latency += op_cost;
2539 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2540 {
2541 struct algorithm *x;
2542 best_cost = alg_in->cost;
2543 x = alg_in, alg_in = best_alg, best_alg = x;
2544 best_alg->log[best_alg->ops] = 0;
2545 best_alg->op[best_alg->ops] = alg_add_t_m2;
2546 }
2547 }
2548 if (cache_hit)
2549 goto done;
2550 }
2551
2552 /* Look for factors of t of the form
2553 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2554 If we find such a factor, we can multiply by t using an algorithm that
2555 multiplies by q, shift the result by m and add/subtract it to itself.
2556
2557 We search for large factors first and loop down, even if large factors
2558 are less probable than small; if we find a large factor we will find a
2559 good sequence quickly, and therefore be able to prune (by decreasing
2560 COST_LIMIT) the search. */
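/* For example, t = 45 factors as 15 * 3 with 15 = 2^4 - 1, so one candidate
found here is: synthesize q = 3, then total := (total << 4) - total. */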
2561
2562 do_alg_addsub_factor:
2563 for (m = floor_log2 (t - 1); m >= 2; m--)
2564 {
2565 unsigned HOST_WIDE_INT d;
2566
2567 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2568 if (t % d == 0 && t > d && m < maxm
2569 && (!cache_hit || cache_alg == alg_add_factor))
2570 {
2571 /* If the target has a cheap shift-and-add instruction use
2572 that in preference to a shift insn followed by an add insn.
2573 Assume that the shift-and-add is "atomic" with a latency
2574 equal to its cost, otherwise assume that on superscalar
2575 hardware the shift may be executed concurrently with the
2576 earlier steps in the algorithm. */
2577 op_cost = add_cost[mode] + shift_cost[mode][m];
2578 if (shiftadd_cost[mode][m] < op_cost)
2579 {
2580 op_cost = shiftadd_cost[mode][m];
2581 op_latency = op_cost;
2582 }
2583 else
2584 op_latency = add_cost[mode];
2585
2586 new_limit.cost = best_cost.cost - op_cost;
2587 new_limit.latency = best_cost.latency - op_latency;
2588 synth_mult (alg_in, t / d, &new_limit, mode);
2589
2590 alg_in->cost.cost += op_cost;
2591 alg_in->cost.latency += op_latency;
2592 if (alg_in->cost.latency < op_cost)
2593 alg_in->cost.latency = op_cost;
2594 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595 {
2596 struct algorithm *x;
2597 best_cost = alg_in->cost;
2598 x = alg_in, alg_in = best_alg, best_alg = x;
2599 best_alg->log[best_alg->ops] = m;
2600 best_alg->op[best_alg->ops] = alg_add_factor;
2601 }
2602 /* Other factors will have been taken care of in the recursion. */
2603 break;
2604 }
2605
2606 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2607 if (t % d == 0 && t > d && m < maxm
2608 && (!cache_hit || cache_alg == alg_sub_factor))
2609 {
2610 /* If the target has a cheap shift-and-subtract insn use
2611 that in preference to a shift insn followed by a sub insn.
2612 Assume that the shift-and-sub is "atomic" with a latency
2613 equal to its cost, otherwise assume that on superscalar
2614 hardware the shift may be executed concurrently with the
2615 earlier steps in the algorithm. */
2616 op_cost = add_cost[mode] + shift_cost[mode][m];
2617 if (shiftsub_cost[mode][m] < op_cost)
2618 {
2619 op_cost = shiftsub_cost[mode][m];
2620 op_latency = op_cost;
2621 }
2622 else
2623 op_latency = add_cost[mode];
2624
2625 new_limit.cost = best_cost.cost - op_cost;
2626 new_limit.latency = best_cost.latency - op_latency;
2627 synth_mult (alg_in, t / d, &new_limit, mode);
2628
2629 alg_in->cost.cost += op_cost;
2630 alg_in->cost.latency += op_latency;
2631 if (alg_in->cost.latency < op_cost)
2632 alg_in->cost.latency = op_cost;
2633 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634 {
2635 struct algorithm *x;
2636 best_cost = alg_in->cost;
2637 x = alg_in, alg_in = best_alg, best_alg = x;
2638 best_alg->log[best_alg->ops] = m;
2639 best_alg->op[best_alg->ops] = alg_sub_factor;
2640 }
2641 break;
2642 }
2643 }
2644 if (cache_hit)
2645 goto done;
2646
2647 /* Try shift-and-add (load effective address) instructions,
2648 i.e. do a*3, a*5, a*9. */
2649 if ((t & 1) != 0)
2650 {
2651 do_alg_add_t2_m:
2652 q = t - 1;
2653 q = q & -q;
2654 m = exact_log2 (q);
2655 if (m >= 0 && m < maxm)
2656 {
2657 op_cost = shiftadd_cost[mode][m];
2658 new_limit.cost = best_cost.cost - op_cost;
2659 new_limit.latency = best_cost.latency - op_cost;
2660 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2661
2662 alg_in->cost.cost += op_cost;
2663 alg_in->cost.latency += op_cost;
2664 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665 {
2666 struct algorithm *x;
2667 best_cost = alg_in->cost;
2668 x = alg_in, alg_in = best_alg, best_alg = x;
2669 best_alg->log[best_alg->ops] = m;
2670 best_alg->op[best_alg->ops] = alg_add_t2_m;
2671 }
2672 }
2673 if (cache_hit)
2674 goto done;
2675
2676 do_alg_sub_t2_m:
2677 q = t + 1;
2678 q = q & -q;
2679 m = exact_log2 (q);
2680 if (m >= 0 && m < maxm)
2681 {
2682 op_cost = shiftsub_cost[mode][m];
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_cost;
2685 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2686
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_cost;
2689 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690 {
2691 struct algorithm *x;
2692 best_cost = alg_in->cost;
2693 x = alg_in, alg_in = best_alg, best_alg = x;
2694 best_alg->log[best_alg->ops] = m;
2695 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2696 }
2697 }
2698 if (cache_hit)
2699 goto done;
2700 }
2701
2702 done:
2703 /* If best_cost has not decreased, we have not found any algorithm. */
2704 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2705 {
2706 /* We failed to find an algorithm. Record alg_impossible for
2707 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2708 we are asked to find an algorithm for T within the same or
2709 lower COST_LIMIT, we can immediately return to the
2710 caller. */
2711 alg_hash[hash_index].t = t;
2712 alg_hash[hash_index].mode = mode;
2713 alg_hash[hash_index].alg = alg_impossible;
2714 alg_hash[hash_index].cost = *cost_limit;
2715 return;
2716 }
2717
2718 /* Cache the result. */
2719 if (!cache_hit)
2720 {
2721 alg_hash[hash_index].t = t;
2722 alg_hash[hash_index].mode = mode;
2723 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2724 alg_hash[hash_index].cost.cost = best_cost.cost;
2725 alg_hash[hash_index].cost.latency = best_cost.latency;
2726 }
2727
2728 /* If we are getting too long a sequence for `struct algorithm'
2729 to record, make this search fail. */
2730 if (best_alg->ops == MAX_BITS_PER_WORD)
2731 return;
2732
2733 /* Copy the algorithm from temporary space to the space at alg_out.
2734 We avoid using structure assignment because the majority of
2735 best_alg is normally undefined, and this is a critical function. */
2736 alg_out->ops = best_alg->ops + 1;
2737 alg_out->cost = best_cost;
2738 memcpy (alg_out->op, best_alg->op,
2739 alg_out->ops * sizeof *alg_out->op);
2740 memcpy (alg_out->log, best_alg->log,
2741 alg_out->ops * sizeof *alg_out->log);
2742 }
2743 \f
2744 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2745 Try three variations:
2746
2747 - a shift/add sequence based on VAL itself
2748 - a shift/add sequence based on -VAL, followed by a negation
2749 - a shift/add sequence based on VAL - 1, followed by an addition.
2750
2751 Return true if the cheapest of these cost less than MULT_COST,
2752 describing the algorithm in *ALG and final fixup in *VARIANT. */
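/* For example, on a 32-bit target VAL = -3 would typically be handled by
the second variation: synthesize x * 3 as (x << 1) + x and then negate,
rather than multiplying by the huge unsigned constant 0xfffffffd. */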
2753
2754 static bool
2755 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2756 struct algorithm *alg, enum mult_variant *variant,
2757 int mult_cost)
2758 {
2759 struct algorithm alg2;
2760 struct mult_cost limit;
2761 int op_cost;
2762
2763 /* Fail quickly for impossible bounds. */
2764 if (mult_cost < 0)
2765 return false;
2766
2767 /* Ensure that mult_cost provides a reasonable upper bound.
2768 Any constant multiplication can be performed with fewer
2769 than 2 * bits additions. */
2770 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2771 if (mult_cost > op_cost)
2772 mult_cost = op_cost;
2773
2774 *variant = basic_variant;
2775 limit.cost = mult_cost;
2776 limit.latency = mult_cost;
2777 synth_mult (alg, val, &limit, mode);
2778
2779 /* This works only if the negated value actually fits in an
2780 `unsigned int'. */
2781 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2782 {
2783 op_cost = neg_cost[mode];
2784 if (MULT_COST_LESS (&alg->cost, mult_cost))
2785 {
2786 limit.cost = alg->cost.cost - op_cost;
2787 limit.latency = alg->cost.latency - op_cost;
2788 }
2789 else
2790 {
2791 limit.cost = mult_cost - op_cost;
2792 limit.latency = mult_cost - op_cost;
2793 }
2794
2795 synth_mult (&alg2, -val, &limit, mode);
2796 alg2.cost.cost += op_cost;
2797 alg2.cost.latency += op_cost;
2798 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2799 *alg = alg2, *variant = negate_variant;
2800 }
2801
2802 /* This proves very useful for division-by-constant. */
2803 op_cost = add_cost[mode];
2804 if (MULT_COST_LESS (&alg->cost, mult_cost))
2805 {
2806 limit.cost = alg->cost.cost - op_cost;
2807 limit.latency = alg->cost.latency - op_cost;
2808 }
2809 else
2810 {
2811 limit.cost = mult_cost - op_cost;
2812 limit.latency = mult_cost - op_cost;
2813 }
2814
2815 synth_mult (&alg2, val - 1, &limit, mode);
2816 alg2.cost.cost += op_cost;
2817 alg2.cost.latency += op_cost;
2818 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2819 *alg = alg2, *variant = add_variant;
2820
2821 return MULT_COST_LESS (&alg->cost, mult_cost);
2822 }
2823
2824 /* A subroutine of expand_mult, used for constant multiplications.
2825 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2826 convenient. Use the shift/add sequence described by ALG and apply
2827 the final fixup specified by VARIANT. */
2828
2829 static rtx
2830 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2831 rtx target, const struct algorithm *alg,
2832 enum mult_variant variant)
2833 {
2834 HOST_WIDE_INT val_so_far;
2835 rtx insn, accum, tem;
2836 int opno;
2837 enum machine_mode nmode;
2838
2839 /* Avoid referencing memory over and over and invalid sharing
2840 on SUBREGs. */
2841 op0 = force_reg (mode, op0);
2842
2843 /* ACCUM starts out either as OP0 or as a zero, depending on
2844 the first operation. */
2845
2846 if (alg->op[0] == alg_zero)
2847 {
2848 accum = copy_to_mode_reg (mode, const0_rtx);
2849 val_so_far = 0;
2850 }
2851 else if (alg->op[0] == alg_m)
2852 {
2853 accum = copy_to_mode_reg (mode, op0);
2854 val_so_far = 1;
2855 }
2856 else
2857 gcc_unreachable ();
2858
2859 for (opno = 1; opno < alg->ops; opno++)
2860 {
2861 int log = alg->log[opno];
2862 rtx shift_subtarget = optimize ? 0 : accum;
2863 rtx add_target
2864 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2865 && !optimize)
2866 ? target : 0;
2867 rtx accum_target = optimize ? 0 : accum;
2868
2869 switch (alg->op[opno])
2870 {
2871 case alg_shift:
2872 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2873 build_int_cst (NULL_TREE, log),
2874 NULL_RTX, 0);
2875 val_so_far <<= log;
2876 break;
2877
2878 case alg_add_t_m2:
2879 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2880 build_int_cst (NULL_TREE, log),
2881 NULL_RTX, 0);
2882 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2883 add_target ? add_target : accum_target);
2884 val_so_far += (HOST_WIDE_INT) 1 << log;
2885 break;
2886
2887 case alg_sub_t_m2:
2888 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2889 build_int_cst (NULL_TREE, log),
2890 NULL_RTX, 0);
2891 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2892 add_target ? add_target : accum_target);
2893 val_so_far -= (HOST_WIDE_INT) 1 << log;
2894 break;
2895
2896 case alg_add_t2_m:
2897 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2898 build_int_cst (NULL_TREE, log),
2899 shift_subtarget,
2900 0);
2901 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2902 add_target ? add_target : accum_target);
2903 val_so_far = (val_so_far << log) + 1;
2904 break;
2905
2906 case alg_sub_t2_m:
2907 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2908 build_int_cst (NULL_TREE, log),
2909 shift_subtarget, 0);
2910 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2911 add_target ? add_target : accum_target);
2912 val_so_far = (val_so_far << log) - 1;
2913 break;
2914
2915 case alg_add_factor:
2916 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2917 build_int_cst (NULL_TREE, log),
2918 NULL_RTX, 0);
2919 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2920 add_target ? add_target : accum_target);
2921 val_so_far += val_so_far << log;
2922 break;
2923
2924 case alg_sub_factor:
2925 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2926 build_int_cst (NULL_TREE, log),
2927 NULL_RTX, 0);
2928 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2929 (add_target
2930 ? add_target : (optimize ? 0 : tem)));
2931 val_so_far = (val_so_far << log) - val_so_far;
2932 break;
2933
2934 default:
2935 gcc_unreachable ();
2936 }
2937
2938 /* Write a REG_EQUAL note on the last insn so that we can cse
2939 multiplication sequences. Note that if ACCUM is a SUBREG,
2940 we've set the inner register and must properly indicate
2941 that. */
2942
2943 tem = op0, nmode = mode;
2944 if (GET_CODE (accum) == SUBREG)
2945 {
2946 nmode = GET_MODE (SUBREG_REG (accum));
2947 tem = gen_lowpart (nmode, op0);
2948 }
2949
2950 insn = get_last_insn ();
2951 set_unique_reg_note (insn, REG_EQUAL,
2952 gen_rtx_MULT (nmode, tem,
2953 GEN_INT (val_so_far)));
2954 }
2955
2956 if (variant == negate_variant)
2957 {
2958 val_so_far = -val_so_far;
2959 accum = expand_unop (mode, neg_optab, accum, target, 0);
2960 }
2961 else if (variant == add_variant)
2962 {
2963 val_so_far = val_so_far + 1;
2964 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2965 }
2966
2967 /* Compare only the bits of val and val_so_far that are significant
2968 in the result mode, to avoid sign-/zero-extension confusion. */
2969 val &= GET_MODE_MASK (mode);
2970 val_so_far &= GET_MODE_MASK (mode);
2971 gcc_assert (val == val_so_far);
2972
2973 return accum;
2974 }
2975
2976 /* Perform a multiplication and return an rtx for the result.
2977 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2978 TARGET is a suggestion for where to store the result (an rtx).
2979
2980 We check specially for a constant integer as OP1.
2981 If you want this check for OP0 as well, then before calling
2982 you should swap the two operands if OP0 would be constant. */
2983
2984 rtx
2985 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2986 int unsignedp)
2987 {
2988 enum mult_variant variant;
2989 struct algorithm algorithm;
2990 int max_cost;
2991
2992 /* Handling const0_rtx here allows us to use zero as a rogue value for
2993 coeff below. */
2994 if (op1 == const0_rtx)
2995 return const0_rtx;
2996 if (op1 == const1_rtx)
2997 return op0;
2998 if (op1 == constm1_rtx)
2999 return expand_unop (mode,
3000 GET_MODE_CLASS (mode) == MODE_INT
3001 && !unsignedp && flag_trapv
3002 ? negv_optab : neg_optab,
3003 op0, target, 0);
3004
3005 /* These are the operations that are potentially turned into a sequence
3006 of shifts and additions. */
3007 if (SCALAR_INT_MODE_P (mode)
3008 && (unsignedp || !flag_trapv))
3009 {
3010 HOST_WIDE_INT coeff = 0;
3011 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3012
3013 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3014 less than or equal in size to `unsigned int' this doesn't matter.
3015 If the mode is larger than `unsigned int', then synth_mult works
3016 only if the constant value exactly fits in an `unsigned int' without
3017 any truncation. This means that multiplying by negative values does
3018 not work; results are off by 2^32 on a 32-bit machine. */
3019
3020 if (GET_CODE (op1) == CONST_INT)
3021 {
3022 /* Attempt to handle multiplication of DImode values by negative
3023 coefficients, by performing the multiplication by a positive
3024 multiplier and then inverting the result. */
3025 if (INTVAL (op1) < 0
3026 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3027 {
3028 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3029 result is interpreted as an unsigned coefficient.
3030 Exclude cost of op0 from max_cost to match the cost
3031 calculation of the synth_mult. */
3032 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3033 - neg_cost[mode];
3034 if (max_cost > 0
3035 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3036 &variant, max_cost))
3037 {
3038 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3039 NULL_RTX, &algorithm,
3040 variant);
3041 return expand_unop (mode, neg_optab, temp, target, 0);
3042 }
3043 }
3044 else coeff = INTVAL (op1);
3045 }
3046 else if (GET_CODE (op1) == CONST_DOUBLE)
3047 {
3048 /* If we are multiplying in DImode, it may still be a win
3049 to try to work with shifts and adds. */
3050 if (CONST_DOUBLE_HIGH (op1) == 0)
3051 coeff = CONST_DOUBLE_LOW (op1);
3052 else if (CONST_DOUBLE_LOW (op1) == 0
3053 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3054 {
3055 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3056 + HOST_BITS_PER_WIDE_INT;
3057 return expand_shift (LSHIFT_EXPR, mode, op0,
3058 build_int_cst (NULL_TREE, shift),
3059 target, unsignedp);
3060 }
3061 }
3062
3063 /* We used to test optimize here, on the grounds that it's better to
3064 produce a smaller program when -O is not used. But this causes
3065 such a terrible slowdown sometimes that it seems better to always
3066 use synth_mult. */
3067 if (coeff != 0)
3068 {
3069 /* Special case powers of two. */
3070 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3071 return expand_shift (LSHIFT_EXPR, mode, op0,
3072 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3073 target, unsignedp);
3074
3075 /* Exclude cost of op0 from max_cost to match the cost
3076 calculation of the synth_mult. */
3077 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3078 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3079 max_cost))
3080 return expand_mult_const (mode, op0, coeff, target,
3081 &algorithm, variant);
3082 }
3083 }
3084
3085 if (GET_CODE (op0) == CONST_DOUBLE)
3086 {
3087 rtx temp = op0;
3088 op0 = op1;
3089 op1 = temp;
3090 }
3091
3092 /* Expand x*2.0 as x+x. */
3093 if (GET_CODE (op1) == CONST_DOUBLE
3094 && SCALAR_FLOAT_MODE_P (mode))
3095 {
3096 REAL_VALUE_TYPE d;
3097 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3098
3099 if (REAL_VALUES_EQUAL (d, dconst2))
3100 {
3101 op0 = force_reg (GET_MODE (op0), op0);
3102 return expand_binop (mode, add_optab, op0, op0,
3103 target, unsignedp, OPTAB_LIB_WIDEN);
3104 }
3105 }
3106
3107 /* This used to use umul_optab if unsigned, but for non-widening multiply
3108 there is no difference between signed and unsigned. */
3109 op0 = expand_binop (mode,
3110 ! unsignedp
3111 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3112 ? smulv_optab : smul_optab,
3113 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3114 gcc_assert (op0);
3115 return op0;
3116 }
3117 \f
3118 /* Return the smallest n such that 2**n >= X. */
3119
3120 int
3121 ceil_log2 (unsigned HOST_WIDE_INT x)
3122 {
3123 return floor_log2 (x - 1) + 1;
3124 }
3125
3126 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3127 replace division by D, and put the least significant N bits of the result
3128 in *MULTIPLIER_PTR and return the most significant bit.
3129
3130 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3131 needed precision is in PRECISION (should be <= N).
3132
3133 PRECISION should be as small as possible so this function can choose
3134 multiplier more freely.
3135
3136 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3137 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3138
3139 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3140 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
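/* For example, for unsigned 32-bit division by 5 (N = PRECISION = 32) this
chooses the multiplier 0xcccccccd with *POST_SHIFT_PTR = 2 and *LGUP_PTR = 3,
so x / 5 == (x * 0xcccccccd) >> 34, i.e. the high half of the 64-bit
product shifted right by 2. */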
3141
3142 static
3143 unsigned HOST_WIDE_INT
3144 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3145 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3146 {
3147 HOST_WIDE_INT mhigh_hi, mlow_hi;
3148 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3149 int lgup, post_shift;
3150 int pow, pow2;
3151 unsigned HOST_WIDE_INT nl, dummy1;
3152 HOST_WIDE_INT nh, dummy2;
3153
3154 /* lgup = ceil(log2(divisor)); */
3155 lgup = ceil_log2 (d);
3156
3157 gcc_assert (lgup <= n);
3158
3159 pow = n + lgup;
3160 pow2 = n + lgup - precision;
3161
3162 /* We could handle this with some effort, but this case is much
3163 better handled directly with a scc insn, so rely on the caller using
3164 that. */
3165 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3166
3167 /* mlow = 2^(N + lgup)/d */
3168 if (pow >= HOST_BITS_PER_WIDE_INT)
3169 {
3170 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3171 nl = 0;
3172 }
3173 else
3174 {
3175 nh = 0;
3176 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3177 }
3178 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3179 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3180
3181 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3182 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3183 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3184 else
3185 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3186 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3187 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3188
3189 gcc_assert (!mhigh_hi || nh - d < d);
3190 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3191 /* Assert that mlow < mhigh. */
3192 gcc_assert (mlow_hi < mhigh_hi
3193 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3194
3195 /* If precision == N, then mlow, mhigh exceed 2^N
3196 (but they do not exceed 2^(N+1)). */
3197
3198 /* Reduce to lowest terms. */
3199 for (post_shift = lgup; post_shift > 0; post_shift--)
3200 {
3201 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3202 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3203 if (ml_lo >= mh_lo)
3204 break;
3205
3206 mlow_hi = 0;
3207 mlow_lo = ml_lo;
3208 mhigh_hi = 0;
3209 mhigh_lo = mh_lo;
3210 }
3211
3212 *post_shift_ptr = post_shift;
3213 *lgup_ptr = lgup;
3214 if (n < HOST_BITS_PER_WIDE_INT)
3215 {
3216 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3217 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3218 return mhigh_lo >= mask;
3219 }
3220 else
3221 {
3222 *multiplier_ptr = GEN_INT (mhigh_lo);
3223 return mhigh_hi;
3224 }
3225 }
3226
3227 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3228 congruent to 1 (mod 2**N). */
3229
3230 static unsigned HOST_WIDE_INT
3231 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3232 {
3233 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3234
3235 /* The algorithm notes that the choice y = x satisfies
3236 x*y == 1 mod 2^3, since x is assumed odd.
3237 Each iteration doubles the number of bits of significance in y. */
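/* For example, with x = 7 and n = 8, starting from y = 7 (correct mod 2^3)
the iteration y = y * (2 - x*y) (mod 2^8) settles on y = 183, and indeed
7 * 183 == 1281 == 5 * 256 + 1. */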
3238
3239 unsigned HOST_WIDE_INT mask;
3240 unsigned HOST_WIDE_INT y = x;
3241 int nbit = 3;
3242
3243 mask = (n == HOST_BITS_PER_WIDE_INT
3244 ? ~(unsigned HOST_WIDE_INT) 0
3245 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3246
3247 while (nbit < n)
3248 {
3249 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3250 nbit *= 2;
3251 }
3252 return y;
3253 }
3254
3255 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3256 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3257 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3258 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3259 become signed.
3260
3261 The result is put in TARGET if that is convenient.
3262
3263 MODE is the mode of operation. */
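/* The code below relies on the identity (taken modulo 2^N, N = mode width):
high half of unsigned product == high half of signed product
+ (x < 0 ? y : 0) + (y < 0 ? x : 0)
so the two masked sign terms are added when converting signed to unsigned,
and subtracted when converting unsigned to signed. */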
3264
3265 rtx
3266 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3267 rtx op1, rtx target, int unsignedp)
3268 {
3269 rtx tem;
3270 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3271
3272 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3273 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3274 NULL_RTX, 0);
3275 tem = expand_and (mode, tem, op1, NULL_RTX);
3276 adj_operand
3277 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3278 adj_operand);
3279
3280 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3281 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3282 NULL_RTX, 0);
3283 tem = expand_and (mode, tem, op0, NULL_RTX);
3284 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3285 target);
3286
3287 return target;
3288 }
3289
3290 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3291
3292 static rtx
3293 extract_high_half (enum machine_mode mode, rtx op)
3294 {
3295 enum machine_mode wider_mode;
3296
3297 if (mode == word_mode)
3298 return gen_highpart (mode, op);
3299
3300 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3301
3302 wider_mode = GET_MODE_WIDER_MODE (mode);
3303 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3304 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3305 return convert_modes (mode, wider_mode, op, 0);
3306 }
3307
3308 /* Like expand_mult_highpart, but only consider using a multiplication
3309 optab. OP1 is an rtx for the constant operand. */
3310
3311 static rtx
3312 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3313 rtx target, int unsignedp, int max_cost)
3314 {
3315 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3316 enum machine_mode wider_mode;
3317 optab moptab;
3318 rtx tem;
3319 int size;
3320
3321 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3322
3323 wider_mode = GET_MODE_WIDER_MODE (mode);
3324 size = GET_MODE_BITSIZE (mode);
3325
3326 /* Firstly, try using a multiplication insn that only generates the needed
3327 high part of the product, and in the sign flavor of unsignedp. */
3328 if (mul_highpart_cost[mode] < max_cost)
3329 {
3330 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3331 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3332 unsignedp, OPTAB_DIRECT);
3333 if (tem)
3334 return tem;
3335 }
3336
3337 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3338 Need to adjust the result after the multiplication. */
3339 if (size - 1 < BITS_PER_WORD
3340 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3341 + 4 * add_cost[mode] < max_cost))
3342 {
3343 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3344 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3345 unsignedp, OPTAB_DIRECT);
3346 if (tem)
3347 /* We used the wrong signedness. Adjust the result. */
3348 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3349 tem, unsignedp);
3350 }
3351
3352 /* Try widening multiplication. */
3353 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3354 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3355 && mul_widen_cost[wider_mode] < max_cost)
3356 {
3357 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3358 unsignedp, OPTAB_WIDEN);
3359 if (tem)
3360 return extract_high_half (mode, tem);
3361 }
3362
3363 /* Try widening the mode and performing a non-widening multiplication. */
3364 if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3365 && size - 1 < BITS_PER_WORD
3366 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3367 {
3368 rtx insns, wop0, wop1;
3369
3370 /* We need to widen the operands, for example to ensure the
3371 constant multiplier is correctly sign or zero extended.
3372 Use a sequence to clean up any instructions emitted by
3373 the conversions if things don't work out. */
3374 start_sequence ();
3375 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3376 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3377 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3378 unsignedp, OPTAB_WIDEN);
3379 insns = get_insns ();
3380 end_sequence ();
3381
3382 if (tem)
3383 {
3384 emit_insn (insns);
3385 return extract_high_half (mode, tem);
3386 }
3387 }
3388
3389 /* Try widening multiplication of opposite signedness, and adjust. */
3390 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3391 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3392 && size - 1 < BITS_PER_WORD
3393 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3394 + 4 * add_cost[mode] < max_cost))
3395 {
3396 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3397 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3398 if (tem != 0)
3399 {
3400 tem = extract_high_half (mode, tem);
3401 /* We used the wrong signedness. Adjust the result. */
3402 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3403 target, unsignedp);
3404 }
3405 }
3406
3407 return 0;
3408 }
3409
3410 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3411 putting the high half of the result in TARGET if that is convenient,
3412 and return where the result is. If the operation cannot be performed,
3413 0 is returned.
3414
3415 MODE is the mode of operation and result.
3416
3417 UNSIGNEDP nonzero means unsigned multiply.
3418
3419 MAX_COST is the total allowed cost for the expanded RTL. */
3420
3421 static rtx
3422 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3423 rtx target, int unsignedp, int max_cost)
3424 {
3425 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3426 unsigned HOST_WIDE_INT cnst1;
3427 int extra_cost;
3428 bool sign_adjust = false;
3429 enum mult_variant variant;
3430 struct algorithm alg;
3431 rtx tem;
3432
3433 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3434 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3435 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3436
3437 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3438
3439 /* We can't optimize modes wider than BITS_PER_WORD.
3440 ??? We might be able to perform double-word arithmetic if
3441 mode == word_mode, however all the cost calculations in
3442 synth_mult etc. assume single-word operations. */
3443 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3444 return expand_mult_highpart_optab (mode, op0, op1, target,
3445 unsignedp, max_cost);
3446
3447 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3448
3449 /* Check whether we are trying to multiply by a negative constant. */
3450 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3451 {
3452 sign_adjust = true;
3453 extra_cost += add_cost[mode];
3454 }
3455
3456 /* See whether shift/add multiplication is cheap enough. */
3457 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3458 max_cost - extra_cost))
3459 {
3460 /* See whether the specialized multiplication optabs are
3461 cheaper than the shift/add version. */
3462 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3463 alg.cost.cost + extra_cost);
3464 if (tem)
3465 return tem;
3466
3467 tem = convert_to_mode (wider_mode, op0, unsignedp);
3468 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3469 tem = extract_high_half (mode, tem);
3470
3471 /* Adjust result for signedness. */
3472 if (sign_adjust)
3473 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3474
3475 return tem;
3476 }
3477 return expand_mult_highpart_optab (mode, op0, op1, target,
3478 unsignedp, max_cost);
3479 }
3480
3481
3482 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3483
3484 static rtx
3485 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3486 {
3487 unsigned HOST_WIDE_INT masklow, maskhigh;
3488 rtx result, temp, shift, label;
3489 int logd;
3490
3491 logd = floor_log2 (d);
3492 result = gen_reg_rtx (mode);
3493
3494 /* Avoid conditional branches when they're expensive. */
3495 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3496 && optimize_insn_for_speed_p ())
3497 {
3498 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3499 mode, 0, -1);
3500 if (signmask)
3501 {
3502 signmask = force_reg (mode, signmask);
3503 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3504 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3505
3506 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3507 which instruction sequence to use. If logical right shifts
3508 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3509 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
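/* Concretely, with SIGNMASK = OP0 < 0 ? -1 : 0, the XOR/SUB variant
   computes -((-OP0) & (D - 1)) for negative OP0 and OP0 & (D - 1)
   otherwise, while the shift-based variant computes
   ((OP0 + T) & (D - 1)) - T with T = OP0 < 0 ? D - 1 : 0; both yield
   OP0 % D with the sign of OP0.  */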
3510
3511 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3512 if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3513 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3514 {
3515 temp = expand_binop (mode, xor_optab, op0, signmask,
3516 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3517 temp = expand_binop (mode, sub_optab, temp, signmask,
3518 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3519 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3520 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3521 temp = expand_binop (mode, xor_optab, temp, signmask,
3522 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3523 temp = expand_binop (mode, sub_optab, temp, signmask,
3524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3525 }
3526 else
3527 {
3528 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3529 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3530 signmask = force_reg (mode, signmask);
3531
3532 temp = expand_binop (mode, add_optab, op0, signmask,
3533 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536 temp = expand_binop (mode, sub_optab, temp, signmask,
3537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3538 }
3539 return temp;
3540 }
3541 }
3542
3543 /* Mask contains the mode's signbit and the significant bits of the
3544 modulus. By including the signbit in the operation, many targets
3545 can avoid an explicit compare operation in the following comparison
3546 against zero. */
3547
3548 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3549 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3550 {
3551 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3552 maskhigh = -1;
3553 }
3554 else
3555 maskhigh = (HOST_WIDE_INT) -1
3556 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3557
3558 temp = expand_binop (mode, and_optab, op0,
3559 immed_double_const (masklow, maskhigh, mode),
3560 result, 1, OPTAB_LIB_WIDEN);
3561 if (temp != result)
3562 emit_move_insn (result, temp);
3563
3564 label = gen_label_rtx ();
3565 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3566
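/* RESULT is negative here exactly when OP0 is negative; it holds OP0's
   sign bit together with OP0's low LOGD bits.  The -1 / IOR / +1
   sequence below maps a nonzero low part K to K - D (the correct
   negative remainder) and a zero low part to zero, clearing the sign
   bit in either case.  */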
3567 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3568 0, OPTAB_LIB_WIDEN);
3569 masklow = (HOST_WIDE_INT) -1 << logd;
3570 maskhigh = -1;
3571 temp = expand_binop (mode, ior_optab, temp,
3572 immed_double_const (masklow, maskhigh, mode),
3573 result, 1, OPTAB_LIB_WIDEN);
3574 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3575 0, OPTAB_LIB_WIDEN);
3576 if (temp != result)
3577 emit_move_insn (result, temp);
3578 emit_label (label);
3579 return result;
3580 }
3581
3582 /* Expand signed division of OP0 by a power of two D in mode MODE.
3583 This routine is only called for positive values of D. */
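/* For instance, in 32-bit arithmetic OP0 / 4 rounded towards zero is
   (OP0 + (OP0 < 0 ? 3 : 0)) >> 2 using an arithmetic shift; the
   sequences below materialize the conditional addend D - 1 without a
   branch (via emit_store_flag or a conditional move) when that is
   cheap, and fall back to a compare-and-jump otherwise.  */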
3584
3585 static rtx
3586 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3587 {
3588 rtx temp, label;
3589 tree shift;
3590 int logd;
3591
3592 logd = floor_log2 (d);
3593 shift = build_int_cst (NULL_TREE, logd);
3594
3595 if (d == 2
3596 && BRANCH_COST (optimize_insn_for_speed_p (),
3597 false) >= 1)
3598 {
3599 temp = gen_reg_rtx (mode);
3600 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3601 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3602 0, OPTAB_LIB_WIDEN);
3603 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3604 }
3605
3606 #ifdef HAVE_conditional_move
3607 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3608 >= 2)
3609 {
3610 rtx temp2;
3611
3612 /* ??? emit_conditional_move forces a stack adjustment via
3613 compare_from_rtx so, if the sequence is discarded, it will
3614 be lost. Do it now instead. */
3615 do_pending_stack_adjust ();
3616
3617 start_sequence ();
3618 temp2 = copy_to_mode_reg (mode, op0);
3619 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3620 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3621 temp = force_reg (mode, temp);
3622
3623 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3624 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3625 mode, temp, temp2, mode, 0);
3626 if (temp2)
3627 {
3628 rtx seq = get_insns ();
3629 end_sequence ();
3630 emit_insn (seq);
3631 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3632 }
3633 end_sequence ();
3634 }
3635 #endif
3636
3637 if (BRANCH_COST (optimize_insn_for_speed_p (),
3638 false) >= 2)
3639 {
3640 int ushift = GET_MODE_BITSIZE (mode) - logd;
3641
3642 temp = gen_reg_rtx (mode);
3643 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3644 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3645 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3646 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3647 else
3648 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3649 build_int_cst (NULL_TREE, ushift),
3650 NULL_RTX, 1);
3651 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3652 0, OPTAB_LIB_WIDEN);
3653 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3654 }
3655
3656 label = gen_label_rtx ();
3657 temp = copy_to_mode_reg (mode, op0);
3658 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3659 expand_inc (temp, GEN_INT (d - 1));
3660 emit_label (label);
3661 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3662 }
3663 \f
3664 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3665 if that is convenient, and returning where the result is.
3666 You may request either the quotient or the remainder as the result;
3667 specify REM_FLAG nonzero to get the remainder.
3668
3669 CODE is the expression code for which kind of division this is;
3670 it controls how rounding is done. MODE is the machine mode to use.
3671 UNSIGNEDP nonzero means do unsigned division. */
3672
3673 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3674 and then correct it by or'ing in missing high bits
3675 if result of ANDI is nonzero.
3676 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3677 This could optimize to a bfexts instruction.
3678 But C doesn't use these operations, so their optimizations are
3679 left for later. */
3680 /* ??? For modulo, we don't actually need the highpart of the first product,
3681 the low part will do nicely. And for small divisors, the second multiply
3682 can also be a low-part only multiply or even be completely left out.
3683 E.g. to calculate the remainder of a division by 3 with a 32 bit
3684 multiply, multiply with 0x55555556 and extract the upper two bits;
3685 the result is exact for inputs up to 0x1fffffff.
3686 The input range can be reduced by using cross-sum rules.
3687 For odd divisors >= 3, the following table gives right shift counts
3688 so that if a number is shifted by an integer multiple of the given
3689 amount, the remainder stays the same:
3690 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3691 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3692 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3693 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3694 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3695
3696 Cross-sum rules for even numbers can be derived by leaving as many bits
3697 to the right alone as the divisor has zeros to the right.
3698 E.g. if x is an unsigned 32 bit number:
3699 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3700 */
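/* For example, 100 * 0x55555556 == 0x2155555598; the low 32 bits are
   0x55555598, whose top two bits are 01, and indeed 100 % 3 == 1.

   The quotient itself comes from the high part of such a product.  A
   self-contained sketch of that strategy (illustrative only, using the
   usual 32-bit magic number for division by 10; the helper name is
   made up and is not part of the compiler):  */
#if 0
static unsigned int
example_udiv_by_10 (unsigned int x)
{
  /* 0xCCCCCCCD == ceil (2**35 / 10); the high half of the 64-bit
     product, shifted right by 3 more bits, is exactly x / 10 for
     every 32-bit x.  */
  unsigned long long prod = (unsigned long long) x * 0xCCCCCCCDULL;
  return (unsigned int) (prod >> 35);
}
#endif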
3701
3702 rtx
3703 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3704 rtx op0, rtx op1, rtx target, int unsignedp)
3705 {
3706 enum machine_mode compute_mode;
3707 rtx tquotient;
3708 rtx quotient = 0, remainder = 0;
3709 rtx last;
3710 int size;
3711 rtx insn, set;
3712 optab optab1, optab2;
3713 int op1_is_constant, op1_is_pow2 = 0;
3714 int max_cost, extra_cost;
3715 static HOST_WIDE_INT last_div_const = 0;
3716 static HOST_WIDE_INT ext_op1;
3717
3718 op1_is_constant = GET_CODE (op1) == CONST_INT;
3719 if (op1_is_constant)
3720 {
3721 ext_op1 = INTVAL (op1);
3722 if (unsignedp)
3723 ext_op1 &= GET_MODE_MASK (mode);
3724 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3725 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3726 }
3727
3728 /*
3729 This is the structure of expand_divmod:
3730
3731 First comes code to fix up the operands so we can perform the operations
3732 correctly and efficiently.
3733
3734 Second comes a switch statement with code specific for each rounding mode.
3735 For some special operands this code emits all RTL for the desired
3736 operation, for other cases, it generates only a quotient and stores it in
3737 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3738 to indicate that it has not done anything.
3739
3740 Last comes code that finishes the operation. If QUOTIENT is set and
3741 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3742 QUOTIENT is not set, it is computed using trunc rounding.
3743
3744 We try to generate special code for division and remainder when OP1 is a
3745 constant. If |OP1| = 2**n we can use shifts and some other fast
3746 operations. For other values of OP1, we compute a carefully selected
3747 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3748 by m.
3749
3750 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3751 half of the product. Different strategies for generating the product are
3752 implemented in expand_mult_highpart.
3753
3754 If what we actually want is the remainder, we generate that by another
3755 by-constant multiplication and a subtraction. */
3756
3757 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3758 code below will malfunction if we are, so check here and handle
3759 the special case if so. */
3760 if (op1 == const1_rtx)
3761 return rem_flag ? const0_rtx : op0;
3762
3763 /* When dividing by -1, we could get an overflow.
3764 negv_optab can handle overflows. */
3765 if (! unsignedp && op1 == constm1_rtx)
3766 {
3767 if (rem_flag)
3768 return const0_rtx;
3769 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3770 ? negv_optab : neg_optab, op0, target, 0);
3771 }
3772
3773 if (target
3774 /* Don't use the function value register as a target
3775 since we have to read it as well as write it,
3776 and function-inlining gets confused by this. */
3777 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3778 /* Don't clobber an operand while doing a multi-step calculation. */
3779 || ((rem_flag || op1_is_constant)
3780 && (reg_mentioned_p (target, op0)
3781 || (MEM_P (op0) && MEM_P (target))))
3782 || reg_mentioned_p (target, op1)
3783 || (MEM_P (op1) && MEM_P (target))))
3784 target = 0;
3785
3786 /* Get the mode in which to perform this computation. Normally it will
3787 be MODE, but sometimes we can't do the desired operation in MODE.
3788 If so, pick a wider mode in which we can do the operation. Convert
3789 to that mode at the start to avoid repeated conversions.
3790
3791 First see what operations we need. These depend on the expression
3792 we are evaluating. (We assume that divxx3 insns exist under the
3793 same conditions as modxx3 insns do, and that these insns don't normally
3794 fail. If these assumptions are not correct, we may generate less
3795 efficient code in some cases.)
3796
3797 Then see if we find a mode in which we can open-code that operation
3798 (either a division, modulus, or shift). Finally, check for the smallest
3799 mode for which we can do the operation with a library call. */
3800
3801 /* We might want to refine this now that we have division-by-constant
3802 optimization. Since expand_mult_highpart tries so many variants, it is
3803 not straightforward to generalize this. Maybe we should make an array
3804 of possible modes in init_expmed? Save this for GCC 2.7. */
3805
3806 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3807 ? (unsignedp ? lshr_optab : ashr_optab)
3808 : (unsignedp ? udiv_optab : sdiv_optab));
3809 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3810 ? optab1
3811 : (unsignedp ? udivmod_optab : sdivmod_optab));
3812
3813 for (compute_mode = mode; compute_mode != VOIDmode;
3814 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3815 if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3816 || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3817 break;
3818
3819 if (compute_mode == VOIDmode)
3820 for (compute_mode = mode; compute_mode != VOIDmode;
3821 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3822 if (optab_libfunc (optab1, compute_mode)
3823 || optab_libfunc (optab2, compute_mode))
3824 break;
3825
3826 /* If we still couldn't find a mode, use MODE, but expand_binop will
3827 probably die. */
3828 if (compute_mode == VOIDmode)
3829 compute_mode = mode;
3830
3831 if (target && GET_MODE (target) == compute_mode)
3832 tquotient = target;
3833 else
3834 tquotient = gen_reg_rtx (compute_mode);
3835
3836 size = GET_MODE_BITSIZE (compute_mode);
3837 #if 0
3838 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3839 (mode), and thereby get better code when OP1 is a constant. Do that
3840 later. It will require going over all usages of SIZE below. */
3841 size = GET_MODE_BITSIZE (mode);
3842 #endif
3843
3844 /* Only deduct something for a REM if the last divide done was
3845 for a different constant. Then set the constant of the last
3846 divide. */
3847 max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3848 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3849 && INTVAL (op1) == last_div_const))
3850 max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3851
3852 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3853
3854 /* Now convert to the best mode to use. */
3855 if (compute_mode != mode)
3856 {
3857 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3858 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3859
3860 /* convert_modes may have placed op1 into a register, so we
3861 must recompute the following. */
3862 op1_is_constant = GET_CODE (op1) == CONST_INT;
3863 op1_is_pow2 = (op1_is_constant
3864 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3865 || (! unsignedp
3866 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3867 }
3868
3869 /* If one of the operands is a volatile MEM, copy it into a register. */
3870
3871 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3872 op0 = force_reg (compute_mode, op0);
3873 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3874 op1 = force_reg (compute_mode, op1);
3875
3876 /* If we need the remainder or if OP1 is constant, we need to
3877 put OP0 in a register in case it has any queued subexpressions. */
3878 if (rem_flag || op1_is_constant)
3879 op0 = force_reg (compute_mode, op0);
3880
3881 last = get_last_insn ();
3882
3883 /* Promote floor rounding to trunc rounding for unsigned operations. */
3884 if (unsignedp)
3885 {
3886 if (code == FLOOR_DIV_EXPR)
3887 code = TRUNC_DIV_EXPR;
3888 if (code == FLOOR_MOD_EXPR)
3889 code = TRUNC_MOD_EXPR;
3890 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3891 code = TRUNC_DIV_EXPR;
3892 }
3893
3894 if (op1 != const0_rtx)
3895 switch (code)
3896 {
3897 case TRUNC_MOD_EXPR:
3898 case TRUNC_DIV_EXPR:
3899 if (op1_is_constant)
3900 {
3901 if (unsignedp)
3902 {
3903 unsigned HOST_WIDE_INT mh;
3904 int pre_shift, post_shift;
3905 int dummy;
3906 rtx ml;
3907 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3908 & GET_MODE_MASK (compute_mode));
3909
3910 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3911 {
3912 pre_shift = floor_log2 (d);
3913 if (rem_flag)
3914 {
3915 remainder
3916 = expand_binop (compute_mode, and_optab, op0,
3917 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3918 remainder, 1,
3919 OPTAB_LIB_WIDEN);
3920 if (remainder)
3921 return gen_lowpart (mode, remainder);
3922 }
3923 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3924 build_int_cst (NULL_TREE,
3925 pre_shift),
3926 tquotient, 1);
3927 }
3928 else if (size <= HOST_BITS_PER_WIDE_INT)
3929 {
3930 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3931 {
3932 /* Most significant bit of divisor is set; emit an scc
3933 insn. */
3934 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3935 compute_mode, 1, 1);
3936 if (quotient == 0)
3937 goto fail1;
3938 }
3939 else
3940 {
3941 /* Find a suitable multiplier and right shift count
3942 instead of multiplying with D. */
3943
3944 mh = choose_multiplier (d, size, size,
3945 &ml, &post_shift, &dummy);
3946
3947 /* If the suggested multiplier is more than SIZE bits,
3948 we can do better for even divisors, using an
3949 initial right shift. */
3950 if (mh != 0 && (d & 1) == 0)
3951 {
3952 pre_shift = floor_log2 (d & -d);
3953 mh = choose_multiplier (d >> pre_shift, size,
3954 size - pre_shift,
3955 &ml, &post_shift, &dummy);
3956 gcc_assert (!mh);
3957 }
3958 else
3959 pre_shift = 0;
3960
3961 if (mh != 0)
3962 {
3963 rtx t1, t2, t3, t4;
3964
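/* The multiplier needs SIZE + 1 bits (MH is its top bit), so the true
   quotient is (T1 + OP0) >> POST_SHIFT where T1 is the high part of
   OP0 * ML.  T1 + OP0 may not fit in SIZE bits, so compute
   ((OP0 - T1) >> 1) + T1, which is floor ((OP0 + T1) / 2), and shift
   by POST_SHIFT - 1 instead.  */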
3965 if (post_shift - 1 >= BITS_PER_WORD)
3966 goto fail1;
3967
3968 extra_cost
3969 = (shift_cost[compute_mode][post_shift - 1]
3970 + shift_cost[compute_mode][1]
3971 + 2 * add_cost[compute_mode]);
3972 t1 = expand_mult_highpart (compute_mode, op0, ml,
3973 NULL_RTX, 1,
3974 max_cost - extra_cost);
3975 if (t1 == 0)
3976 goto fail1;
3977 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3978 op0, t1),
3979 NULL_RTX);
3980 t3 = expand_shift
3981 (RSHIFT_EXPR, compute_mode, t2,
3982 build_int_cst (NULL_TREE, 1),
3983 NULL_RTX,1);
3984 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3985 t1, t3),
3986 NULL_RTX);
3987 quotient = expand_shift
3988 (RSHIFT_EXPR, compute_mode, t4,
3989 build_int_cst (NULL_TREE, post_shift - 1),
3990 tquotient, 1);
3991 }
3992 else
3993 {
3994 rtx t1, t2;
3995
3996 if (pre_shift >= BITS_PER_WORD
3997 || post_shift >= BITS_PER_WORD)
3998 goto fail1;
3999
4000 t1 = expand_shift
4001 (RSHIFT_EXPR, compute_mode, op0,
4002 build_int_cst (NULL_TREE, pre_shift),
4003 NULL_RTX, 1);
4004 extra_cost
4005 = (shift_cost[compute_mode][pre_shift]
4006 + shift_cost[compute_mode][post_shift]);
4007 t2 = expand_mult_highpart (compute_mode, t1, ml,
4008 NULL_RTX, 1,
4009 max_cost - extra_cost);
4010 if (t2 == 0)
4011 goto fail1;
4012 quotient = expand_shift
4013 (RSHIFT_EXPR, compute_mode, t2,
4014 build_int_cst (NULL_TREE, post_shift),
4015 tquotient, 1);
4016 }
4017 }
4018 }
4019 else /* Mode too wide to use tricky code */
4020 break;
4021
4022 insn = get_last_insn ();
4023 if (insn != last
4024 && (set = single_set (insn)) != 0
4025 && SET_DEST (set) == quotient)
4026 set_unique_reg_note (insn,
4027 REG_EQUAL,
4028 gen_rtx_UDIV (compute_mode, op0, op1));
4029 }
4030 else /* TRUNC_DIV, signed */
4031 {
4032 unsigned HOST_WIDE_INT ml;
4033 int lgup, post_shift;
4034 rtx mlr;
4035 HOST_WIDE_INT d = INTVAL (op1);
4036 unsigned HOST_WIDE_INT abs_d;
4037
4038 /* Since d might be INT_MIN, we have to cast to
4039 unsigned HOST_WIDE_INT before negating to avoid
4040 undefined signed overflow. */
4041 abs_d = (d >= 0
4042 ? (unsigned HOST_WIDE_INT) d
4043 : - (unsigned HOST_WIDE_INT) d);
4044
4045 /* n rem d = n rem -d */
4046 if (rem_flag && d < 0)
4047 {
4048 d = abs_d;
4049 op1 = gen_int_mode (abs_d, compute_mode);
4050 }
4051
4052 if (d == 1)
4053 quotient = op0;
4054 else if (d == -1)
4055 quotient = expand_unop (compute_mode, neg_optab, op0,
4056 tquotient, 0);
4057 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4058 {
4059 /* This case is not handled correctly below. */
4060 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4061 compute_mode, 1, 1);
4062 if (quotient == 0)
4063 goto fail1;
4064 }
4065 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4066 && (rem_flag ? smod_pow2_cheap[compute_mode]
4067 : sdiv_pow2_cheap[compute_mode])
4068 /* We assume that the cheap metric is true if the
4069 optab has an expander for this mode. */
4070 && ((optab_handler ((rem_flag ? smod_optab
4071 : sdiv_optab),
4072 compute_mode)->insn_code
4073 != CODE_FOR_nothing)
4074 || (optab_handler (sdivmod_optab,
4075 compute_mode)
4076 ->insn_code != CODE_FOR_nothing)))
4077 ;
4078 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4079 {
4080 if (rem_flag)
4081 {
4082 remainder = expand_smod_pow2 (compute_mode, op0, d);
4083 if (remainder)
4084 return gen_lowpart (mode, remainder);
4085 }
4086
4087 if (sdiv_pow2_cheap[compute_mode]
4088 && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4089 != CODE_FOR_nothing)
4090 || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4091 != CODE_FOR_nothing)))
4092 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4093 compute_mode, op0,
4094 gen_int_mode (abs_d,
4095 compute_mode),
4096 NULL_RTX, 0);
4097 else
4098 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4099
4100 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4101 negate the quotient. */
4102 if (d < 0)
4103 {
4104 insn = get_last_insn ();
4105 if (insn != last
4106 && (set = single_set (insn)) != 0
4107 && SET_DEST (set) == quotient
4108 && abs_d < ((unsigned HOST_WIDE_INT) 1
4109 << (HOST_BITS_PER_WIDE_INT - 1)))
4110 set_unique_reg_note (insn,
4111 REG_EQUAL,
4112 gen_rtx_DIV (compute_mode,
4113 op0,
4114 GEN_INT
4115 (trunc_int_for_mode
4116 (abs_d,
4117 compute_mode))));
4118
4119 quotient = expand_unop (compute_mode, neg_optab,
4120 quotient, quotient, 0);
4121 }
4122 }
4123 else if (size <= HOST_BITS_PER_WIDE_INT)
4124 {
4125 choose_multiplier (abs_d, size, size - 1,
4126 &mlr, &post_shift, &lgup);
4127 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4128 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4129 {
4130 rtx t1, t2, t3;
4131
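/* T2 below is the high part of OP0 * ML shifted right arithmetically
   by POST_SHIFT, and T3 is -1 or 0 according to OP0's sign.  The
   shifts round towards minus infinity, so subtracting T3 (i.e. adding
   one for a negative dividend) gives a quotient truncated towards
   zero; for negative D the final subtraction is done the other way
   round, negating the result.  */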
4132 if (post_shift >= BITS_PER_WORD
4133 || size - 1 >= BITS_PER_WORD)
4134 goto fail1;
4135
4136 extra_cost = (shift_cost[compute_mode][post_shift]
4137 + shift_cost[compute_mode][size - 1]
4138 + add_cost[compute_mode]);
4139 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4140 NULL_RTX, 0,
4141 max_cost - extra_cost);
4142 if (t1 == 0)
4143 goto fail1;
4144 t2 = expand_shift
4145 (RSHIFT_EXPR, compute_mode, t1,
4146 build_int_cst (NULL_TREE, post_shift),
4147 NULL_RTX, 0);
4148 t3 = expand_shift
4149 (RSHIFT_EXPR, compute_mode, op0,
4150 build_int_cst (NULL_TREE, size - 1),
4151 NULL_RTX, 0);
4152 if (d < 0)
4153 quotient
4154 = force_operand (gen_rtx_MINUS (compute_mode,
4155 t3, t2),
4156 tquotient);
4157 else
4158 quotient
4159 = force_operand (gen_rtx_MINUS (compute_mode,
4160 t2, t3),
4161 tquotient);
4162 }
4163 else
4164 {
4165 rtx t1, t2, t3, t4;
4166
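/* Here ML needs all SIZE bits, so use ML - 2**SIZE as a negative
   multiplier and compensate by adding OP0 to the high part of the
   product (T2 below); the shift and sign adjustment then proceed as
   in the previous case.  */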
4167 if (post_shift >= BITS_PER_WORD
4168 || size - 1 >= BITS_PER_WORD)
4169 goto fail1;
4170
4171 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4172 mlr = gen_int_mode (ml, compute_mode);
4173 extra_cost = (shift_cost[compute_mode][post_shift]
4174 + shift_cost[compute_mode][size - 1]
4175 + 2 * add_cost[compute_mode]);
4176 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4177 NULL_RTX, 0,
4178 max_cost - extra_cost);
4179 if (t1 == 0)
4180 goto fail1;
4181 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4182 t1, op0),
4183 NULL_RTX);
4184 t3 = expand_shift
4185 (RSHIFT_EXPR, compute_mode, t2,
4186 build_int_cst (NULL_TREE, post_shift),
4187 NULL_RTX, 0);
4188 t4 = expand_shift
4189 (RSHIFT_EXPR, compute_mode, op0,
4190 build_int_cst (NULL_TREE, size - 1),
4191 NULL_RTX, 0);
4192 if (d < 0)
4193 quotient
4194 = force_operand (gen_rtx_MINUS (compute_mode,
4195 t4, t3),
4196 tquotient);
4197 else
4198 quotient
4199 = force_operand (gen_rtx_MINUS (compute_mode,
4200 t3, t4),
4201 tquotient);
4202 }
4203 }
4204 else /* Mode too wide to use tricky code */
4205 break;
4206
4207 insn = get_last_insn ();
4208 if (insn != last
4209 && (set = single_set (insn)) != 0
4210 && SET_DEST (set) == quotient)
4211 set_unique_reg_note (insn,
4212 REG_EQUAL,
4213 gen_rtx_DIV (compute_mode, op0, op1));
4214 }
4215 break;
4216 }
4217 fail1:
4218 delete_insns_since (last);
4219 break;
4220
4221 case FLOOR_DIV_EXPR:
4222 case FLOOR_MOD_EXPR:
4223 /* We will come here only for signed operations. */
4224 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4225 {
4226 unsigned HOST_WIDE_INT mh;
4227 int pre_shift, lgup, post_shift;
4228 HOST_WIDE_INT d = INTVAL (op1);
4229 rtx ml;
4230
4231 if (d > 0)
4232 {
4233 /* We could just as easily deal with negative constants here,
4234 but it does not seem worth the trouble for GCC 2.6. */
4235 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4236 {
4237 pre_shift = floor_log2 (d);
4238 if (rem_flag)
4239 {
4240 remainder = expand_binop (compute_mode, and_optab, op0,
4241 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4242 remainder, 0, OPTAB_LIB_WIDEN);
4243 if (remainder)
4244 return gen_lowpart (mode, remainder);
4245 }
4246 quotient = expand_shift
4247 (RSHIFT_EXPR, compute_mode, op0,
4248 build_int_cst (NULL_TREE, pre_shift),
4249 tquotient, 0);
4250 }
4251 else
4252 {
4253 rtx t1, t2, t3, t4;
4254
4255 mh = choose_multiplier (d, size, size - 1,
4256 &ml, &post_shift, &lgup);
4257 gcc_assert (!mh);
4258
4259 if (post_shift < BITS_PER_WORD
4260 && size - 1 < BITS_PER_WORD)
4261 {
4262 t1 = expand_shift
4263 (RSHIFT_EXPR, compute_mode, op0,
4264 build_int_cst (NULL_TREE, size - 1),
4265 NULL_RTX, 0);
4266 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4267 NULL_RTX, 0, OPTAB_WIDEN);
4268 extra_cost = (shift_cost[compute_mode][post_shift]
4269 + shift_cost[compute_mode][size - 1]
4270 + 2 * add_cost[compute_mode]);
4271 t3 = expand_mult_highpart (compute_mode, t2, ml,
4272 NULL_RTX, 1,
4273 max_cost - extra_cost);
4274 if (t3 != 0)
4275 {
4276 t4 = expand_shift
4277 (RSHIFT_EXPR, compute_mode, t3,
4278 build_int_cst (NULL_TREE, post_shift),
4279 NULL_RTX, 1);
4280 quotient = expand_binop (compute_mode, xor_optab,
4281 t4, t1, tquotient, 0,
4282 OPTAB_WIDEN);
4283 }
4284 }
4285 }
4286 }
4287 else
4288 {
4289 rtx nsign, t1, t2, t3, t4;
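/* D is negative here.  Compute the floor quotient from a truncating
   division of an adjusted dividend: NSIGN is -1 when OP0 <= 0 and 0
   otherwise, so T3 is OP0 - 1 for positive OP0 and OP0 otherwise;
   adding ~NSIGN afterwards subtracts one from the quotient exactly
   when OP0 is positive.  */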
4290 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4291 op0, constm1_rtx), NULL_RTX);
4292 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4293 0, OPTAB_WIDEN);
4294 nsign = expand_shift
4295 (RSHIFT_EXPR, compute_mode, t2,
4296 build_int_cst (NULL_TREE, size - 1),
4297 NULL_RTX, 0);
4298 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4299 NULL_RTX);
4300 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4301 NULL_RTX, 0);
4302 if (t4)
4303 {
4304 rtx t5;
4305 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4306 NULL_RTX, 0);
4307 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4308 t4, t5),
4309 tquotient);
4310 }
4311 }
4312 }
4313
4314 if (quotient != 0)
4315 break;
4316 delete_insns_since (last);
4317
4318 /* Try using an instruction that produces both the quotient and
4319 remainder, using truncation. We can easily compensate the quotient
4320 or remainder to get floor rounding, once we have the remainder.
4321 Notice that we also compute the final remainder value here,
4322 and return the result right away. */
4323 if (target == 0 || GET_MODE (target) != compute_mode)
4324 target = gen_reg_rtx (compute_mode);
4325
4326 if (rem_flag)
4327 {
4328 remainder
4329 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4330 quotient = gen_reg_rtx (compute_mode);
4331 }
4332 else
4333 {
4334 quotient
4335 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4336 remainder = gen_reg_rtx (compute_mode);
4337 }
4338
4339 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4340 quotient, remainder, 0))
4341 {
4342 /* This could be computed with a branch-less sequence.
4343 Save that for later. */
4344 rtx tem;
4345 rtx label = gen_label_rtx ();
4346 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4347 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4348 NULL_RTX, 0, OPTAB_WIDEN);
4349 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4350 expand_dec (quotient, const1_rtx);
4351 expand_inc (remainder, op1);
4352 emit_label (label);
4353 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4354 }
4355
4356 /* No luck with division elimination or divmod. Have to do it
4357 by conditionally adjusting op0 *and* the result. */
4358 {
4359 rtx label1, label2, label3, label4, label5;
4360 rtx adjusted_op0;
4361 rtx tem;
4362
4363 quotient = gen_reg_rtx (compute_mode);
4364 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4365 label1 = gen_label_rtx ();
4366 label2 = gen_label_rtx ();
4367 label3 = gen_label_rtx ();
4368 label4 = gen_label_rtx ();
4369 label5 = gen_label_rtx ();
4370 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4371 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4372 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4373 quotient, 0, OPTAB_LIB_WIDEN);
4374 if (tem != quotient)
4375 emit_move_insn (quotient, tem);
4376 emit_jump_insn (gen_jump (label5));
4377 emit_barrier ();
4378 emit_label (label1);
4379 expand_inc (adjusted_op0, const1_rtx);
4380 emit_jump_insn (gen_jump (label4));
4381 emit_barrier ();
4382 emit_label (label2);
4383 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4384 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4385 quotient, 0, OPTAB_LIB_WIDEN);
4386 if (tem != quotient)
4387 emit_move_insn (quotient, tem);
4388 emit_jump_insn (gen_jump (label5));
4389 emit_barrier ();
4390 emit_label (label3);
4391 expand_dec (adjusted_op0, const1_rtx);
4392 emit_label (label4);
4393 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4394 quotient, 0, OPTAB_LIB_WIDEN);
4395 if (tem != quotient)
4396 emit_move_insn (quotient, tem);
4397 expand_dec (quotient, const1_rtx);
4398 emit_label (label5);
4399 }
4400 break;
4401
4402 case CEIL_DIV_EXPR:
4403 case CEIL_MOD_EXPR:
4404 if (unsignedp)
4405 {
4406 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4407 {
4408 rtx t1, t2, t3;
4409 unsigned HOST_WIDE_INT d = INTVAL (op1);
4410 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4411 build_int_cst (NULL_TREE, floor_log2 (d)),
4412 tquotient, 1);
4413 t2 = expand_binop (compute_mode, and_optab, op0,
4414 GEN_INT (d - 1),
4415 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4416 t3 = gen_reg_rtx (compute_mode);
4417 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4418 compute_mode, 1, 1);
4419 if (t3 == 0)
4420 {
4421 rtx lab;
4422 lab = gen_label_rtx ();
4423 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4424 expand_inc (t1, const1_rtx);
4425 emit_label (lab);
4426 quotient = t1;
4427 }
4428 else
4429 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4430 t1, t3),
4431 tquotient);
4432 break;
4433 }
4434
4435 /* Try using an instruction that produces both the quotient and
4436 remainder, using truncation. We can easily compensate the
4437 quotient or remainder to get ceiling rounding, once we have the
4438 remainder. Notice that we also compute the final remainder
4439 value here, and return the result right away. */
4440 if (target == 0 || GET_MODE (target) != compute_mode)
4441 target = gen_reg_rtx (compute_mode);
4442
4443 if (rem_flag)
4444 {
4445 remainder = (REG_P (target)
4446 ? target : gen_reg_rtx (compute_mode));
4447 quotient = gen_reg_rtx (compute_mode);
4448 }
4449 else
4450 {
4451 quotient = (REG_P (target)
4452 ? target : gen_reg_rtx (compute_mode));
4453 remainder = gen_reg_rtx (compute_mode);
4454 }
4455
4456 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4457 remainder, 1))
4458 {
4459 /* This could be computed with a branch-less sequence.
4460 Save that for later. */
4461 rtx label = gen_label_rtx ();
4462 do_cmp_and_jump (remainder, const0_rtx, EQ,
4463 compute_mode, label);
4464 expand_inc (quotient, const1_rtx);
4465 expand_dec (remainder, op1);
4466 emit_label (label);
4467 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4468 }
4469
4470 /* No luck with division elimination or divmod. Have to do it
4471 by conditionally adjusting op0 *and* the result. */
4472 {
4473 rtx label1, label2;
4474 rtx adjusted_op0, tem;
4475
4476 quotient = gen_reg_rtx (compute_mode);
4477 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4478 label1 = gen_label_rtx ();
4479 label2 = gen_label_rtx ();
4480 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4481 compute_mode, label1);
4482 emit_move_insn (quotient, const0_rtx);
4483 emit_jump_insn (gen_jump (label2));
4484 emit_barrier ();
4485 emit_label (label1);
4486 expand_dec (adjusted_op0, const1_rtx);
4487 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4488 quotient, 1, OPTAB_LIB_WIDEN);
4489 if (tem != quotient)
4490 emit_move_insn (quotient, tem);
4491 expand_inc (quotient, const1_rtx);
4492 emit_label (label2);
4493 }
4494 }
4495 else /* signed */
4496 {
4497 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4498 && INTVAL (op1) >= 0)
4499 {
4500 /* This is extremely similar to the code for the unsigned case
4501 above. For 2.7 we should merge these variants, but for
4502 2.6.1 I don't want to touch the code for unsigned since that
4503 get used in C. The signed case will only be used by other
4504 languages (Ada). */
4505
4506 rtx t1, t2, t3;
4507 unsigned HOST_WIDE_INT d = INTVAL (op1);
4508 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4509 build_int_cst (NULL_TREE, floor_log2 (d)),
4510 tquotient, 0);
4511 t2 = expand_binop (compute_mode, and_optab, op0,
4512 GEN_INT (d - 1),
4513 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514 t3 = gen_reg_rtx (compute_mode);
4515 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516 compute_mode, 1, 1);
4517 if (t3 == 0)
4518 {
4519 rtx lab;
4520 lab = gen_label_rtx ();
4521 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522 expand_inc (t1, const1_rtx);
4523 emit_label (lab);
4524 quotient = t1;
4525 }
4526 else
4527 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4528 t1, t3),
4529 tquotient);
4530 break;
4531 }
4532
4533 /* Try using an instruction that produces both the quotient and
4534 remainder, using truncation. We can easily compensate the
4535 quotient or remainder to get ceiling rounding, once we have the
4536 remainder. Notice that we also compute the final remainder
4537 value here, and return the result right away. */
4538 if (target == 0 || GET_MODE (target) != compute_mode)
4539 target = gen_reg_rtx (compute_mode);
4540 if (rem_flag)
4541 {
4542 remainder = (REG_P (target)
4543 ? target : gen_reg_rtx (compute_mode));
4544 quotient = gen_reg_rtx (compute_mode);
4545 }
4546 else
4547 {
4548 quotient = (REG_P (target)
4549 ? target : gen_reg_rtx (compute_mode));
4550 remainder = gen_reg_rtx (compute_mode);
4551 }
4552
4553 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4554 remainder, 0))
4555 {
4556 /* This could be computed with a branch-less sequence.
4557 Save that for later. */
4558 rtx tem;
4559 rtx label = gen_label_rtx ();
4560 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561 compute_mode, label);
4562 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4563 NULL_RTX, 0, OPTAB_WIDEN);
4564 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4565 expand_inc (quotient, const1_rtx);
4566 expand_dec (remainder, op1);
4567 emit_label (label);
4568 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4569 }
4570
4571 /* No luck with division elimination or divmod. Have to do it
4572 by conditionally adjusting op0 *and* the result. */
4573 {
4574 rtx label1, label2, label3, label4, label5;
4575 rtx adjusted_op0;
4576 rtx tem;
4577
4578 quotient = gen_reg_rtx (compute_mode);
4579 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4580 label1 = gen_label_rtx ();
4581 label2 = gen_label_rtx ();
4582 label3 = gen_label_rtx ();
4583 label4 = gen_label_rtx ();
4584 label5 = gen_label_rtx ();
4585 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4586 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4587 compute_mode, label1);
4588 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4589 quotient, 0, OPTAB_LIB_WIDEN);
4590 if (tem != quotient)
4591 emit_move_insn (quotient, tem);
4592 emit_jump_insn (gen_jump (label5));
4593 emit_barrier ();
4594 emit_label (label1);
4595 expand_dec (adjusted_op0, const1_rtx);
4596 emit_jump_insn (gen_jump (label4));
4597 emit_barrier ();
4598 emit_label (label2);
4599 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4600 compute_mode, label3);
4601 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4602 quotient, 0, OPTAB_LIB_WIDEN);
4603 if (tem != quotient)
4604 emit_move_insn (quotient, tem);
4605 emit_jump_insn (gen_jump (label5));
4606 emit_barrier ();
4607 emit_label (label3);
4608 expand_inc (adjusted_op0, const1_rtx);
4609 emit_label (label4);
4610 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4611 quotient, 0, OPTAB_LIB_WIDEN);
4612 if (tem != quotient)
4613 emit_move_insn (quotient, tem);
4614 expand_inc (quotient, const1_rtx);
4615 emit_label (label5);
4616 }
4617 }
4618 break;
4619
4620 case EXACT_DIV_EXPR:
4621 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4622 {
4623 HOST_WIDE_INT d = INTVAL (op1);
4624 unsigned HOST_WIDE_INT ml;
4625 int pre_shift;
4626 rtx t1;
4627
4628 pre_shift = floor_log2 (d & -d);
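/* The division is known to be exact, so divide by the odd part of D
   with a multiplication by its inverse modulo 2**SIZE (computed by
   invert_mod2n), after shifting out D's trailing zero bits.  For
   example, an exact 32-bit division by 3 is a multiplication by
   0xAAAAAAAB, since 3 * 0xAAAAAAAB == 2**33 + 1.  */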
4629 ml = invert_mod2n (d >> pre_shift, size);
4630 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4631 build_int_cst (NULL_TREE, pre_shift),
4632 NULL_RTX, unsignedp);
4633 quotient = expand_mult (compute_mode, t1,
4634 gen_int_mode (ml, compute_mode),
4635 NULL_RTX, 1);
4636
4637 insn = get_last_insn ();
4638 set_unique_reg_note (insn,
4639 REG_EQUAL,
4640 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4641 compute_mode,
4642 op0, op1));
4643 }
4644 break;
4645
4646 case ROUND_DIV_EXPR:
4647 case ROUND_MOD_EXPR:
4648 if (unsignedp)
4649 {
4650 rtx tem;
4651 rtx label;
4652 label = gen_label_rtx ();
4653 quotient = gen_reg_rtx (compute_mode);
4654 remainder = gen_reg_rtx (compute_mode);
4655 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4656 {
4657 rtx tem;
4658 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4659 quotient, 1, OPTAB_LIB_WIDEN);
4660 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4661 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4662 remainder, 1, OPTAB_LIB_WIDEN);
4663 }
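/* Round the quotient up when REMAINDER > (OP1 - 1) / 2, i.e. when the
   discarded fraction is at least one half; exact halves (even OP1)
   are rounded upwards.  */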
4664 tem = plus_constant (op1, -1);
4665 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4666 build_int_cst (NULL_TREE, 1),
4667 NULL_RTX, 1);
4668 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4669 expand_inc (quotient, const1_rtx);
4670 expand_dec (remainder, op1);
4671 emit_label (label);
4672 }
4673 else
4674 {
4675 rtx abs_rem, abs_op1, tem, mask;
4676 rtx label;
4677 label = gen_label_rtx ();
4678 quotient = gen_reg_rtx (compute_mode);
4679 remainder = gen_reg_rtx (compute_mode);
4680 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4681 {
4682 rtx tem;
4683 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4684 quotient, 0, OPTAB_LIB_WIDEN);
4685 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4686 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4687 remainder, 0, OPTAB_LIB_WIDEN);
4688 }
4689 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4690 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4691 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4692 build_int_cst (NULL_TREE, 1),
4693 NULL_RTX, 1);
4694 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
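/* Round away from zero without a branch: MASK below is -1 when OP0
   and OP1 have opposite signs and 0 otherwise, so (MASK ^ 1) - MASK
   is +1 or -1 with the sign of the quotient, and (MASK ^ OP1) - MASK
   is OP1 or -OP1 accordingly; these adjust the quotient and the
   remainder respectively when the discarded fraction is one half or
   more.  */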
4695 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696 NULL_RTX, 0, OPTAB_WIDEN);
4697 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4698 build_int_cst (NULL_TREE, size - 1),
4699 NULL_RTX, 0);
4700 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4701 NULL_RTX, 0, OPTAB_WIDEN);
4702 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4703 NULL_RTX, 0, OPTAB_WIDEN);
4704 expand_inc (quotient, tem);
4705 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4706 NULL_RTX, 0, OPTAB_WIDEN);
4707 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4708 NULL_RTX, 0, OPTAB_WIDEN);
4709 expand_dec (remainder, tem);
4710 emit_label (label);
4711 }
4712 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4713
4714 default:
4715 gcc_unreachable ();
4716 }
4717
4718 if (quotient == 0)
4719 {
4720 if (target && GET_MODE (target) != compute_mode)
4721 target = 0;
4722
4723 if (rem_flag)
4724 {
4725 /* Try to produce the remainder without producing the quotient.
4726 If we seem to have a divmod pattern that does not require widening,
4727 don't try widening here. We should really have a WIDEN argument
4728 to expand_twoval_binop, since what we'd really like to do here is
4729 1) try a mod insn in compute_mode
4730 2) try a divmod insn in compute_mode
4731 3) try a div insn in compute_mode and multiply-subtract to get
4732 remainder
4733 4) try the same things with widening allowed. */
4734 remainder
4735 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4736 op0, op1, target,
4737 unsignedp,
4738 ((optab_handler (optab2, compute_mode)->insn_code
4739 != CODE_FOR_nothing)
4740 ? OPTAB_DIRECT : OPTAB_WIDEN));
4741 if (remainder == 0)
4742 {
4743 /* No luck there. Can we do remainder and divide at once
4744 without a library call? */
4745 remainder = gen_reg_rtx (compute_mode);
4746 if (! expand_twoval_binop ((unsignedp
4747 ? udivmod_optab
4748 : sdivmod_optab),
4749 op0, op1,
4750 NULL_RTX, remainder, unsignedp))
4751 remainder = 0;
4752 }
4753
4754 if (remainder)
4755 return gen_lowpart (mode, remainder);
4756 }
4757
4758 /* Produce the quotient. Try a quotient insn, but not a library call.
4759 If we have a divmod in this mode, use it in preference to widening
4760 the div (for this test we assume it will not fail). Note that optab2
4761 is set to the one of the two optabs that the call below will use. */
4762 quotient
4763 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4764 op0, op1, rem_flag ? NULL_RTX : target,
4765 unsignedp,
4766 ((optab_handler (optab2, compute_mode)->insn_code
4767 != CODE_FOR_nothing)
4768 ? OPTAB_DIRECT : OPTAB_WIDEN));
4769
4770 if (quotient == 0)
4771 {
4772 /* No luck there. Try a quotient-and-remainder insn,
4773 keeping the quotient alone. */
4774 quotient = gen_reg_rtx (compute_mode);
4775 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4776 op0, op1,
4777 quotient, NULL_RTX, unsignedp))
4778 {
4779 quotient = 0;
4780 if (! rem_flag)
4781 /* Still no luck. If we are not computing the remainder,
4782 use a library call for the quotient. */
4783 quotient = sign_expand_binop (compute_mode,
4784 udiv_optab, sdiv_optab,
4785 op0, op1, target,
4786 unsignedp, OPTAB_LIB_WIDEN);
4787 }
4788 }
4789 }
4790
4791 if (rem_flag)
4792 {
4793 if (target && GET_MODE (target) != compute_mode)
4794 target = 0;
4795
4796 if (quotient == 0)
4797 {
4798 /* No divide instruction either. Use library for remainder. */
4799 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4800 op0, op1, target,
4801 unsignedp, OPTAB_LIB_WIDEN);
4802 /* No remainder function. Try a quotient-and-remainder
4803 function, keeping the remainder. */
4804 if (!remainder)
4805 {
4806 remainder = gen_reg_rtx (compute_mode);
4807 if (!expand_twoval_binop_libfunc
4808 (unsignedp ? udivmod_optab : sdivmod_optab,
4809 op0, op1,
4810 NULL_RTX, remainder,
4811 unsignedp ? UMOD : MOD))
4812 remainder = NULL_RTX;
4813 }
4814 }
4815 else
4816 {
4817 /* We divided. Now finish doing X - Y * (X / Y). */
4818 remainder = expand_mult (compute_mode, quotient, op1,
4819 NULL_RTX, unsignedp);
4820 remainder = expand_binop (compute_mode, sub_optab, op0,
4821 remainder, target, unsignedp,
4822 OPTAB_LIB_WIDEN);
4823 }
4824 }
4825
4826 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 }
4828 \f
4829 /* Return a tree node with data type TYPE, describing the value of X.
4830 Usually this is a VAR_DECL, if there is no obvious better choice.
4831 X may be an expression, however we only support those expressions
4832 generated by loop.c. */
4833
4834 tree
4835 make_tree (tree type, rtx x)
4836 {
4837 tree t;
4838
4839 switch (GET_CODE (x))
4840 {
4841 case CONST_INT:
4842 {
4843 HOST_WIDE_INT hi = 0;
4844
4845 if (INTVAL (x) < 0
4846 && !(TYPE_UNSIGNED (type)
4847 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4848 < HOST_BITS_PER_WIDE_INT)))
4849 hi = -1;
4850
4851 t = build_int_cst_wide (type, INTVAL (x), hi);
4852
4853 return t;
4854 }
4855
4856 case CONST_DOUBLE:
4857 if (GET_MODE (x) == VOIDmode)
4858 t = build_int_cst_wide (type,
4859 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4860 else
4861 {
4862 REAL_VALUE_TYPE d;
4863
4864 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4865 t = build_real (type, d);
4866 }
4867
4868 return t;
4869
4870 case CONST_VECTOR:
4871 {
4872 int units = CONST_VECTOR_NUNITS (x);
4873 tree itype = TREE_TYPE (type);
4874 tree t = NULL_TREE;
4875 int i;
4876
4877
4878 /* Build a tree with vector elements. */
4879 for (i = units - 1; i >= 0; --i)
4880 {
4881 rtx elt = CONST_VECTOR_ELT (x, i);
4882 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4883 }
4884
4885 return build_vector (type, t);
4886 }
4887
4888 case PLUS:
4889 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4890 make_tree (type, XEXP (x, 1)));
4891
4892 case MINUS:
4893 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4894 make_tree (type, XEXP (x, 1)));
4895
4896 case NEG:
4897 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4898
4899 case MULT:
4900 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4901 make_tree (type, XEXP (x, 1)));
4902
4903 case ASHIFT:
4904 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4905 make_tree (type, XEXP (x, 1)));
4906
4907 case LSHIFTRT:
4908 t = unsigned_type_for (type);
4909 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4910 make_tree (t, XEXP (x, 0)),
4911 make_tree (type, XEXP (x, 1))));
4912
4913 case ASHIFTRT:
4914 t = signed_type_for (type);
4915 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4916 make_tree (t, XEXP (x, 0)),
4917 make_tree (type, XEXP (x, 1))));
4918
4919 case DIV:
4920 if (TREE_CODE (type) != REAL_TYPE)
4921 t = signed_type_for (type);
4922 else
4923 t = type;
4924
4925 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4926 make_tree (t, XEXP (x, 0)),
4927 make_tree (t, XEXP (x, 1))));
4928 case UDIV:
4929 t = unsigned_type_for (type);
4930 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4931 make_tree (t, XEXP (x, 0)),
4932 make_tree (t, XEXP (x, 1))));
4933
4934 case SIGN_EXTEND:
4935 case ZERO_EXTEND:
4936 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4937 GET_CODE (x) == ZERO_EXTEND);
4938 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4939
4940 case CONST:
4941 return make_tree (type, XEXP (x, 0));
4942
4943 case SYMBOL_REF:
4944 t = SYMBOL_REF_DECL (x);
4945 if (t)
4946 return fold_convert (type, build_fold_addr_expr (t));
4947 /* else fall through. */
4948
4949 default:
4950 t = build_decl (VAR_DECL, NULL_TREE, type);
4951
4952 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4953 ptr_mode. So convert. */
4954 if (POINTER_TYPE_P (type))
4955 x = convert_memory_address (TYPE_MODE (type), x);
4956
4957 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4958 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4959 t->decl_with_rtl.rtl = x;
4960
4961 return t;
4962 }
4963 }
4964 \f
4965 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4966 and returning TARGET.
4967
4968 If TARGET is 0, a pseudo-register or constant is returned. */
4969
4970 rtx
4971 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4972 {
4973 rtx tem = 0;
4974
4975 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4976 tem = simplify_binary_operation (AND, mode, op0, op1);
4977 if (tem == 0)
4978 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4979
4980 if (target == 0)
4981 target = tem;
4982 else if (tem != target)
4983 emit_move_insn (target, tem);
4984 return target;
4985 }
4986 \f
4987 /* Helper function for emit_store_flag. */
4988 static rtx
4989 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
4990 int normalizep)
4991 {
4992 rtx op0;
4993 enum machine_mode target_mode = GET_MODE (target);
4994
4995 /* If we are converting to a wider mode, first convert to
4996 TARGET_MODE, then normalize. This produces better combining
4997 opportunities on machines that have a SIGN_EXTRACT when we are
4998 testing a single bit. This mostly benefits the 68k.
4999
5000 If STORE_FLAG_VALUE does not have the sign bit set when
5001 interpreted in MODE, we can do this conversion as unsigned, which
5002 is usually more efficient. */
5003 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5004 {
5005 convert_move (target, subtarget,
5006 (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5007 && 0 == (STORE_FLAG_VALUE
5008 & ((HOST_WIDE_INT) 1
5009 << (GET_MODE_BITSIZE (mode) -1))));
5010 op0 = target;
5011 mode = target_mode;
5012 }
5013 else
5014 op0 = subtarget;
5015
5016 /* If we want to keep subexpressions around, don't reuse our last
5017 target. */
5018 if (optimize)
5019 subtarget = 0;
5020
5021 /* Now normalize to the proper value in MODE. Sometimes we don't
5022 have to do anything. */
5023 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5024 ;
5025 /* STORE_FLAG_VALUE might be the most negative number, so write
5026 the comparison this way to avoid a compile-time warning. */
5027 else if (- normalizep == STORE_FLAG_VALUE)
5028 op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5029
5030 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5031 it hard to use a value of just the sign bit due to ANSI integer
5032 constant typing rules. */
5033 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5034 && (STORE_FLAG_VALUE
5035 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5036 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5037 size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5038 normalizep == 1);
5039 else
5040 {
5041 gcc_assert (STORE_FLAG_VALUE & 1);
5042
5043 op0 = expand_and (mode, op0, const1_rtx, subtarget);
5044 if (normalizep == -1)
5045 op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5046 }
5047
5048 /* If we were converting to a smaller mode, do the conversion now. */
5049 if (target_mode != mode)
5050 {
5051 convert_move (target, op0, 0);
5052 return target;
5053 }
5054 else
5055 return op0;
5056 }
5057
5058 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5059 and storing in TARGET. Normally return TARGET.
5060 Return 0 if that cannot be done.
5061
5062 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5063 it is VOIDmode, they cannot both be CONST_INT.
5064
5065 UNSIGNEDP is for the case where we have to widen the operands
5066 to perform the operation. It says to use zero-extension.
5067
5068 NORMALIZEP is 1 if we should convert the result to be either zero
5069 or one. NORMALIZEP is -1 if we should convert the result to be
5070 either zero or -1. If NORMALIZEP is zero, the result will be left
5071 "raw" out of the scc insn. */
5072
5073 rtx
5074 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5075 enum machine_mode mode, int unsignedp, int normalizep)
5076 {
5077 rtx subtarget;
5078 enum insn_code icode;
5079 enum machine_mode compare_mode;
5080 enum machine_mode target_mode = GET_MODE (target);
5081 rtx tem;
5082 rtx last = get_last_insn ();
5083 rtx pattern, comparison;
5084
5085 if (unsignedp)
5086 code = unsigned_condition (code);
5087
5088 /* If one operand is constant, make it the second one. Only do this
5089 if the other operand is not constant as well. */
5090
5091 if (swap_commutative_operands_p (op0, op1))
5092 {
5093 tem = op0;
5094 op0 = op1;
5095 op1 = tem;
5096 code = swap_condition (code);
5097 }
5098
5099 if (mode == VOIDmode)
5100 mode = GET_MODE (op0);
5101
5102 /* For some comparisons with 1 and -1, we can convert this to
5103 comparisons with zero. This will often produce more opportunities for
5104 store-flag insns. */
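   /* For example, the signed test "X < 1" becomes "X <= 0", and the
      unsigned test "X >= 1" becomes "X != 0"; the cases are enumerated in
      the switch below.  */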
5105
5106 switch (code)
5107 {
5108 case LT:
5109 if (op1 == const1_rtx)
5110 op1 = const0_rtx, code = LE;
5111 break;
5112 case LE:
5113 if (op1 == constm1_rtx)
5114 op1 = const0_rtx, code = LT;
5115 break;
5116 case GE:
5117 if (op1 == const1_rtx)
5118 op1 = const0_rtx, code = GT;
5119 break;
5120 case GT:
5121 if (op1 == constm1_rtx)
5122 op1 = const0_rtx, code = GE;
5123 break;
5124 case GEU:
5125 if (op1 == const1_rtx)
5126 op1 = const0_rtx, code = NE;
5127 break;
5128 case LTU:
5129 if (op1 == const1_rtx)
5130 op1 = const0_rtx, code = EQ;
5131 break;
5132 default:
5133 break;
5134 }
5135
5136 /* If we are comparing a double-word integer with zero or -1, we can
5137 convert the comparison into one involving a single word. */
5138 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5139 && GET_MODE_CLASS (mode) == MODE_INT
5140 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5141 {
5142 if ((code == EQ || code == NE)
5143 && (op1 == const0_rtx || op1 == constm1_rtx))
5144 {
5145 rtx op00, op01, op0both;
5146
5147 /* Do a logical OR or AND of the two words and compare the
5148 result. */
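	      /* With 32-bit words, a DImode X is zero exactly when
		 (low | high) == 0 and is -1 exactly when (low & high) is
		 all ones, so a single word-mode store-flag suffices.  */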
5149 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5150 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5151 op0both = expand_binop (word_mode,
5152 op1 == const0_rtx ? ior_optab : and_optab,
5153 op00, op01, NULL_RTX, unsignedp,
5154 OPTAB_DIRECT);
5155
5156 if (op0both != 0)
5157 return emit_store_flag (target, code, op0both, op1, word_mode,
5158 unsignedp, normalizep);
5159 }
5160 else if ((code == LT || code == GE) && op1 == const0_rtx)
5161 {
5162 rtx op0h;
5163
5164 /* If testing the sign bit, we can just test the high word. */
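	  /* For example, with 32-bit words a DImode X is negative exactly
	     when its high word is negative, so "X < 0" becomes a word-mode
	     sign test on that word.  */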
5165 op0h = simplify_gen_subreg (word_mode, op0, mode,
5166 subreg_highpart_offset (word_mode,
5167 mode));
5168 return emit_store_flag (target, code, op0h, op1, word_mode,
5169 unsignedp, normalizep);
5170 }
5171 }
5172
5173 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5174 complement of A (for GE) and shifting the sign bit to the low bit. */
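   /* Worked example in an 8-bit mode, producing a 0/1 result:
	A = 0xFD (-3), code LT:  0xFD >> 7 (logical shift)  = 1;
	A = 0xFD (-3), code GE:  ~A = 0x02, 0x02 >> 7        = 0;
	A = 0x05 (5),  code LT:  0x05 >> 7                   = 0.
      For a -1/0 result an arithmetic shift is used instead.  */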
5175 if (op1 == const0_rtx && (code == LT || code == GE)
5176 && GET_MODE_CLASS (mode) == MODE_INT
5177 && (normalizep || STORE_FLAG_VALUE == 1
5178 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5179 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5180 == ((unsigned HOST_WIDE_INT) 1
5181 << (GET_MODE_BITSIZE (mode) - 1))))))
5182 {
5183 subtarget = target;
5184
5185 /* If the result is to be wider than OP0, it is best to convert it
5186 first. If it is to be narrower, it is *incorrect* to convert it
5187 first. */
5188 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5189 {
5190 op0 = convert_modes (target_mode, mode, op0, 0);
5191 mode = target_mode;
5192 }
5193
5194 if (target_mode != mode)
5195 subtarget = 0;
5196
5197 if (code == GE)
5198 op0 = expand_unop (mode, one_cmpl_optab, op0,
5199 ((STORE_FLAG_VALUE == 1 || normalizep)
5200 ? 0 : subtarget), 0);
5201
5202 if (STORE_FLAG_VALUE == 1 || normalizep)
5203 /* If we are supposed to produce a 0/1 value, we want to do
5204 a logical shift from the sign bit to the low-order bit; for
5205 a -1/0 value, we do an arithmetic shift. */
5206 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5207 size_int (GET_MODE_BITSIZE (mode) - 1),
5208 subtarget, normalizep != -1);
5209
5210 if (mode != target_mode)
5211 op0 = convert_modes (target_mode, mode, op0, 0);
5212
5213 return op0;
5214 }
5215
5216 icode = setcc_gen_code[(int) code];
5217
5218 if (icode != CODE_FOR_nothing)
5219 {
5220 insn_operand_predicate_fn pred;
5221
5222 /* We think we may be able to do this with a scc insn. Emit the
5223 comparison and then the scc insn. */
5224
5225 do_pending_stack_adjust ();
5226 last = get_last_insn ();
5227
5228 comparison
5229 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5230 if (CONSTANT_P (comparison))
5231 {
5232 switch (GET_CODE (comparison))
5233 {
5234 case CONST_INT:
5235 if (comparison == const0_rtx)
5236 return const0_rtx;
5237 break;
5238
5239 #ifdef FLOAT_STORE_FLAG_VALUE
5240 case CONST_DOUBLE:
5241 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5242 return const0_rtx;
5243 break;
5244 #endif
5245 default:
5246 gcc_unreachable ();
5247 }
5248
5249 if (normalizep == 1)
5250 return const1_rtx;
5251 if (normalizep == -1)
5252 return constm1_rtx;
5253 return const_true_rtx;
5254 }
5255
5256 /* The code of COMPARISON may not match CODE if compare_from_rtx
5257 decided to swap its operands and reverse the original code.
5258
5259 We know that compare_from_rtx returns either a CONST_INT or
5260 a new comparison code, so it is safe to just extract the
5261 code from COMPARISON. */
5262 code = GET_CODE (comparison);
5263
5264 /* Get a reference to the target in the proper mode for this insn. */
5265 compare_mode = insn_data[(int) icode].operand[0].mode;
5266 subtarget = target;
5267 pred = insn_data[(int) icode].operand[0].predicate;
5268 if (optimize || ! (*pred) (subtarget, compare_mode))
5269 subtarget = gen_reg_rtx (compare_mode);
5270
5271 pattern = GEN_FCN (icode) (subtarget);
5272 if (pattern)
5273 {
5274 emit_insn (pattern);
5275 return emit_store_flag_1 (target, subtarget, compare_mode,
5276 normalizep);
5277 }
5278 }
5279 else
5280 {
5281 /* We don't have an scc insn, so try a cstore insn. */
5282
5283 for (compare_mode = mode; compare_mode != VOIDmode;
5284 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5285 {
5286 icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5287 if (icode != CODE_FOR_nothing)
5288 break;
5289 }
5290
5291 if (icode != CODE_FOR_nothing)
5292 {
5293 enum machine_mode result_mode
5294 = insn_data[(int) icode].operand[0].mode;
5295 rtx cstore_op0 = op0;
5296 rtx cstore_op1 = op1;
5297
5298 do_pending_stack_adjust ();
5299 last = get_last_insn ();
5300
5301 if (compare_mode != mode)
5302 {
5303 cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5304 unsignedp);
5305 cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5306 unsignedp);
5307 }
5308
5309 if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5310 compare_mode))
5311 cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5312
5313 if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5314 compare_mode))
5315 cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5316
5317 comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5318 cstore_op1);
5319 subtarget = target;
5320
5321 if (optimize || !(insn_data[(int) icode].operand[0].predicate
5322 (subtarget, result_mode)))
5323 subtarget = gen_reg_rtx (result_mode);
5324
5325 pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5326 cstore_op1);
5327
5328 if (pattern)
5329 {
5330 emit_insn (pattern);
5331 return emit_store_flag_1 (target, subtarget, result_mode,
5332 normalizep);
5333 }
5334 }
5335 }
5336
5337 delete_insns_since (last);
5338
5339 /* If optimizing, use different pseudo registers for each insn, instead
5340 of reusing the same pseudo. This leads to better CSE, but slows
5341 down the compiler, since there are more pseudos. */
5342 subtarget = (!optimize
5343 && (target_mode == mode)) ? target : NULL_RTX;
5344
5345 /* If we reached here, we can't do this with a scc insn. However, there
5346 are some comparisons that can be done directly. For example, if
5347 this is an equality comparison of integers, we can try to exclusive-or
5348 (or subtract) the two operands and use a recursive call to try the
5349 comparison with zero. Don't do any of these cases if branches are
5350 very cheap. */
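  /* For example, "A == B" holds exactly when (A ^ B) == 0, or equally when
     A - B == 0, so the recursive call only needs to handle a comparison
     against zero.  */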
5351
5352 if (BRANCH_COST (optimize_insn_for_speed_p (),
5353 false) > 0
5354 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5355 && op1 != const0_rtx)
5356 {
5357 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5358 OPTAB_WIDEN);
5359
5360 if (tem == 0)
5361 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5362 OPTAB_WIDEN);
5363 if (tem != 0)
5364 tem = emit_store_flag (target, code, tem, const0_rtx,
5365 mode, unsignedp, normalizep);
5366 if (tem == 0)
5367 delete_insns_since (last);
5368 return tem;
5369 }
5370
5371 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5372 the constant zero. Reject all other comparisons at this point. Only
5373 do LE and GT if branches are expensive since they are expensive on
5374 2-operand machines. */
5375
5376 if (BRANCH_COST (optimize_insn_for_speed_p (),
5377 false) == 0
5378 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5379 || (code != EQ && code != NE
5380 && (BRANCH_COST (optimize_insn_for_speed_p (),
5381 false) <= 1 || (code != LE && code != GT))))
5382 return 0;
5383
5384 /* See what we need to return. We can only return a 1, -1, or the
5385 sign bit. */
5386
5387 if (normalizep == 0)
5388 {
5389 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5390 normalizep = STORE_FLAG_VALUE;
5391
5392 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5393 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5394 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5395 ;
5396 else
5397 return 0;
5398 }
5399
5400 /* Try to put the result of the comparison in the sign bit. Assume we can't
5401 do the necessary operation below. */
5402
5403 tem = 0;
5404
5405 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5406 the sign bit set. */
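  /* Worked example in an 8-bit mode:
     A = 0x05:  A - 1 = 0x04, 0x05 | 0x04 = 0x05  (sign bit clear, A > 0);
     A = 0x00:  A - 1 = 0xFF, 0x00 | 0xFF = 0xFF  (sign bit set, A <= 0);
     A = 0xFD:  A - 1 = 0xFC, 0xFD | 0xFC = 0xFD  (sign bit set, A <= 0).  */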
5407
5408 if (code == LE)
5409 {
5410 /* This is destructive, so SUBTARGET can't be OP0. */
5411 if (rtx_equal_p (subtarget, op0))
5412 subtarget = 0;
5413
5414 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5415 OPTAB_WIDEN);
5416 if (tem)
5417 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5418 OPTAB_WIDEN);
5419 }
5420
5421 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5422 number of bits in the mode of OP0, minus one. */
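  /* Worked example in an 8-bit mode (BITS == 7):
     A = 0x05:  (A >> 7) - A = 0x00 - 0x05 = 0xFB  (sign bit set, A > 0);
     A = 0x00:  (A >> 7) - A = 0x00 - 0x00 = 0x00  (sign bit clear);
     A = 0xFD:  (A >> 7) - A = 0xFF - 0xFD = 0x02  (sign bit clear).  */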
5423
5424 if (code == GT)
5425 {
5426 if (rtx_equal_p (subtarget, op0))
5427 subtarget = 0;
5428
5429 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5430 size_int (GET_MODE_BITSIZE (mode) - 1),
5431 subtarget, 0);
5432 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5433 OPTAB_WIDEN);
5434 }
5435
5436 if (code == EQ || code == NE)
5437 {
5438 /* For EQ or NE, one way to do the comparison is to apply an operation
5439 that converts the operand into a positive number if it is nonzero
5440 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5441 for NE we negate. This puts the result in the sign bit. Then we
5442 normalize with a shift, if needed.
5443
5444 Two operations that can do the above actions are ABS and FFS, so try
5445 them. If that doesn't work, and MODE is smaller than a full word,
5446 we can use zero-extension to the wider mode (an unsigned conversion)
5447 as the operation. */
5448
5449 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5450 that is compensated by the subsequent overflow when subtracting
5451 one / negating. */
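      /* Worked example in an 8-bit mode using ABS:
	 X = 0x00: abs (X) = 0x00; EQ gives 0x00 - 1 = 0xFF (sign bit set),
	 NE gives -0x00 = 0x00 (sign bit clear);
	 X = 0x0C: abs (X) = 0x0C; EQ gives 0x0C - 1 = 0x0B (sign bit clear),
	 NE gives -0x0C = 0xF4 (sign bit set).  */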
5452
5453 if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5454 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5455 else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5456 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5457 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5458 {
5459 tem = convert_modes (word_mode, mode, op0, 1);
5460 mode = word_mode;
5461 }
5462
5463 if (tem != 0)
5464 {
5465 if (code == EQ)
5466 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5467 0, OPTAB_WIDEN);
5468 else
5469 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5470 }
5471
5472 /* If we couldn't do it that way, for NE we can "or" the two's complement
5473 of the value with itself. For EQ, we take the one's complement of
5474 that "or", which is an extra insn, so we only handle EQ if branches
5475 are expensive. */
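      /* Worked example in an 8-bit mode:
	 X = 0x00: -X | X = 0x00 (sign clear), ~(-X | X) = 0xFF (sign set);
	 X = 0x0C: -X | X = 0xFC (sign set),   ~(-X | X) = 0x03 (sign clear).  */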
5476
5477 if (tem == 0
5478 && (code == NE
5479 || BRANCH_COST (optimize_insn_for_speed_p (),
5480 false) > 1))
5481 {
5482 if (rtx_equal_p (subtarget, op0))
5483 subtarget = 0;
5484
5485 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5486 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5487 OPTAB_WIDEN);
5488
5489 if (tem && code == EQ)
5490 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5491 }
5492 }
5493
5494 if (tem && normalizep)
5495 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5496 size_int (GET_MODE_BITSIZE (mode) - 1),
5497 subtarget, normalizep == 1);
5498
5499 if (tem)
5500 {
5501 if (GET_MODE (tem) != target_mode)
5502 {
5503 convert_move (target, tem, 0);
5504 tem = target;
5505 }
5506 else if (!subtarget)
5507 {
5508 emit_move_insn (target, tem);
5509 tem = target;
5510 }
5511 }
5512 else
5513 delete_insns_since (last);
5514
5515 return tem;
5516 }
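/* A minimal sketch of how a caller might use emit_store_flag to expand
   "r = (a != 0)" into RTL; the names A and R here are hypothetical and not
   part of this file:

     rtx a = ...;                  (an SImode pseudo holding A)
     rtx r = gen_reg_rtx (SImode);
     rtx t = emit_store_flag (r, NE, a, const0_rtx, SImode, 0, 1);
     if (t == 0)
       t = emit_store_flag_force (r, NE, a, const0_rtx, SImode, 0, 1);

   A zero return means no cheap sequence was found, so the caller falls
   back to emit_store_flag_force (below), which always succeeds.  */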
5517
5518 /* Like emit_store_flag, but always succeeds. */
5519
5520 rtx
5521 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5522 enum machine_mode mode, int unsignedp, int normalizep)
5523 {
5524 rtx tem, label;
5525
5526 /* First see if emit_store_flag can do the job. */
5527 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5528 if (tem != 0)
5529 return tem;
5530
5531 if (normalizep == 0)
5532 normalizep = 1;
5533
5534 /* If this failed, we have to do this with set/compare/jump/set code. */
5535
5536 if (!REG_P (target)
5537 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5538 target = gen_reg_rtx (GET_MODE (target));
5539
5540 emit_move_insn (target, const1_rtx);
5541 label = gen_label_rtx ();
5542 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5543 NULL_RTX, label);
5544
5545 emit_move_insn (target, const0_rtx);
5546 emit_label (label);
5547
5548 return target;
5549 }
5550 \f
5551 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5552 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5553 now a thin wrapper around do_compare_rtx_and_jump. */
5554
5555 static void
5556 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5557 rtx label)
5558 {
5559 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5560 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5561 NULL_RTX, NULL_RTX, label);
5562 }