[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
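/* Illustration of the test above: 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0
   satisfy it, while 12 & 11 == 8, so 12 does not.  */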
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
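/* For example, a partial-integer mode stored in 32 bits is treated as 31 bits
   here, so converting a true 32-bit integer mode to it is costed as a
   truncation, while converting it to the 32-bit mode is costed as an
   extension.  */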
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume the cost of zero-extend and sign-extend is the same.  */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
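/* For example, with 32-bit words on a little-endian target, a field starting
   at bit 32 of a DImode value satisfies this test, and the corresponding
   subreg byte offset is 32 / BITS_PER_UNIT == 4.  */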
393
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
398 {
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
405 }
406 \f
407 /* A subroutine of store_bit_field, with the same arguments. Return true
408 if the operation could be implemented.
409
410 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
411 no other way of implementing the operation. If FALLBACK_P is false,
412 return false instead. */
413
414 static bool
415 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
416 unsigned HOST_WIDE_INT bitnum,
417 unsigned HOST_WIDE_INT bitregion_start,
418 unsigned HOST_WIDE_INT bitregion_end,
419 enum machine_mode fieldmode,
420 rtx value, bool fallback_p)
421 {
422 rtx op0 = str_rtx;
423 rtx orig_value;
424
425 while (GET_CODE (op0) == SUBREG)
426 {
427 /* The following line once was done only if WORDS_BIG_ENDIAN,
428 but I think that is a mistake. WORDS_BIG_ENDIAN is
429 meaningful at a much higher level; when structures are copied
430 between memory and regs, the higher-numbered regs
431 always get higher addresses. */
432 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
433 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
434 int byte_offset = 0;
435
436 /* Paradoxical subregs need special handling on big endian machines. */
437 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
438 {
439 int difference = inner_mode_size - outer_mode_size;
440
441 if (WORDS_BIG_ENDIAN)
442 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
443 if (BYTES_BIG_ENDIAN)
444 byte_offset += difference % UNITS_PER_WORD;
445 }
446 else
447 byte_offset = SUBREG_BYTE (op0);
448
449 bitnum += byte_offset * BITS_PER_UNIT;
450 op0 = SUBREG_REG (op0);
451 }
452
453 /* No action is needed if the target is a register and if the field
454 lies completely outside that register. This can occur if the source
455 code contains an out-of-bounds access to a small array. */
456 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
457 return true;
458
459 /* Use vec_set patterns for inserting parts of vectors whenever
460 available. */
461 if (VECTOR_MODE_P (GET_MODE (op0))
462 && !MEM_P (op0)
463 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
464 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
465 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
466 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
467 {
468 struct expand_operand ops[3];
469 enum machine_mode outermode = GET_MODE (op0);
470 enum machine_mode innermode = GET_MODE_INNER (outermode);
471 enum insn_code icode = optab_handler (vec_set_optab, outermode);
472 int pos = bitnum / GET_MODE_BITSIZE (innermode);
473
474 create_fixed_operand (&ops[0], op0);
475 create_input_operand (&ops[1], value, innermode);
476 create_integer_operand (&ops[2], pos);
477 if (maybe_expand_insn (icode, 3, ops))
478 return true;
479 }
480
481 /* If the target is a register, then overwriting the entire object or storing
482 a full-word or multi-word field can be done with just a SUBREG. */
483 if (!MEM_P (op0)
484 && bitsize == GET_MODE_BITSIZE (fieldmode)
485 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
486 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
487 {
488 /* Use the subreg machinery either to narrow OP0 to the required
489 words or to cope with mode punning between equal-sized modes. */
490 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
491 bitnum / BITS_PER_UNIT);
492 if (sub)
493 {
494 emit_move_insn (sub, value);
495 return true;
496 }
497 }
498
499 /* If the target is memory, storing any naturally aligned field can be
500 done with a simple store. For targets that support fast unaligned
501 memory access, any naturally sized, unit-aligned field can be stored directly. */
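/* For example, a 32-bit SImode field starting at bit 64 of a MEM whose
   MEM_ALIGN is 32 qualifies even on strict-alignment targets: the field is
   byte aligned, 64 % 32 == 0, and the alignment is a multiple of the field
   size.  */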
502 if (MEM_P (op0)
503 && bitnum % BITS_PER_UNIT == 0
504 && bitsize == GET_MODE_BITSIZE (fieldmode)
505 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
506 || (bitnum % bitsize == 0
507 && MEM_ALIGN (op0) % bitsize == 0)))
508 {
509 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
510 emit_move_insn (op0, value);
511 return true;
512 }
513
514 /* Make sure we are playing with integral modes. Pun with subregs
515 if we aren't. This must come after the entire register case above,
516 since that case is valid for any mode. The following cases are only
517 valid for integral modes. */
518 {
519 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
520 if (imode != GET_MODE (op0))
521 {
522 if (MEM_P (op0))
523 op0 = adjust_bitfield_address (op0, imode, 0);
524 else
525 {
526 gcc_assert (imode != BLKmode);
527 op0 = gen_lowpart (imode, op0);
528 }
529 }
530 }
531
532 /* Storing an lsb-aligned field in a register
533 can be done with a movstrict instruction. */
534
535 if (!MEM_P (op0)
536 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
537 && bitsize == GET_MODE_BITSIZE (fieldmode)
538 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
539 {
540 struct expand_operand ops[2];
541 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
542 rtx arg0 = op0;
543 unsigned HOST_WIDE_INT subreg_off;
544
545 if (GET_CODE (arg0) == SUBREG)
546 {
547 /* Else we've got some float mode source being extracted into
548 a different float mode destination -- this combination of
549 subregs results in Severe Tire Damage. */
550 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
551 || GET_MODE_CLASS (fieldmode) == MODE_INT
552 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
553 arg0 = SUBREG_REG (arg0);
554 }
555
556 subreg_off = bitnum / BITS_PER_UNIT;
557 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
558 {
559 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
560
561 create_fixed_operand (&ops[0], arg0);
562 /* Shrink the source operand to FIELDMODE. */
563 create_convert_operand_to (&ops[1], value, fieldmode, false);
564 if (maybe_expand_insn (icode, 2, ops))
565 return true;
566 }
567 }
568
569 /* Handle fields bigger than a word. */
570
571 if (bitsize > BITS_PER_WORD)
572 {
573 /* Here we transfer the words of the field
574 in the order least significant first.
575 This is because the most significant word is the one which may
576 be less than full.
577 However, only do that if the value is not BLKmode. */
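/* For example, on a 32-bit little-endian target a 48-bit field is written as
   a full low-order word followed by a 16-bit piece, so only the final, most
   significant piece can be partial.  */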
578
579 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
580 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
581 unsigned int i;
582 rtx last;
583
584 /* This is the mode we must force value to, so that there will be enough
585 subwords to extract. Note that fieldmode will often (always?) be
586 VOIDmode, because that is what store_field uses to indicate that this
587 is a bit field, but passing VOIDmode to operand_subword_force
588 is not allowed. */
589 fieldmode = GET_MODE (value);
590 if (fieldmode == VOIDmode)
591 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
592
593 last = get_last_insn ();
594 for (i = 0; i < nwords; i++)
595 {
596 /* If I is 0, use the low-order word in both field and target;
597 if I is 1, use the next to lowest word; and so on. */
598 unsigned int wordnum = (backwards
599 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
600 - i - 1
601 : i);
602 unsigned int bit_offset = (backwards
603 ? MAX ((int) bitsize - ((int) i + 1)
604 * BITS_PER_WORD,
605 0)
606 : (int) i * BITS_PER_WORD);
607 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
608 unsigned HOST_WIDE_INT new_bitsize =
609 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
610
611 /* If the remaining chunk doesn't have a full wordsize, we have
612 to make sure that for big-endian machines the higher-order
613 bits are used. */
614 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
615 value_word = simplify_expand_binop (word_mode, lshr_optab,
616 value_word,
617 GEN_INT (BITS_PER_WORD
618 - new_bitsize),
619 NULL_RTX, true,
620 OPTAB_LIB_WIDEN);
621
622 if (!store_bit_field_1 (op0, new_bitsize,
623 bitnum + bit_offset,
624 bitregion_start, bitregion_end,
625 word_mode,
626 value_word, fallback_p))
627 {
628 delete_insns_since (last);
629 return false;
630 }
631 }
632 return true;
633 }
634
635 /* If VALUE has a floating-point or complex mode, access it as an
636 integer of the corresponding size. This can occur on a machine
637 with 64 bit registers that uses SFmode for float. It can also
638 occur for unaligned float or complex fields. */
639 orig_value = value;
640 if (GET_MODE (value) != VOIDmode
641 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
642 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
643 {
644 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
645 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
646 }
647
648 /* If OP0 is a multi-word register, narrow it to the affected word.
649 If the region spans two words, defer to store_split_bit_field. */
650 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
651 {
652 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
653 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
654 gcc_assert (op0);
655 bitnum %= BITS_PER_WORD;
656 if (bitnum + bitsize > BITS_PER_WORD)
657 {
658 if (!fallback_p)
659 return false;
660
661 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
662 bitregion_end, value);
663 return true;
664 }
665 }
666
667 /* From here on we can assume that the field to be stored in fits
668 within a word. If the destination is a register, it too fits
669 in a word. */
670
671 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
672 if (HAVE_insv
673 && GET_MODE (value) != BLKmode
674 && bitsize > 0
675 && GET_MODE_BITSIZE (op_mode) >= bitsize
676 /* Do not use insv for volatile bitfields when
677 -fstrict-volatile-bitfields is in effect. */
678 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
679 && flag_strict_volatile_bitfields > 0)
680 /* Do not use insv if the bit region is restricted and
681 op_mode integer at offset doesn't fit into the
682 restricted region. */
683 && !(MEM_P (op0) && bitregion_end
684 && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode)
685 > bitregion_end + 1))
686 {
687 struct expand_operand ops[4];
688 unsigned HOST_WIDE_INT bitpos = bitnum;
689 rtx value1;
690 rtx xop0 = op0;
691 rtx last = get_last_insn ();
692 bool copy_back = false;
693
694 unsigned int unit = GET_MODE_BITSIZE (op_mode);
695 if (MEM_P (xop0))
696 {
697 /* Get a reference to the first byte of the field. */
698 xop0 = adjust_bitfield_address (xop0, byte_mode,
699 bitpos / BITS_PER_UNIT);
700 bitpos %= BITS_PER_UNIT;
701 }
702 else
703 {
704 /* Convert from counting within OP0 to counting in OP_MODE. */
705 if (BYTES_BIG_ENDIAN)
706 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
707 }
708
709 /* If xop0 is a register, we need it in OP_MODE
710 to make it acceptable to the format of insv. */
711 if (GET_CODE (xop0) == SUBREG)
712 /* We can't just change the mode, because this might clobber op0,
713 and we will need the original value of op0 if insv fails. */
714 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
715 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
716 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
717
718 /* If the destination is a paradoxical subreg such that we need a
719 truncate to the inner mode, perform the insertion on a temporary and
720 truncate the result to the original destination. Note that we can't
721 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
722 X) 0)) is (reg:N X). */
723 if (GET_CODE (xop0) == SUBREG
724 && REG_P (SUBREG_REG (xop0))
725 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
726 op_mode)))
727 {
728 rtx tem = gen_reg_rtx (op_mode);
729 emit_move_insn (tem, xop0);
730 xop0 = tem;
731 copy_back = true;
732 }
733
734 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
735 "backwards" from the size of the unit we are inserting into.
736 Otherwise, we count bits from the most significant on a
737 BYTES/BITS_BIG_ENDIAN machine. */
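/* For example, with a 32-bit unit, an 8-bit field at bitpos 4 becomes
   bitpos 32 - 8 - 4 == 20 below when the two conventions differ.  */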
738
739 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
740 bitpos = unit - bitsize - bitpos;
741
742 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
743 value1 = value;
744 if (GET_MODE (value) != op_mode)
745 {
746 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
747 {
748 /* Optimization: Don't bother really extending VALUE
749 if it has all the bits we will actually use. However,
750 if we must narrow it, be sure we do it correctly. */
751
752 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
753 {
754 rtx tmp;
755
756 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
757 if (! tmp)
758 tmp = simplify_gen_subreg (op_mode,
759 force_reg (GET_MODE (value),
760 value1),
761 GET_MODE (value), 0);
762 value1 = tmp;
763 }
764 else
765 value1 = gen_lowpart (op_mode, value1);
766 }
767 else if (CONST_INT_P (value))
768 value1 = gen_int_mode (INTVAL (value), op_mode);
769 else
770 /* Parse phase is supposed to make VALUE's data type
771 match that of the component reference, which is a type
772 at least as wide as the field; so VALUE should have
773 a mode that corresponds to that type. */
774 gcc_assert (CONSTANT_P (value));
775 }
776
777 create_fixed_operand (&ops[0], xop0);
778 create_integer_operand (&ops[1], bitsize);
779 create_integer_operand (&ops[2], bitpos);
780 create_input_operand (&ops[3], value1, op_mode);
781 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
782 {
783 if (copy_back)
784 convert_move (op0, xop0, true);
785 return true;
786 }
787 delete_insns_since (last);
788 }
789
790 /* If OP0 is a memory, try copying it to a register and seeing if a
791 cheap register alternative is available. */
792 if (HAVE_insv && MEM_P (op0))
793 {
794 enum machine_mode bestmode;
795 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
796
797 if (bitregion_end)
798 maxbits = bitregion_end - bitregion_start + 1;
799
800 /* Get the mode to use for inserting into this field. If OP0 is
801 BLKmode, get the smallest mode consistent with the alignment. If
802 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
803 mode. Otherwise, use the smallest mode containing the field. */
804
805 if (GET_MODE (op0) == BLKmode
806 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
807 || (op_mode != MAX_MACHINE_MODE
808 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
809 bestmode = get_best_mode (bitsize, bitnum,
810 bitregion_start, bitregion_end,
811 MEM_ALIGN (op0),
812 (op_mode == MAX_MACHINE_MODE
813 ? VOIDmode : op_mode),
814 MEM_VOLATILE_P (op0));
815 else
816 bestmode = GET_MODE (op0);
817
818 if (bestmode != VOIDmode
819 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
820 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
821 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
822 {
823 rtx last, tempreg, xop0;
824 unsigned int unit;
825 unsigned HOST_WIDE_INT offset, bitpos;
826
827 last = get_last_insn ();
828
829 /* Adjust address to point to the containing unit of
830 that mode. Compute the offset as a multiple of this unit,
831 counting in bytes. */
832 unit = GET_MODE_BITSIZE (bestmode);
833 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
834 bitpos = bitnum % unit;
835 xop0 = adjust_bitfield_address (op0, bestmode, offset);
836
837 /* Fetch that unit, store the bitfield in it, then store
838 the unit. */
839 tempreg = copy_to_reg (xop0);
840 if (store_bit_field_1 (tempreg, bitsize, bitpos,
841 bitregion_start, bitregion_end,
842 fieldmode, orig_value, false))
843 {
844 emit_move_insn (xop0, tempreg);
845 return true;
846 }
847 delete_insns_since (last);
848 }
849 }
850
851 if (!fallback_p)
852 return false;
853
854 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
855 bitregion_end, value);
856 return true;
857 }
858
859 /* Generate code to store value from rtx VALUE
860 into a bit-field within structure STR_RTX
861 containing BITSIZE bits starting at bit BITNUM.
862
863 BITREGION_START is the bitpos of the first bitfield in this region.
864 BITREGION_END is the bitpos of the ending bitfield in this region.
865 These two fields are 0 if the C++ memory model does not apply,
866 or if we are not interested in keeping track of bitfield regions.
867
868 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
869
870 void
871 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
872 unsigned HOST_WIDE_INT bitnum,
873 unsigned HOST_WIDE_INT bitregion_start,
874 unsigned HOST_WIDE_INT bitregion_end,
875 enum machine_mode fieldmode,
876 rtx value)
877 {
878 /* Under the C++0x memory model, we must not touch bits outside the
879 bit region. Adjust the address to start at the beginning of the
880 bit region. */
881 if (MEM_P (str_rtx) && bitregion_start > 0)
882 {
883 enum machine_mode bestmode;
884 enum machine_mode op_mode;
885 unsigned HOST_WIDE_INT offset;
886
887 op_mode = mode_for_extraction (EP_insv, 3);
888 if (op_mode == MAX_MACHINE_MODE)
889 op_mode = VOIDmode;
890
891 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
892
893 offset = bitregion_start / BITS_PER_UNIT;
894 bitnum -= bitregion_start;
895 bitregion_end -= bitregion_start;
896 bitregion_start = 0;
897 bestmode = get_best_mode (bitsize, bitnum,
898 bitregion_start, bitregion_end,
899 MEM_ALIGN (str_rtx),
900 op_mode,
901 MEM_VOLATILE_P (str_rtx));
902 str_rtx = adjust_address (str_rtx, bestmode, offset);
903 }
904
905 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
906 bitregion_start, bitregion_end,
907 fieldmode, value, true))
908 gcc_unreachable ();
909 }
910 \f
911 /* Use shifts and boolean operations to store VALUE into a bit field of
912 width BITSIZE in OP0, starting at bit BITNUM. */
913
914 static void
915 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
916 unsigned HOST_WIDE_INT bitnum,
917 unsigned HOST_WIDE_INT bitregion_start,
918 unsigned HOST_WIDE_INT bitregion_end,
919 rtx value)
920 {
921 enum machine_mode mode;
922 rtx temp;
923 int all_zero = 0;
924 int all_one = 0;
925
926 /* There is a case not handled here:
927 a structure with a known alignment of just a halfword
928 and a field split across two aligned halfwords within the structure.
929 Or likewise a structure with a known alignment of just a byte
930 and a field split across two bytes.
931 Such cases are not supposed to be able to occur. */
932
933 if (MEM_P (op0))
934 {
935 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
936
937 if (bitregion_end)
938 maxbits = bitregion_end - bitregion_start + 1;
939
940 /* Get the proper mode to use for this field. We want a mode that
941 includes the entire field. If such a mode would be larger than
942 a word, we won't be doing the store the normal way.
943 We don't want a mode bigger than the destination. */
944
945 mode = GET_MODE (op0);
946 if (GET_MODE_BITSIZE (mode) == 0
947 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
948 mode = word_mode;
949
950 if (MEM_VOLATILE_P (op0)
951 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
952 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
953 && flag_strict_volatile_bitfields > 0)
954 mode = GET_MODE (op0);
955 else
956 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
957 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
958
959 if (mode == VOIDmode)
960 {
961 /* The only way this should occur is if the field spans word
962 boundaries. */
963 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
964 bitregion_end, value);
965 return;
966 }
967
968 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
969 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
970 bitnum -= bit_offset;
971 }
972
973 mode = GET_MODE (op0);
974 gcc_assert (SCALAR_INT_MODE_P (mode));
975
976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
977 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
978
979 if (BYTES_BIG_ENDIAN)
980 /* BITNUM is the distance between our msb
981 and that of the containing datum.
982 Convert it to the distance from the lsb. */
983 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
984
985 /* Now BITNUM is always the distance between our lsb
986 and that of OP0. */
987
988 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
989 we must first convert its mode to MODE. */
990
991 if (CONST_INT_P (value))
992 {
993 HOST_WIDE_INT v = INTVAL (value);
994
995 if (bitsize < HOST_BITS_PER_WIDE_INT)
996 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
997
998 if (v == 0)
999 all_zero = 1;
1000 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1001 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1002 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1003 all_one = 1;
1004
1005 value = lshift_value (mode, value, bitnum, bitsize);
1006 }
1007 else
1008 {
1009 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1010 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1011
1012 if (GET_MODE (value) != mode)
1013 value = convert_to_mode (mode, value, 1);
1014
1015 if (must_and)
1016 value = expand_binop (mode, and_optab, value,
1017 mask_rtx (mode, 0, bitsize, 0),
1018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1019 if (bitnum > 0)
1020 value = expand_shift (LSHIFT_EXPR, mode, value,
1021 bitnum, NULL_RTX, 1);
1022 }
1023
1024 /* Now clear the chosen bits in OP0,
1025 except that if VALUE is -1 we need not bother. */
1026 /* We keep the intermediates in registers to allow CSE to combine
1027 consecutive bitfield assignments. */
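/* Illustration: for an 8-bit field at (lsb-relative) bit 4 of a 32-bit word,
   the sequence below computes op0 = (op0 & ~(0xff << 4)) | ((value & 0xff) << 4),
   skipping the AND when VALUE is known to be all ones and the IOR when it is
   zero.  */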
1028
1029 temp = force_reg (mode, op0);
1030
1031 if (! all_one)
1032 {
1033 temp = expand_binop (mode, and_optab, temp,
1034 mask_rtx (mode, bitnum, bitsize, 1),
1035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1036 temp = force_reg (mode, temp);
1037 }
1038
1039 /* Now logical-or VALUE into OP0, unless it is zero. */
1040
1041 if (! all_zero)
1042 {
1043 temp = expand_binop (mode, ior_optab, temp, value,
1044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1045 temp = force_reg (mode, temp);
1046 }
1047
1048 if (op0 != temp)
1049 {
1050 op0 = copy_rtx (op0);
1051 emit_move_insn (op0, temp);
1052 }
1053 }
1054 \f
1055 /* Store a bit field that is split across multiple accessible memory objects.
1056
1057 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1058 BITSIZE is the field width; BITPOS the position of its first bit
1059 (within the word).
1060 VALUE is the value to store.
1061
1062 This does not yet handle fields wider than BITS_PER_WORD. */
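/* For example, with 32-bit words a 20-bit field starting at bit 28 of a
   register is stored as a 4-bit piece in word 0 followed by a 16-bit piece in
   word 1.  */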
1063
1064 static void
1065 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1066 unsigned HOST_WIDE_INT bitpos,
1067 unsigned HOST_WIDE_INT bitregion_start,
1068 unsigned HOST_WIDE_INT bitregion_end,
1069 rtx value)
1070 {
1071 unsigned int unit;
1072 unsigned int bitsdone = 0;
1073
1074 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1075 much at a time. */
1076 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1077 unit = BITS_PER_WORD;
1078 else
1079 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1080
1081 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1082 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1083 that VALUE might be a floating-point constant. */
1084 if (CONSTANT_P (value) && !CONST_INT_P (value))
1085 {
1086 rtx word = gen_lowpart_common (word_mode, value);
1087
1088 if (word && (value != word))
1089 value = word;
1090 else
1091 value = gen_lowpart_common (word_mode,
1092 force_reg (GET_MODE (value) != VOIDmode
1093 ? GET_MODE (value)
1094 : word_mode, value));
1095 }
1096
1097 while (bitsdone < bitsize)
1098 {
1099 unsigned HOST_WIDE_INT thissize;
1100 rtx part, word;
1101 unsigned HOST_WIDE_INT thispos;
1102 unsigned HOST_WIDE_INT offset;
1103
1104 offset = (bitpos + bitsdone) / unit;
1105 thispos = (bitpos + bitsdone) % unit;
1106
1107 /* When the region of bytes we can touch is restricted, decrease
1108 UNIT close to the end of the region as needed. */
1109 if (bitregion_end
1110 && unit > BITS_PER_UNIT
1111 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1112 {
1113 unit = unit / 2;
1114 continue;
1115 }
1116
1117 /* THISSIZE must not overrun a word boundary. Otherwise,
1118 store_fixed_bit_field will call us again, and we will mutually
1119 recurse forever. */
1120 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1121 thissize = MIN (thissize, unit - thispos);
1122
1123 if (BYTES_BIG_ENDIAN)
1124 {
1125 /* Fetch successively less significant portions. */
1126 if (CONST_INT_P (value))
1127 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1128 >> (bitsize - bitsdone - thissize))
1129 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1130 else
1131 {
1132 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1133 /* The args are chosen so that the last part includes the
1134 lsb. Give extract_bit_field the value it needs (with
1135 endianness compensation) to fetch the piece we want. */
1136 part = extract_fixed_bit_field (word_mode, value, thissize,
1137 total_bits - bitsize + bitsdone,
1138 NULL_RTX, 1, false);
1139 }
1140 }
1141 else
1142 {
1143 /* Fetch successively more significant portions. */
1144 if (CONST_INT_P (value))
1145 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1146 >> bitsdone)
1147 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1148 else
1149 part = extract_fixed_bit_field (word_mode, value, thissize,
1150 bitsdone, NULL_RTX, 1, false);
1151 }
1152
1153 /* If OP0 is a register, then handle OFFSET here.
1154
1155 When handling multiword bitfields, extract_bit_field may pass
1156 down a word_mode SUBREG of a larger REG for a bitfield that actually
1157 crosses a word boundary. Thus, for a SUBREG, we must find
1158 the current word starting from the base register. */
1159 if (GET_CODE (op0) == SUBREG)
1160 {
1161 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1162 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1163 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1164 word = word_offset ? const0_rtx : op0;
1165 else
1166 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1167 GET_MODE (SUBREG_REG (op0)));
1168 offset = 0;
1169 }
1170 else if (REG_P (op0))
1171 {
1172 enum machine_mode op0_mode = GET_MODE (op0);
1173 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1174 word = offset ? const0_rtx : op0;
1175 else
1176 word = operand_subword_force (op0, offset, GET_MODE (op0));
1177 offset = 0;
1178 }
1179 else
1180 word = op0;
1181
1182 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1183 it is just an out-of-bounds access. Ignore it. */
1184 if (word != const0_rtx)
1185 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1186 bitregion_start, bitregion_end, part);
1187 bitsdone += thissize;
1188 }
1189 }
1190 \f
1191 /* A subroutine of extract_bit_field_1 that converts return value X
1192 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1193 to extract_bit_field. */
1194
1195 static rtx
1196 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1197 enum machine_mode tmode, bool unsignedp)
1198 {
1199 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1200 return x;
1201
1202 /* If the x mode is not a scalar integral, first convert to the
1203 integer mode of that size and then access it as a floating-point
1204 value via a SUBREG. */
1205 if (!SCALAR_INT_MODE_P (tmode))
1206 {
1207 enum machine_mode smode;
1208
1209 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1210 x = convert_to_mode (smode, x, unsignedp);
1211 x = force_reg (smode, x);
1212 return gen_lowpart (tmode, x);
1213 }
1214
1215 return convert_to_mode (tmode, x, unsignedp);
1216 }
1217
1218 /* A subroutine of extract_bit_field, with the same arguments.
1219 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1220 if we can find no other means of implementing the operation.
1221 If FALLBACK_P is false, return NULL instead. */
1222
1223 static rtx
1224 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1225 unsigned HOST_WIDE_INT bitnum,
1226 int unsignedp, bool packedp, rtx target,
1227 enum machine_mode mode, enum machine_mode tmode,
1228 bool fallback_p)
1229 {
1230 rtx op0 = str_rtx;
1231 enum machine_mode int_mode;
1232 enum machine_mode ext_mode;
1233 enum machine_mode mode1;
1234
1235 if (tmode == VOIDmode)
1236 tmode = mode;
1237
1238 while (GET_CODE (op0) == SUBREG)
1239 {
1240 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1241 op0 = SUBREG_REG (op0);
1242 }
1243
1244 /* If we have an out-of-bounds access to a register, just return an
1245 uninitialized register of the required mode. This can occur if the
1246 source code contains an out-of-bounds access to a small array. */
1247 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1248 return gen_reg_rtx (tmode);
1249
1250 if (REG_P (op0)
1251 && mode == GET_MODE (op0)
1252 && bitnum == 0
1253 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1254 {
1255 /* We're trying to extract a full register from itself. */
1256 return op0;
1257 }
1258
1259 /* See if we can get a better vector mode before extracting. */
1260 if (VECTOR_MODE_P (GET_MODE (op0))
1261 && !MEM_P (op0)
1262 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1263 {
1264 enum machine_mode new_mode;
1265
1266 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1267 new_mode = MIN_MODE_VECTOR_FLOAT;
1268 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1269 new_mode = MIN_MODE_VECTOR_FRACT;
1270 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1271 new_mode = MIN_MODE_VECTOR_UFRACT;
1272 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1273 new_mode = MIN_MODE_VECTOR_ACCUM;
1274 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1275 new_mode = MIN_MODE_VECTOR_UACCUM;
1276 else
1277 new_mode = MIN_MODE_VECTOR_INT;
1278
1279 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1280 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1281 && targetm.vector_mode_supported_p (new_mode))
1282 break;
1283 if (new_mode != VOIDmode)
1284 op0 = gen_lowpart (new_mode, op0);
1285 }
1286
1287 /* Use vec_extract patterns for extracting parts of vectors whenever
1288 available. */
1289 if (VECTOR_MODE_P (GET_MODE (op0))
1290 && !MEM_P (op0)
1291 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1292 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1293 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1294 {
1295 struct expand_operand ops[3];
1296 enum machine_mode outermode = GET_MODE (op0);
1297 enum machine_mode innermode = GET_MODE_INNER (outermode);
1298 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1299 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1300
1301 create_output_operand (&ops[0], target, innermode);
1302 create_input_operand (&ops[1], op0, outermode);
1303 create_integer_operand (&ops[2], pos);
1304 if (maybe_expand_insn (icode, 3, ops))
1305 {
1306 target = ops[0].value;
1307 if (GET_MODE (target) != mode)
1308 return gen_lowpart (tmode, target);
1309 return target;
1310 }
1311 }
1312
1313 /* Make sure we are playing with integral modes. Pun with subregs
1314 if we aren't. */
1315 {
1316 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1317 if (imode != GET_MODE (op0))
1318 {
1319 if (MEM_P (op0))
1320 op0 = adjust_bitfield_address (op0, imode, 0);
1321 else if (imode != BLKmode)
1322 {
1323 op0 = gen_lowpart (imode, op0);
1324
1325 /* If we got a SUBREG, force it into a register since we
1326 aren't going to be able to do another SUBREG on it. */
1327 if (GET_CODE (op0) == SUBREG)
1328 op0 = force_reg (imode, op0);
1329 }
1330 else if (REG_P (op0))
1331 {
1332 rtx reg, subreg;
1333 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1334 MODE_INT);
1335 reg = gen_reg_rtx (imode);
1336 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1337 emit_move_insn (subreg, op0);
1338 op0 = reg;
1339 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1340 }
1341 else
1342 {
1343 rtx mem = assign_stack_temp (GET_MODE (op0),
1344 GET_MODE_SIZE (GET_MODE (op0)));
1345 emit_move_insn (mem, op0);
1346 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1347 }
1348 }
1349 }
1350
1351 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1352 If that's wrong, the solution is to test for it and set TARGET to 0
1353 if needed. */
1354
1355 /* If the bitfield is volatile, we need to make sure the access
1356 remains on a type-aligned boundary. */
1357 if (GET_CODE (op0) == MEM
1358 && MEM_VOLATILE_P (op0)
1359 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1360 && flag_strict_volatile_bitfields > 0)
1361 goto no_subreg_mode_swap;
1362
1363 /* Only scalar integer modes can be converted via subregs. There is an
1364 additional problem for FP modes here in that they can have a precision
1365 which is different from the size. mode_for_size uses precision, but
1366 we want a mode based on the size, so we must avoid calling it for FP
1367 modes. */
1368 mode1 = mode;
1369 if (SCALAR_INT_MODE_P (tmode))
1370 {
1371 enum machine_mode try_mode = mode_for_size (bitsize,
1372 GET_MODE_CLASS (tmode), 0);
1373 if (try_mode != BLKmode)
1374 mode1 = try_mode;
1375 }
1376 gcc_assert (mode1 != BLKmode);
1377
1378 /* Extraction of a full MODE1 value can be done with a subreg as long
1379 as the least significant bit of the value is the least significant
1380 bit of either OP0 or a word of OP0. */
1381 if (!MEM_P (op0)
1382 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1383 && bitsize == GET_MODE_BITSIZE (mode1)
1384 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1385 {
1386 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1387 bitnum / BITS_PER_UNIT);
1388 if (sub)
1389 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1390 }
1391
1392 /* Extraction of a full MODE1 value can be done with a load as long as
1393 the field is on a byte boundary and is sufficiently aligned. */
1394 if (MEM_P (op0)
1395 && bitnum % BITS_PER_UNIT == 0
1396 && bitsize == GET_MODE_BITSIZE (mode1)
1397 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1398 || (bitnum % bitsize == 0
1399 && MEM_ALIGN (op0) % bitsize == 0)))
1400 {
1401 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1402 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1403 }
1404
1405 no_subreg_mode_swap:
1406
1407 /* Handle fields bigger than a word. */
1408
1409 if (bitsize > BITS_PER_WORD)
1410 {
1411 /* Here we transfer the words of the field
1412 in the order least significant first.
1413 This is because the most significant word is the one which may
1414 be less than full. */
1415
1416 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1417 unsigned int i;
1418 rtx last;
1419
1420 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1421 target = gen_reg_rtx (mode);
1422
1423 /* Indicate for flow that the entire target reg is being set. */
1424 emit_clobber (target);
1425
1426 last = get_last_insn ();
1427 for (i = 0; i < nwords; i++)
1428 {
1429 /* If I is 0, use the low-order word in both field and target;
1430 if I is 1, use the next to lowest word; and so on. */
1431 /* Word number in TARGET to use. */
1432 unsigned int wordnum
1433 = (WORDS_BIG_ENDIAN
1434 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1435 : i);
1436 /* Offset from start of field in OP0. */
1437 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1438 ? MAX (0, ((int) bitsize - ((int) i + 1)
1439 * (int) BITS_PER_WORD))
1440 : (int) i * BITS_PER_WORD);
1441 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1442 rtx result_part
1443 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1444 bitsize - i * BITS_PER_WORD),
1445 bitnum + bit_offset, 1, false, target_part,
1446 mode, word_mode, fallback_p);
1447
1448 gcc_assert (target_part);
1449 if (!result_part)
1450 {
1451 delete_insns_since (last);
1452 return NULL;
1453 }
1454
1455 if (result_part != target_part)
1456 emit_move_insn (target_part, result_part);
1457 }
1458
1459 if (unsignedp)
1460 {
1461 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1462 need to be zero'd out. */
1463 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1464 {
1465 unsigned int i, total_words;
1466
1467 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1468 for (i = nwords; i < total_words; i++)
1469 emit_move_insn
1470 (operand_subword (target,
1471 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1472 1, VOIDmode),
1473 const0_rtx);
1474 }
1475 return target;
1476 }
1477
1478 /* Signed bit field: sign-extend with two arithmetic shifts. */
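/* E.g. a signed 48-bit field extracted into DImode is shifted left by
   64 - 48 == 16 and then arithmetically right by 16, replicating the sign
   bit through the upper 16 bits.  */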
1479 target = expand_shift (LSHIFT_EXPR, mode, target,
1480 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1481 return expand_shift (RSHIFT_EXPR, mode, target,
1482 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1483 }
1484
1485 /* If OP0 is a multi-word register, narrow it to the affected word.
1486 If the region spans two words, defer to extract_split_bit_field. */
1487 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1488 {
1489 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1490 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1491 bitnum %= BITS_PER_WORD;
1492 if (bitnum + bitsize > BITS_PER_WORD)
1493 {
1494 if (!fallback_p)
1495 return NULL_RTX;
1496 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1497 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1498 }
1499 }
1500
1501 /* From here on we know the desired field is smaller than a word.
1502 If OP0 is a register, it too fits within a word. */
1503
1504 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1505 if (ext_mode != MAX_MACHINE_MODE
1506 && bitsize > 0
1507 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1508 /* Do not use extv/extzv for volatile bitfields when
1509 -fstrict-volatile-bitfields is in effect. */
1510 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1511 && flag_strict_volatile_bitfields > 0)
1512 /* If op0 is a register, we need it in EXT_MODE to make it
1513 acceptable to the format of ext(z)v. */
1514 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode))
1515 {
1516 struct expand_operand ops[4];
1517 unsigned HOST_WIDE_INT bitpos = bitnum;
1518 rtx xop0 = op0;
1519 rtx xtarget = target;
1520 rtx xspec_target = target;
1521 rtx xspec_target_subreg = 0;
1522 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1523
1524 /* If op0 is a register, we need it in EXT_MODE to make it
1525 acceptable to the format of ext(z)v. */
1526 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1527 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1528
1529 if (MEM_P (xop0))
1530 {
1531 /* Get a reference to the first byte of the field. */
1532 xop0 = adjust_bitfield_address (xop0, byte_mode,
1533 bitpos / BITS_PER_UNIT);
1534 bitpos %= BITS_PER_UNIT;
1535 }
1536 else
1537 {
1538 /* Convert from counting within OP0 to counting in EXT_MODE. */
1539 if (BYTES_BIG_ENDIAN)
1540 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1541 }
1542
1543 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1544 "backwards" from the size of the unit we are extracting from.
1545 Otherwise, we count bits from the most significant on a
1546 BYTES/BITS_BIG_ENDIAN machine. */
1547
1548 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1549 bitpos = unit - bitsize - bitpos;
1550
1551 if (xtarget == 0)
1552 xtarget = xspec_target = gen_reg_rtx (tmode);
1553
1554 if (GET_MODE (xtarget) != ext_mode)
1555 {
1556 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1557 between the mode of the extraction (word_mode) and the target
1558 mode. Instead, create a temporary and use convert_move to set
1559 the target. */
1560 if (REG_P (xtarget)
1561 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1562 {
1563 xtarget = gen_lowpart (ext_mode, xtarget);
1564 if (GET_MODE_PRECISION (ext_mode)
1565 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1566 xspec_target_subreg = xtarget;
1567 }
1568 else
1569 xtarget = gen_reg_rtx (ext_mode);
1570 }
1571
1572 create_output_operand (&ops[0], xtarget, ext_mode);
1573 create_fixed_operand (&ops[1], xop0);
1574 create_integer_operand (&ops[2], bitsize);
1575 create_integer_operand (&ops[3], bitpos);
1576 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1577 4, ops))
1578 {
1579 xtarget = ops[0].value;
1580 if (xtarget == xspec_target)
1581 return xtarget;
1582 if (xtarget == xspec_target_subreg)
1583 return xspec_target;
1584 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1585 }
1586 }
1587
1588 /* If OP0 is a memory, try copying it to a register and seeing if a
1589 cheap register alternative is available. */
1590 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1591 {
1592 enum machine_mode bestmode;
1593
1594 /* Get the mode to use for extracting from this field. If
1595 OP0 is BLKmode, get the smallest mode consistent with the
1596 alignment. If OP0 is a non-BLKmode object that is no
1597 wider than EXT_MODE, use its mode. Otherwise, use the
1598 smallest mode containing the field. */
1599
1600 if (GET_MODE (op0) == BLKmode
1601 || (ext_mode != MAX_MACHINE_MODE
1602 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1603 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1604 (ext_mode == MAX_MACHINE_MODE
1605 ? VOIDmode : ext_mode),
1606 MEM_VOLATILE_P (op0));
1607 else
1608 bestmode = GET_MODE (op0);
1609
1610 if (bestmode != VOIDmode
1611 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1612 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1613 {
1614 unsigned HOST_WIDE_INT offset, bitpos;
1615
1616 /* Compute the offset as a multiple of this unit,
1617 counting in bytes. */
1618 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1619 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1620 bitpos = bitnum % unit;
1621
1622 /* Make sure the register is big enough for the whole field. */
1623 if (bitpos + bitsize <= unit)
1624 {
1625 rtx last, result, xop0;
1626
1627 last = get_last_insn ();
1628
1629 /* Fetch it to a register in that size. */
1630 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1631 xop0 = force_reg (bestmode, xop0);
1632 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1633 unsignedp, packedp, target,
1634 mode, tmode, false);
1635 if (result)
1636 return result;
1637
1638 delete_insns_since (last);
1639 }
1640 }
1641 }
1642
1643 if (!fallback_p)
1644 return NULL;
1645
1646 /* Find a correspondingly-sized integer field, so we can apply
1647 shifts and masks to it. */
1648 int_mode = int_mode_for_mode (tmode);
1649 if (int_mode == BLKmode)
1650 int_mode = int_mode_for_mode (mode);
1651 /* Should probably push op0 out to memory and then do a load. */
1652 gcc_assert (int_mode != BLKmode);
1653
1654 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1655 target, unsignedp, packedp);
1656 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1657 }
1658
1659 /* Generate code to extract a byte-field from STR_RTX
1660 containing BITSIZE bits, starting at BITNUM,
1661 and put it in TARGET if possible (if TARGET is nonzero).
1662 Regardless of TARGET, we return the rtx for where the value is placed.
1663
1664 STR_RTX is the structure containing the byte (a REG or MEM).
1665 UNSIGNEDP is nonzero if this is an unsigned bit field.
1666 PACKEDP is nonzero if the field has the packed attribute.
1667 MODE is the natural mode of the field value once extracted.
1668 TMODE is the mode the caller would like the value to have;
1669 but the value may be returned with type MODE instead.
1670
1671 If a TARGET is specified and we can store in it at no extra cost,
1672 we do so, and return TARGET.
1673 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1674 if they are equally easy. */
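/* Illustrative sketch (not part of the original sources): a caller that
   wants the unsigned 5-bit field starting at bit 11 of some rtx OP0,
   returned as an SImode value, might write

       rtx val = extract_bit_field (op0, 5, 11, 1, false,
                                    NULL_RTX, SImode, SImode);

   where OP0 and VAL are hypothetical names; the field comes back in a
   register, or in the suggested TARGET when storing there is free.  */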
1675
1676 rtx
1677 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1678 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1679 rtx target, enum machine_mode mode, enum machine_mode tmode)
1680 {
1681 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1682 target, mode, tmode, true);
1683 }
1684 \f
1685 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1686 from bit BITNUM of OP0.
1687
1688 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1689 PACKEDP is true if the field has the packed attribute.
1690
1691 If TARGET is nonzero, attempts to store the value there
1692 and return TARGET, but this is not guaranteed.
1693 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1694
1695 static rtx
1696 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1697 unsigned HOST_WIDE_INT bitsize,
1698 unsigned HOST_WIDE_INT bitnum, rtx target,
1699 int unsignedp, bool packedp)
1700 {
1701 enum machine_mode mode;
1702
1703 if (MEM_P (op0))
1704 {
1705 /* Get the proper mode to use for this field. We want a mode that
1706 includes the entire field. If such a mode would be larger than
1707 a word, we won't be doing the extraction the normal way. */
1708
1709 if (MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0)
1711 {
1712 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1713 mode = GET_MODE (op0);
1714 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1715 mode = GET_MODE (target);
1716 else
1717 mode = tmode;
1718 }
1719 else
1720 mode = get_best_mode (bitsize, bitnum, 0, 0,
1721 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722
1723 if (mode == VOIDmode)
1724 /* The only way this should occur is if the field spans word
1725 boundaries. */
1726 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1727
1728 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1729 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1730
1731 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1732 if it results in a multi-word access where we otherwise wouldn't
1733 have one. So, check for that case here. */
1734 if (MEM_P (op0)
1735 && MEM_VOLATILE_P (op0)
1736 && flag_strict_volatile_bitfields > 0
1737 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1738 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1739 {
1740 if (STRICT_ALIGNMENT)
1741 {
1742 static bool informed_about_misalignment = false;
1743
1744 if (packedp)
1745 {
1746 if (bitsize == total_bits)
1747 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1748 "multiple accesses to volatile structure"
1749 " member because of packed attribute");
1750 else
1751 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1752 "multiple accesses to volatile structure"
1753 " bitfield because of packed attribute");
1754
1755 return extract_split_bit_field (op0, bitsize, bitnum,
1756 unsignedp);
1757 }
1758
1759 if (bitsize == total_bits)
1760 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1761 "mis-aligned access used for structure member");
1762 else
1763 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1764 "mis-aligned access used for structure bitfield");
1765
1766 if (! informed_about_misalignment)
1767 {
1768 informed_about_misalignment = true;
1769 inform (input_location,
1770 "when a volatile object spans multiple type-sized"
1771 " locations, the compiler must choose between using"
1772 " a single mis-aligned access to preserve the"
1773 " volatility, or using multiple aligned accesses"
1774 " to avoid runtime faults; this code may fail at"
1775 " runtime if the hardware does not allow this"
1776 " access");
1777 }
1778 }
1779 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1780 }
1781 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1782 bitnum -= bit_offset;
1783 }
1784
1785 mode = GET_MODE (op0);
1786 gcc_assert (SCALAR_INT_MODE_P (mode));
1787
1788 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1789 for invalid input, such as extract equivalent of f5 from
1790 gcc.dg/pr48335-2.c. */
1791
1792 if (BYTES_BIG_ENDIAN)
1793 /* BITNUM is the distance between our msb and that of OP0.
1794 Convert it to the distance from the lsb. */
1795 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1796
1797 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1798 We have reduced the big-endian case to the little-endian case. */
1799
1800 if (unsignedp)
1801 {
1802 if (bitnum)
1803 {
1804 /* If the field does not already start at the lsb,
1805 shift it so it does. */
1806 /* Maybe propagate the target for the shift. */
1807 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1808 if (tmode != mode)
1809 subtarget = 0;
1810 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1811 }
1812 /* Convert the value to the desired mode. */
1813 if (mode != tmode)
1814 op0 = convert_to_mode (tmode, op0, 1);
1815
1816 /* Unless the msb of the field used to be the msb when we shifted,
1817 mask out the upper bits. */
1818
1819 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1820 return expand_binop (GET_MODE (op0), and_optab, op0,
1821 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1822 target, 1, OPTAB_LIB_WIDEN);
1823 return op0;
1824 }
1825
1826 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1827 then arithmetic-shift its lsb to the lsb of the word. */
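  /* Worked example (illustrative): in an 8-bit mode, a signed 3-bit field
     whose lsb is bit 2 is obtained as (op0 << 3) >> 5, where the right
     shift is arithmetic: the left shift moves the field's msb to bit 7,
     and the arithmetic right shift both repositions the field at bit 0
     and sign-extends it.  The code below computes the two shift amounts
     as GET_MODE_BITSIZE (mode) - (bitsize + bitnum) and
     GET_MODE_BITSIZE (mode) - bitsize respectively.  */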
1828 op0 = force_reg (mode, op0);
1829
1830 /* Find the narrowest integer mode that contains the field. */
1831
1832 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1833 mode = GET_MODE_WIDER_MODE (mode))
1834 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1835 {
1836 op0 = convert_to_mode (mode, op0, 0);
1837 break;
1838 }
1839
1840 if (mode != tmode)
1841 target = 0;
1842
1843 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1844 {
1845 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1846 /* Maybe propagate the target for the shift. */
1847 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1848 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1849 }
1850
1851 return expand_shift (RSHIFT_EXPR, mode, op0,
1852 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1853 }
1854 \f
1855 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1856 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1857 complement of that if COMPLEMENT. The mask is truncated if
1858 necessary to the width of mode MODE. The mask is zero-extended if
1859 BITSIZE+BITPOS is too small for MODE. */
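/* For instance (illustrative values): with MODE = QImode, BITPOS = 3 and
   BITSIZE = 4 the mask is 0b01111000 (0x78); with COMPLEMENT nonzero it is
   0b10000111 (0x87) after truncation to QImode.  */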
1860
1861 static rtx
1862 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1863 {
1864 double_int mask;
1865
1866 mask = double_int::mask (bitsize);
1867 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1868
1869 if (complement)
1870 mask = ~mask;
1871
1872 return immed_double_int_const (mask, mode);
1873 }
1874
1875 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1876 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
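/* E.g. (illustrative): for VALUE = 0x2b, BITSIZE = 4 and BITPOS = 3 the
   result is (0x2b & 0xf) << 3 == 0x58.  */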
1877
1878 static rtx
1879 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1880 {
1881 double_int val;
1882
1883 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1884 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1885
1886 return immed_double_int_const (val, mode);
1887 }
1888 \f
1889 /* Extract a bit field that is split across two words
1890 and return an RTX for the result.
1891
1892 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1893 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1894 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
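/* Worked example (illustrative, assuming 32-bit words and little-endian
   bit numbering): for BITSIZE = 20 and BITPOS = 26 the loop below first
   extracts the 6 bits at positions 26..31 of word 0, then the remaining
   14 bits at positions 0..13 of word 1; the second part is shifted left
   by 6 and the two parts are combined with IOR.  For a signed field the
   result is then sign-extended by the pair of shifts at the end.  */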
1895
1896 static rtx
1897 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1898 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1899 {
1900 unsigned int unit;
1901 unsigned int bitsdone = 0;
1902 rtx result = NULL_RTX;
1903 int first = 1;
1904
1905 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1906 much at a time. */
1907 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1908 unit = BITS_PER_WORD;
1909 else
1910 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1911
1912 while (bitsdone < bitsize)
1913 {
1914 unsigned HOST_WIDE_INT thissize;
1915 rtx part, word;
1916 unsigned HOST_WIDE_INT thispos;
1917 unsigned HOST_WIDE_INT offset;
1918
1919 offset = (bitpos + bitsdone) / unit;
1920 thispos = (bitpos + bitsdone) % unit;
1921
1922 /* THISSIZE must not overrun a word boundary. Otherwise,
1923 extract_fixed_bit_field will call us again, and we will mutually
1924 recurse forever. */
1925 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1926 thissize = MIN (thissize, unit - thispos);
1927
1928 /* If OP0 is a register, then handle OFFSET here.
1929
1930 When handling multiword bitfields, extract_bit_field may pass
1931 down a word_mode SUBREG of a larger REG for a bitfield that actually
1932 crosses a word boundary. Thus, for a SUBREG, we must find
1933 the current word starting from the base register. */
1934 if (GET_CODE (op0) == SUBREG)
1935 {
1936 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1937 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1938 GET_MODE (SUBREG_REG (op0)));
1939 offset = 0;
1940 }
1941 else if (REG_P (op0))
1942 {
1943 word = operand_subword_force (op0, offset, GET_MODE (op0));
1944 offset = 0;
1945 }
1946 else
1947 word = op0;
1948
1949 /* Extract the parts in bit-counting order,
1950 whose meaning is determined by BYTES_BIG_ENDIAN.
1951 OFFSET is in UNITs, and UNIT is in bits. */
1952 part = extract_fixed_bit_field (word_mode, word, thissize,
1953 offset * unit + thispos, 0, 1, false);
1954 bitsdone += thissize;
1955
1956 /* Shift this part into place for the result. */
1957 if (BYTES_BIG_ENDIAN)
1958 {
1959 if (bitsize != bitsdone)
1960 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1961 bitsize - bitsdone, 0, 1);
1962 }
1963 else
1964 {
1965 if (bitsdone != thissize)
1966 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1967 bitsdone - thissize, 0, 1);
1968 }
1969
1970 if (first)
1971 result = part;
1972 else
1973 /* Combine the parts with bitwise or. This works
1974 because we extracted each part as an unsigned bit field. */
1975 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1976 OPTAB_LIB_WIDEN);
1977
1978 first = 0;
1979 }
1980
1981 /* Unsigned bit field: we are done. */
1982 if (unsignedp)
1983 return result;
1984 /* Signed bit field: sign-extend with two arithmetic shifts. */
1985 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1986 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1987 return expand_shift (RSHIFT_EXPR, word_mode, result,
1988 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1989 }
1990 \f
1991 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1992 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1993 MODE, fill the upper bits with zeros. Fail if the layout of either
1994 mode is unknown (as for CC modes) or if the extraction would involve
1995 unprofitable mode punning. Return the value on success, otherwise
1996 return null.
1997
1998 This is different from gen_lowpart* in these respects:
1999
2000 - the returned value must always be considered an rvalue
2001
2002 - when MODE is wider than SRC_MODE, the extraction involves
2003 a zero extension
2004
2005 - when MODE is smaller than SRC_MODE, the extraction involves
2006 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2007
2008 In other words, this routine performs a computation, whereas the
2009 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2010 operations. */
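/* For instance (illustrative): extract_low_bits (SImode, DImode, x) yields
   the low 32 bits of X as an SImode rvalue (a truncation), whereas
   extract_low_bits (DImode, SImode, x) yields X zero-extended to DImode;
   either call may return null when the target makes the mode punning
   unprofitable.  */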
2011
2012 rtx
2013 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2014 {
2015 enum machine_mode int_mode, src_int_mode;
2016
2017 if (mode == src_mode)
2018 return src;
2019
2020 if (CONSTANT_P (src))
2021 {
2022 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2023 fails, it will happily create (subreg (symbol_ref)) or similar
2024 invalid SUBREGs. */
2025 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2026 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2027 if (ret)
2028 return ret;
2029
2030 if (GET_MODE (src) == VOIDmode
2031 || !validate_subreg (mode, src_mode, src, byte))
2032 return NULL_RTX;
2033
2034 src = force_reg (GET_MODE (src), src);
2035 return gen_rtx_SUBREG (mode, src, byte);
2036 }
2037
2038 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2039 return NULL_RTX;
2040
2041 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2042 && MODES_TIEABLE_P (mode, src_mode))
2043 {
2044 rtx x = gen_lowpart_common (mode, src);
2045 if (x)
2046 return x;
2047 }
2048
2049 src_int_mode = int_mode_for_mode (src_mode);
2050 int_mode = int_mode_for_mode (mode);
2051 if (src_int_mode == BLKmode || int_mode == BLKmode)
2052 return NULL_RTX;
2053
2054 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2055 return NULL_RTX;
2056 if (!MODES_TIEABLE_P (int_mode, mode))
2057 return NULL_RTX;
2058
2059 src = gen_lowpart (src_int_mode, src);
2060 src = convert_modes (int_mode, src_int_mode, src, true);
2061 src = gen_lowpart (mode, src);
2062 return src;
2063 }
2064 \f
2065 /* Add INC into TARGET. */
2066
2067 void
2068 expand_inc (rtx target, rtx inc)
2069 {
2070 rtx value = expand_binop (GET_MODE (target), add_optab,
2071 target, inc,
2072 target, 0, OPTAB_LIB_WIDEN);
2073 if (value != target)
2074 emit_move_insn (target, value);
2075 }
2076
2077 /* Subtract DEC from TARGET. */
2078
2079 void
2080 expand_dec (rtx target, rtx dec)
2081 {
2082 rtx value = expand_binop (GET_MODE (target), sub_optab,
2083 target, dec,
2084 target, 0, OPTAB_LIB_WIDEN);
2085 if (value != target)
2086 emit_move_insn (target, value);
2087 }
2088 \f
2089 /* Output a shift instruction for expression code CODE,
2090 with SHIFTED being the rtx for the value to shift,
2091 and AMOUNT the rtx for the amount to shift by.
2092 Store the result in the rtx TARGET, if that is convenient.
2093 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2094 Return the rtx for where the value is. */
2095
2096 static rtx
2097 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2098 rtx amount, rtx target, int unsignedp)
2099 {
2100 rtx op1, temp = 0;
2101 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2102 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2103 optab lshift_optab = ashl_optab;
2104 optab rshift_arith_optab = ashr_optab;
2105 optab rshift_uns_optab = lshr_optab;
2106 optab lrotate_optab = rotl_optab;
2107 optab rrotate_optab = rotr_optab;
2108 enum machine_mode op1_mode;
2109 int attempt;
2110 bool speed = optimize_insn_for_speed_p ();
2111
2112 op1 = amount;
2113 op1_mode = GET_MODE (op1);
2114
2115 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2116 shift amount is a vector, use the vector/vector shift patterns. */
2117 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2118 {
2119 lshift_optab = vashl_optab;
2120 rshift_arith_optab = vashr_optab;
2121 rshift_uns_optab = vlshr_optab;
2122 lrotate_optab = vrotl_optab;
2123 rrotate_optab = vrotr_optab;
2124 }
2125
2126 /* We used to detect shift counts computed by NEGATE_EXPR
2127 and shift in the opposite direction, but that does not work
2128 on all machines. */
2129
2130 if (SHIFT_COUNT_TRUNCATED)
2131 {
2132 if (CONST_INT_P (op1)
2133 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2134 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2135 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2136 % GET_MODE_BITSIZE (mode));
2137 else if (GET_CODE (op1) == SUBREG
2138 && subreg_lowpart_p (op1)
2139 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2140 op1 = SUBREG_REG (op1);
2141 }
2142
2143 if (op1 == const0_rtx)
2144 return shifted;
2145
2146 /* Check whether it's cheaper to implement a left shift by a constant
2147 bit count by a sequence of additions. */
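  /* E.g. x << 2 may be emitted as two self-additions, t = x + x;
     t = t + t, when 2 * add_cost is cheaper than shift_cost for a
     shift count of 2.  */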
2148 if (code == LSHIFT_EXPR
2149 && CONST_INT_P (op1)
2150 && INTVAL (op1) > 0
2151 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2152 && INTVAL (op1) < MAX_BITS_PER_WORD
2153 && (shift_cost (speed, mode, INTVAL (op1))
2154 > INTVAL (op1) * add_cost (speed, mode))
2155 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2156 {
2157 int i;
2158 for (i = 0; i < INTVAL (op1); i++)
2159 {
2160 temp = force_reg (mode, shifted);
2161 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2162 unsignedp, OPTAB_LIB_WIDEN);
2163 }
2164 return shifted;
2165 }
2166
2167 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2168 {
2169 enum optab_methods methods;
2170
2171 if (attempt == 0)
2172 methods = OPTAB_DIRECT;
2173 else if (attempt == 1)
2174 methods = OPTAB_WIDEN;
2175 else
2176 methods = OPTAB_LIB_WIDEN;
2177
2178 if (rotate)
2179 {
2180 /* Widening does not work for rotation. */
2181 if (methods == OPTAB_WIDEN)
2182 continue;
2183 else if (methods == OPTAB_LIB_WIDEN)
2184 {
2185 /* If we have been unable to open-code this by a rotation,
2186 do it as the IOR of two shifts. I.e., to rotate A
2187 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2188 where C is the bitsize of A.
2189
2190 It is theoretically possible that the target machine might
2191 not be able to perform either shift and hence we would
2192 be making two libcalls rather than just the one for the
2193 shift (similarly if IOR could not be done). We will allow
2194 this extremely unlikely lossage to avoid complicating the
2195 code below. */
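	      /* Concretely (illustrative): in an 8-bit mode, rotating
		 0xb4 left by 3 yields ((0xb4 << 3) | (0xb4 >> 5)) & 0xff
		 == 0xa5.  */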
2196
2197 rtx subtarget = target == shifted ? 0 : target;
2198 rtx new_amount, other_amount;
2199 rtx temp1;
2200
2201 new_amount = op1;
2202 if (CONST_INT_P (op1))
2203 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2204 - INTVAL (op1));
2205 else
2206 other_amount
2207 = simplify_gen_binary (MINUS, GET_MODE (op1),
2208 GEN_INT (GET_MODE_PRECISION (mode)),
2209 op1);
2210
2211 shifted = force_reg (mode, shifted);
2212
2213 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2214 mode, shifted, new_amount, 0, 1);
2215 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2216 mode, shifted, other_amount,
2217 subtarget, 1);
2218 return expand_binop (mode, ior_optab, temp, temp1, target,
2219 unsignedp, methods);
2220 }
2221
2222 temp = expand_binop (mode,
2223 left ? lrotate_optab : rrotate_optab,
2224 shifted, op1, target, unsignedp, methods);
2225 }
2226 else if (unsignedp)
2227 temp = expand_binop (mode,
2228 left ? lshift_optab : rshift_uns_optab,
2229 shifted, op1, target, unsignedp, methods);
2230
2231 /* Do arithmetic shifts.
2232 Also, if we are going to widen the operand, we can just as well
2233 use an arithmetic right-shift instead of a logical one. */
2234 if (temp == 0 && ! rotate
2235 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2236 {
2237 enum optab_methods methods1 = methods;
2238
2239 /* If trying to widen a log shift to an arithmetic shift,
2240 don't accept an arithmetic shift of the same size. */
2241 if (unsignedp)
2242 methods1 = OPTAB_MUST_WIDEN;
2243
2244 /* Arithmetic shift */
2245
2246 temp = expand_binop (mode,
2247 left ? lshift_optab : rshift_arith_optab,
2248 shifted, op1, target, unsignedp, methods1);
2249 }
2250
2251 /* We used to try extzv here for logical right shifts, but that was
2252 only useful for one machine, the VAX, and caused poor code
2253 generation there for lshrdi3, so the code was deleted and a
2254 define_expand for lshrsi3 was added to vax.md. */
2255 }
2256
2257 gcc_assert (temp);
2258 return temp;
2259 }
2260
2261 /* Output a shift instruction for expression code CODE,
2262 with SHIFTED being the rtx for the value to shift,
2263 and AMOUNT the amount to shift by.
2264 Store the result in the rtx TARGET, if that is convenient.
2265 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2266 Return the rtx for where the value is. */
2267
2268 rtx
2269 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2270 int amount, rtx target, int unsignedp)
2271 {
2272 return expand_shift_1 (code, mode,
2273 shifted, GEN_INT (amount), target, unsignedp);
2274 }
2275
2276 /* Output a shift instruction for expression code CODE,
2277 with SHIFTED being the rtx for the value to shift,
2278 and AMOUNT the tree for the amount to shift by.
2279 Store the result in the rtx TARGET, if that is convenient.
2280 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2281 Return the rtx for where the value is. */
2282
2283 rtx
2284 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2285 tree amount, rtx target, int unsignedp)
2286 {
2287 return expand_shift_1 (code, mode,
2288 shifted, expand_normal (amount), target, unsignedp);
2289 }
2290
2291 \f
2292 /* Indicates the type of fixup needed after a constant multiplication.
2293 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2294 the result should be negated, and ADD_VARIANT means that the
2295 multiplicand should be added to the result. */
2296 enum mult_variant {basic_variant, negate_variant, add_variant};
2297
2298 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2299 const struct mult_cost *, enum machine_mode mode);
2300 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2301 struct algorithm *, enum mult_variant *, int);
2302 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2303 const struct algorithm *, enum mult_variant);
2304 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2305 static rtx extract_high_half (enum machine_mode, rtx);
2306 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2307 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2308 int, int);
2309 /* Compute and return the best algorithm for multiplying by T.
2310 The algorithm must cost less than COST_LIMIT.
2311 If retval.cost >= COST_LIMIT, no algorithm was found and all
2312 other fields of the returned struct are undefined.
2313 MODE is the machine mode of the multiplication. */
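/* For instance (illustrative decomposition): multiplication by 45 can be
   done as t = (x << 2) + x; result = (t << 3) + t, i.e. two shift-and-add
   steps, since 45 == 5 * 9 == (4 + 1) * (8 + 1).  synth_mult searches for
   such decompositions and keeps the cheapest one found within COST_LIMIT.  */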
2314
2315 static void
2316 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2317 const struct mult_cost *cost_limit, enum machine_mode mode)
2318 {
2319 int m;
2320 struct algorithm *alg_in, *best_alg;
2321 struct mult_cost best_cost;
2322 struct mult_cost new_limit;
2323 int op_cost, op_latency;
2324 unsigned HOST_WIDE_INT orig_t = t;
2325 unsigned HOST_WIDE_INT q;
2326 int maxm, hash_index;
2327 bool cache_hit = false;
2328 enum alg_code cache_alg = alg_zero;
2329 bool speed = optimize_insn_for_speed_p ();
2330 enum machine_mode imode;
2331 struct alg_hash_entry *entry_ptr;
2332
2333 /* Indicate that no algorithm is yet found. If no algorithm
2334 is found, this value will be returned and indicate failure. */
2335 alg_out->cost.cost = cost_limit->cost + 1;
2336 alg_out->cost.latency = cost_limit->latency + 1;
2337
2338 if (cost_limit->cost < 0
2339 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2340 return;
2341
2342 /* Be prepared for vector modes. */
2343 imode = GET_MODE_INNER (mode);
2344 if (imode == VOIDmode)
2345 imode = mode;
2346
2347 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2348
2349 /* Restrict the bits of "t" to the multiplication's mode. */
2350 t &= GET_MODE_MASK (imode);
2351
2352 /* t == 1 can be done in zero cost. */
2353 if (t == 1)
2354 {
2355 alg_out->ops = 1;
2356 alg_out->cost.cost = 0;
2357 alg_out->cost.latency = 0;
2358 alg_out->op[0] = alg_m;
2359 return;
2360 }
2361
2362 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2363 fail now. */
2364 if (t == 0)
2365 {
2366 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2367 return;
2368 else
2369 {
2370 alg_out->ops = 1;
2371 alg_out->cost.cost = zero_cost (speed);
2372 alg_out->cost.latency = zero_cost (speed);
2373 alg_out->op[0] = alg_zero;
2374 return;
2375 }
2376 }
2377
2378 /* We'll be needing a couple extra algorithm structures now. */
2379
2380 alg_in = XALLOCA (struct algorithm);
2381 best_alg = XALLOCA (struct algorithm);
2382 best_cost = *cost_limit;
2383
2384 /* Compute the hash index. */
2385 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2386
2387 /* See if we already know what to do for T. */
2388 entry_ptr = alg_hash_entry_ptr (hash_index);
2389 if (entry_ptr->t == t
2390 && entry_ptr->mode == mode
2392 && entry_ptr->speed == speed
2393 && entry_ptr->alg != alg_unknown)
2394 {
2395 cache_alg = entry_ptr->alg;
2396
2397 if (cache_alg == alg_impossible)
2398 {
2399 /* The cache tells us that it's impossible to synthesize
2400 multiplication by T within entry_ptr->cost. */
2401 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2402 /* COST_LIMIT is at least as restrictive as the one
2403 recorded in the hash table, in which case we have no
2404 hope of synthesizing a multiplication. Just
2405 return. */
2406 return;
2407
2408 /* If we get here, COST_LIMIT is less restrictive than the
2409 one recorded in the hash table, so we may be able to
2410 synthesize a multiplication. Proceed as if we didn't
2411 have the cache entry. */
2412 }
2413 else
2414 {
2415 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2416 /* The cached algorithm shows that this multiplication
2417 requires more cost than COST_LIMIT. Just return. This
2418 way, we don't clobber this cache entry with
2419 alg_impossible but retain useful information. */
2420 return;
2421
2422 cache_hit = true;
2423
2424 switch (cache_alg)
2425 {
2426 case alg_shift:
2427 goto do_alg_shift;
2428
2429 case alg_add_t_m2:
2430 case alg_sub_t_m2:
2431 goto do_alg_addsub_t_m2;
2432
2433 case alg_add_factor:
2434 case alg_sub_factor:
2435 goto do_alg_addsub_factor;
2436
2437 case alg_add_t2_m:
2438 goto do_alg_add_t2_m;
2439
2440 case alg_sub_t2_m:
2441 goto do_alg_sub_t2_m;
2442
2443 default:
2444 gcc_unreachable ();
2445 }
2446 }
2447 }
2448
2449 /* If we have a group of zero bits at the low-order part of T, try
2450 multiplying by the remaining bits and then doing a shift. */
2451
2452 if ((t & 1) == 0)
2453 {
2454 do_alg_shift:
2455 m = floor_log2 (t & -t); /* m = number of low zero bits */
2456 if (m < maxm)
2457 {
2458 q = t >> m;
2459 /* The function expand_shift will choose between a shift and
2460 a sequence of additions, so the observed cost is given as
2461 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2462 op_cost = m * add_cost (speed, mode);
2463 if (shift_cost (speed, mode, m) < op_cost)
2464 op_cost = shift_cost (speed, mode, m);
2465 new_limit.cost = best_cost.cost - op_cost;
2466 new_limit.latency = best_cost.latency - op_cost;
2467 synth_mult (alg_in, q, &new_limit, mode);
2468
2469 alg_in->cost.cost += op_cost;
2470 alg_in->cost.latency += op_cost;
2471 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2472 {
2473 struct algorithm *x;
2474 best_cost = alg_in->cost;
2475 x = alg_in, alg_in = best_alg, best_alg = x;
2476 best_alg->log[best_alg->ops] = m;
2477 best_alg->op[best_alg->ops] = alg_shift;
2478 }
2479
2480 /* See if treating ORIG_T as a signed number yields a better
2481 sequence. Try this sequence only for a negative ORIG_T
2482 as it would be useless for a non-negative ORIG_T. */
2483 if ((HOST_WIDE_INT) orig_t < 0)
2484 {
2485 /* Shift ORIG_T as follows because a right shift of a
2486 negative-valued signed type is implementation
2487 defined. */
2488 q = ~(~orig_t >> m);
2489 /* The function expand_shift will choose between a shift
2490 and a sequence of additions, so the observed cost is
2491 given as MIN (m * add_cost(speed, mode),
2492 shift_cost(speed, mode, m)). */
2493 op_cost = m * add_cost (speed, mode);
2494 if (shift_cost (speed, mode, m) < op_cost)
2495 op_cost = shift_cost (speed, mode, m);
2496 new_limit.cost = best_cost.cost - op_cost;
2497 new_limit.latency = best_cost.latency - op_cost;
2498 synth_mult (alg_in, q, &new_limit, mode);
2499
2500 alg_in->cost.cost += op_cost;
2501 alg_in->cost.latency += op_cost;
2502 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2503 {
2504 struct algorithm *x;
2505 best_cost = alg_in->cost;
2506 x = alg_in, alg_in = best_alg, best_alg = x;
2507 best_alg->log[best_alg->ops] = m;
2508 best_alg->op[best_alg->ops] = alg_shift;
2509 }
2510 }
2511 }
2512 if (cache_hit)
2513 goto done;
2514 }
2515
2516 /* If we have an odd number, add or subtract one. */
2517 if ((t & 1) != 0)
2518 {
2519 unsigned HOST_WIDE_INT w;
2520
2521 do_alg_addsub_t_m2:
2522 for (w = 1; (w & t) != 0; w <<= 1)
2523 ;
2524 /* If T was -1, then W will be zero after the loop. This is another
2525 case where T ends with ...111. Handling this by multiplying by
2526 (T + 1) and subtracting 1 produces slightly better code and makes
2527 algorithm selection much faster than treating it like the ...0111 case
2528 below. */
2529 if (w == 0
2530 || (w > 2
2531 /* Reject the case where t is 3.
2532 Thus we prefer addition in that case. */
2533 && t != 3))
2534 {
2535 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
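	  /* E.g. x * 7 becomes (x << 3) - x.  */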
2536
2537 op_cost = add_cost (speed, mode);
2538 new_limit.cost = best_cost.cost - op_cost;
2539 new_limit.latency = best_cost.latency - op_cost;
2540 synth_mult (alg_in, t + 1, &new_limit, mode);
2541
2542 alg_in->cost.cost += op_cost;
2543 alg_in->cost.latency += op_cost;
2544 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2545 {
2546 struct algorithm *x;
2547 best_cost = alg_in->cost;
2548 x = alg_in, alg_in = best_alg, best_alg = x;
2549 best_alg->log[best_alg->ops] = 0;
2550 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2551 }
2552 }
2553 else
2554 {
2555 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
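	  /* E.g. x * 5 becomes (x << 2) + x.  */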
2556
2557 op_cost = add_cost (speed, mode);
2558 new_limit.cost = best_cost.cost - op_cost;
2559 new_limit.latency = best_cost.latency - op_cost;
2560 synth_mult (alg_in, t - 1, &new_limit, mode);
2561
2562 alg_in->cost.cost += op_cost;
2563 alg_in->cost.latency += op_cost;
2564 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2565 {
2566 struct algorithm *x;
2567 best_cost = alg_in->cost;
2568 x = alg_in, alg_in = best_alg, best_alg = x;
2569 best_alg->log[best_alg->ops] = 0;
2570 best_alg->op[best_alg->ops] = alg_add_t_m2;
2571 }
2572 }
2573
2574 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2575 quickly with a - a * n for some appropriate constant n. */
2576 m = exact_log2 (-orig_t + 1);
2577 if (m >= 0 && m < maxm)
2578 {
2579 op_cost = shiftsub1_cost (speed, mode, m);
2580 new_limit.cost = best_cost.cost - op_cost;
2581 new_limit.latency = best_cost.latency - op_cost;
2582 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2583 &new_limit, mode);
2584
2585 alg_in->cost.cost += op_cost;
2586 alg_in->cost.latency += op_cost;
2587 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2588 {
2589 struct algorithm *x;
2590 best_cost = alg_in->cost;
2591 x = alg_in, alg_in = best_alg, best_alg = x;
2592 best_alg->log[best_alg->ops] = m;
2593 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2594 }
2595 }
2596
2597 if (cache_hit)
2598 goto done;
2599 }
2600
2601 /* Look for factors of t of the form
2602 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2603 If we find such a factor, we can multiply by t using an algorithm that
2604 multiplies by q, shift the result by m and add/subtract it to itself.
2605
2606 We search for large factors first and loop down, even if large factors
2607 are less probable than small; if we find a large factor we will find a
2608 good sequence quickly, and therefore be able to prune (by decreasing
2609 COST_LIMIT) the search. */
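  /* For example (illustrative): t = 75 factors as 5 * (2**4 - 1), so
     x * 75 can be computed as t1 = x * 5 (by recursion, (x << 2) + x)
     followed by (t1 << 4) - t1.  */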
2610
2611 do_alg_addsub_factor:
2612 for (m = floor_log2 (t - 1); m >= 2; m--)
2613 {
2614 unsigned HOST_WIDE_INT d;
2615
2616 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2617 if (t % d == 0 && t > d && m < maxm
2618 && (!cache_hit || cache_alg == alg_add_factor))
2619 {
2620 /* If the target has a cheap shift-and-add instruction use
2621 that in preference to a shift insn followed by an add insn.
2622 Assume that the shift-and-add is "atomic" with a latency
2623 equal to its cost, otherwise assume that on superscalar
2624 hardware the shift may be executed concurrently with the
2625 earlier steps in the algorithm. */
2626 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2627 if (shiftadd_cost (speed, mode, m) < op_cost)
2628 {
2629 op_cost = shiftadd_cost (speed, mode, m);
2630 op_latency = op_cost;
2631 }
2632 else
2633 op_latency = add_cost (speed, mode);
2634
2635 new_limit.cost = best_cost.cost - op_cost;
2636 new_limit.latency = best_cost.latency - op_latency;
2637 synth_mult (alg_in, t / d, &new_limit, mode);
2638
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_latency;
2641 if (alg_in->cost.latency < op_cost)
2642 alg_in->cost.latency = op_cost;
2643 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644 {
2645 struct algorithm *x;
2646 best_cost = alg_in->cost;
2647 x = alg_in, alg_in = best_alg, best_alg = x;
2648 best_alg->log[best_alg->ops] = m;
2649 best_alg->op[best_alg->ops] = alg_add_factor;
2650 }
2651 /* Other factors will have been taken care of in the recursion. */
2652 break;
2653 }
2654
2655 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2656 if (t % d == 0 && t > d && m < maxm
2657 && (!cache_hit || cache_alg == alg_sub_factor))
2658 {
2659 /* If the target has a cheap shift-and-subtract insn use
2660 that in preference to a shift insn followed by a sub insn.
2661 Assume that the shift-and-sub is "atomic" with a latency
2662 equal to its cost, otherwise assume that on superscalar
2663 hardware the shift may be executed concurrently with the
2664 earlier steps in the algorithm. */
2665 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2666 if (shiftsub0_cost (speed, mode, m) < op_cost)
2667 {
2668 op_cost = shiftsub0_cost (speed, mode, m);
2669 op_latency = op_cost;
2670 }
2671 else
2672 op_latency = add_cost (speed, mode);
2673
2674 new_limit.cost = best_cost.cost - op_cost;
2675 new_limit.latency = best_cost.latency - op_latency;
2676 synth_mult (alg_in, t / d, &new_limit, mode);
2677
2678 alg_in->cost.cost += op_cost;
2679 alg_in->cost.latency += op_latency;
2680 if (alg_in->cost.latency < op_cost)
2681 alg_in->cost.latency = op_cost;
2682 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683 {
2684 struct algorithm *x;
2685 best_cost = alg_in->cost;
2686 x = alg_in, alg_in = best_alg, best_alg = x;
2687 best_alg->log[best_alg->ops] = m;
2688 best_alg->op[best_alg->ops] = alg_sub_factor;
2689 }
2690 break;
2691 }
2692 }
2693 if (cache_hit)
2694 goto done;
2695
2696 /* Try shift-and-add (load effective address) instructions,
2697 i.e. do a*3, a*5, a*9. */
2698 if ((t & 1) != 0)
2699 {
2700 do_alg_add_t2_m:
2701 q = t - 1;
2702 q = q & -q;
2703 m = exact_log2 (q);
2704 if (m >= 0 && m < maxm)
2705 {
2706 op_cost = shiftadd_cost (speed, mode, m);
2707 new_limit.cost = best_cost.cost - op_cost;
2708 new_limit.latency = best_cost.latency - op_cost;
2709 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2710
2711 alg_in->cost.cost += op_cost;
2712 alg_in->cost.latency += op_cost;
2713 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714 {
2715 struct algorithm *x;
2716 best_cost = alg_in->cost;
2717 x = alg_in, alg_in = best_alg, best_alg = x;
2718 best_alg->log[best_alg->ops] = m;
2719 best_alg->op[best_alg->ops] = alg_add_t2_m;
2720 }
2721 }
2722 if (cache_hit)
2723 goto done;
2724
2725 do_alg_sub_t2_m:
2726 q = t + 1;
2727 q = q & -q;
2728 m = exact_log2 (q);
2729 if (m >= 0 && m < maxm)
2730 {
2731 op_cost = shiftsub0_cost (speed, mode, m);
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_cost;
2734 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2735
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_cost;
2738 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739 {
2740 struct algorithm *x;
2741 best_cost = alg_in->cost;
2742 x = alg_in, alg_in = best_alg, best_alg = x;
2743 best_alg->log[best_alg->ops] = m;
2744 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2745 }
2746 }
2747 if (cache_hit)
2748 goto done;
2749 }
2750
2751 done:
2752 /* If best_cost has not decreased, we have not found any algorithm. */
2753 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2754 {
2755 /* We failed to find an algorithm. Record alg_impossible for
2756 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2757 we are asked to find an algorithm for T within the same or
2758 lower COST_LIMIT, we can immediately return to the
2759 caller. */
2760 entry_ptr->t = t;
2761 entry_ptr->mode = mode;
2762 entry_ptr->speed = speed;
2763 entry_ptr->alg = alg_impossible;
2764 entry_ptr->cost = *cost_limit;
2765 return;
2766 }
2767
2768 /* Cache the result. */
2769 if (!cache_hit)
2770 {
2771 entry_ptr->t = t;
2772 entry_ptr->mode = mode;
2773 entry_ptr->speed = speed;
2774 entry_ptr->alg = best_alg->op[best_alg->ops];
2775 entry_ptr->cost.cost = best_cost.cost;
2776 entry_ptr->cost.latency = best_cost.latency;
2777 }
2778
2779 /* If we are getting too long a sequence for `struct algorithm'
2780 to record, make this search fail. */
2781 if (best_alg->ops == MAX_BITS_PER_WORD)
2782 return;
2783
2784 /* Copy the algorithm from temporary space to the space at alg_out.
2785 We avoid using structure assignment because the majority of
2786 best_alg is normally undefined, and this is a critical function. */
2787 alg_out->ops = best_alg->ops + 1;
2788 alg_out->cost = best_cost;
2789 memcpy (alg_out->op, best_alg->op,
2790 alg_out->ops * sizeof *alg_out->op);
2791 memcpy (alg_out->log, best_alg->log,
2792 alg_out->ops * sizeof *alg_out->log);
2793 }
2794 \f
2795 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2796 Try three variations:
2797
2798 - a shift/add sequence based on VAL itself
2799 - a shift/add sequence based on -VAL, followed by a negation
2800 - a shift/add sequence based on VAL - 1, followed by an addition.
2801
2802 Return true if the cheapest of these cost less than MULT_COST,
2803 describing the algorithm in *ALG and final fixup in *VARIANT. */
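/* For instance (illustrative): for VAL = -7 the cheapest choice may be the
   sequence for 7, (x << 3) - x, followed by a negation (negate_variant);
   for VAL = 9 it may be the sequence for VAL - 1 = 8, a single shift,
   followed by adding the multiplicand (add_variant).  */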
2804
2805 static bool
2806 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2807 struct algorithm *alg, enum mult_variant *variant,
2808 int mult_cost)
2809 {
2810 struct algorithm alg2;
2811 struct mult_cost limit;
2812 int op_cost;
2813 bool speed = optimize_insn_for_speed_p ();
2814
2815 /* Fail quickly for impossible bounds. */
2816 if (mult_cost < 0)
2817 return false;
2818
2819 /* Ensure that mult_cost provides a reasonable upper bound.
2820 Any constant multiplication can be performed with less
2821 than 2 * bits additions. */
2822 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2823 if (mult_cost > op_cost)
2824 mult_cost = op_cost;
2825
2826 *variant = basic_variant;
2827 limit.cost = mult_cost;
2828 limit.latency = mult_cost;
2829 synth_mult (alg, val, &limit, mode);
2830
2831 /* This works only if the inverted value actually fits in an
2832 `unsigned int' */
2833 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2834 {
2835 op_cost = neg_cost(speed, mode);
2836 if (MULT_COST_LESS (&alg->cost, mult_cost))
2837 {
2838 limit.cost = alg->cost.cost - op_cost;
2839 limit.latency = alg->cost.latency - op_cost;
2840 }
2841 else
2842 {
2843 limit.cost = mult_cost - op_cost;
2844 limit.latency = mult_cost - op_cost;
2845 }
2846
2847 synth_mult (&alg2, -val, &limit, mode);
2848 alg2.cost.cost += op_cost;
2849 alg2.cost.latency += op_cost;
2850 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2851 *alg = alg2, *variant = negate_variant;
2852 }
2853
2854 /* This proves very useful for division-by-constant. */
2855 op_cost = add_cost (speed, mode);
2856 if (MULT_COST_LESS (&alg->cost, mult_cost))
2857 {
2858 limit.cost = alg->cost.cost - op_cost;
2859 limit.latency = alg->cost.latency - op_cost;
2860 }
2861 else
2862 {
2863 limit.cost = mult_cost - op_cost;
2864 limit.latency = mult_cost - op_cost;
2865 }
2866
2867 synth_mult (&alg2, val - 1, &limit, mode);
2868 alg2.cost.cost += op_cost;
2869 alg2.cost.latency += op_cost;
2870 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2871 *alg = alg2, *variant = add_variant;
2872
2873 return MULT_COST_LESS (&alg->cost, mult_cost);
2874 }
2875
2876 /* A subroutine of expand_mult, used for constant multiplications.
2877 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2878 convenient. Use the shift/add sequence described by ALG and apply
2879 the final fixup specified by VARIANT. */
2880
2881 static rtx
2882 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2883 rtx target, const struct algorithm *alg,
2884 enum mult_variant variant)
2885 {
2886 HOST_WIDE_INT val_so_far;
2887 rtx insn, accum, tem;
2888 int opno;
2889 enum machine_mode nmode;
2890
2891 /* Avoid referencing memory over and over and invalid sharing
2892 on SUBREGs. */
2893 op0 = force_reg (mode, op0);
2894
2895 /* ACCUM starts out either as OP0 or as a zero, depending on
2896 the first operation. */
2897
2898 if (alg->op[0] == alg_zero)
2899 {
2900 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2901 val_so_far = 0;
2902 }
2903 else if (alg->op[0] == alg_m)
2904 {
2905 accum = copy_to_mode_reg (mode, op0);
2906 val_so_far = 1;
2907 }
2908 else
2909 gcc_unreachable ();
2910
2911 for (opno = 1; opno < alg->ops; opno++)
2912 {
2913 int log = alg->log[opno];
2914 rtx shift_subtarget = optimize ? 0 : accum;
2915 rtx add_target
2916 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2917 && !optimize)
2918 ? target : 0;
2919 rtx accum_target = optimize ? 0 : accum;
2920 rtx accum_inner;
2921
2922 switch (alg->op[opno])
2923 {
2924 case alg_shift:
2925 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2926 /* REG_EQUAL note will be attached to the following insn. */
2927 emit_move_insn (accum, tem);
2928 val_so_far <<= log;
2929 break;
2930
2931 case alg_add_t_m2:
2932 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2933 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2934 add_target ? add_target : accum_target);
2935 val_so_far += (HOST_WIDE_INT) 1 << log;
2936 break;
2937
2938 case alg_sub_t_m2:
2939 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2940 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2941 add_target ? add_target : accum_target);
2942 val_so_far -= (HOST_WIDE_INT) 1 << log;
2943 break;
2944
2945 case alg_add_t2_m:
2946 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2947 log, shift_subtarget, 0);
2948 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2949 add_target ? add_target : accum_target);
2950 val_so_far = (val_so_far << log) + 1;
2951 break;
2952
2953 case alg_sub_t2_m:
2954 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2955 log, shift_subtarget, 0);
2956 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2957 add_target ? add_target : accum_target);
2958 val_so_far = (val_so_far << log) - 1;
2959 break;
2960
2961 case alg_add_factor:
2962 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2963 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2964 add_target ? add_target : accum_target);
2965 val_so_far += val_so_far << log;
2966 break;
2967
2968 case alg_sub_factor:
2969 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2970 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2971 (add_target
2972 ? add_target : (optimize ? 0 : tem)));
2973 val_so_far = (val_so_far << log) - val_so_far;
2974 break;
2975
2976 default:
2977 gcc_unreachable ();
2978 }
2979
2980 if (SCALAR_INT_MODE_P (mode))
2981 {
2982 /* Write a REG_EQUAL note on the last insn so that we can cse
2983 multiplication sequences. Note that if ACCUM is a SUBREG,
2984 we've set the inner register and must properly indicate that. */
2985 tem = op0, nmode = mode;
2986 accum_inner = accum;
2987 if (GET_CODE (accum) == SUBREG)
2988 {
2989 accum_inner = SUBREG_REG (accum);
2990 nmode = GET_MODE (accum_inner);
2991 tem = gen_lowpart (nmode, op0);
2992 }
2993
2994 insn = get_last_insn ();
2995 set_dst_reg_note (insn, REG_EQUAL,
2996 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2997 accum_inner);
2998 }
2999 }
3000
3001 if (variant == negate_variant)
3002 {
3003 val_so_far = -val_so_far;
3004 accum = expand_unop (mode, neg_optab, accum, target, 0);
3005 }
3006 else if (variant == add_variant)
3007 {
3008 val_so_far = val_so_far + 1;
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3010 }
3011
3012 /* Compare only the bits of val and val_so_far that are significant
3013 in the result mode, to avoid sign-/zero-extension confusion. */
3014 nmode = GET_MODE_INNER (mode);
3015 if (nmode == VOIDmode)
3016 nmode = mode;
3017 val &= GET_MODE_MASK (nmode);
3018 val_so_far &= GET_MODE_MASK (nmode);
3019 gcc_assert (val == val_so_far);
3020
3021 return accum;
3022 }
3023
3024 /* Perform a multiplication and return an rtx for the result.
3025 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3026 TARGET is a suggestion for where to store the result (an rtx).
3027
3028 We check specially for a constant integer as OP1.
3029 If you want this check for OP0 as well, then before calling
3030 you should swap the two operands if OP0 would be constant. */
3031
3032 rtx
3033 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3034 int unsignedp)
3035 {
3036 enum mult_variant variant;
3037 struct algorithm algorithm;
3038 rtx scalar_op1;
3039 int max_cost;
3040 bool speed = optimize_insn_for_speed_p ();
3041 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3042
3043 if (CONSTANT_P (op0))
3044 {
3045 rtx temp = op0;
3046 op0 = op1;
3047 op1 = temp;
3048 }
3049
3050 /* For vectors, there are several simplifications that can be made if
3051 all elements of the vector constant are identical. */
3052 scalar_op1 = op1;
3053 if (GET_CODE (op1) == CONST_VECTOR)
3054 {
3055 int i, n = CONST_VECTOR_NUNITS (op1);
3056 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3057 for (i = 1; i < n; ++i)
3058 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3059 goto skip_scalar;
3060 }
3061
3062 if (INTEGRAL_MODE_P (mode))
3063 {
3064 rtx fake_reg;
3065 HOST_WIDE_INT coeff;
3066 bool is_neg;
3067 int mode_bitsize;
3068
3069 if (op1 == CONST0_RTX (mode))
3070 return op1;
3071 if (op1 == CONST1_RTX (mode))
3072 return op0;
3073 if (op1 == CONSTM1_RTX (mode))
3074 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3075 op0, target, 0);
3076
3077 if (do_trapv)
3078 goto skip_synth;
3079
3080 /* These are the operations that are potentially turned into
3081 a sequence of shifts and additions. */
3082 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3083
3084 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3085 less than or equal in size to `unsigned int' this doesn't matter.
3086 If the mode is larger than `unsigned int', then synth_mult works
3087 only if the constant value exactly fits in an `unsigned int' without
3088 any truncation. This means that multiplying by negative values does
3089 not work; results are off by 2^32 on a 32 bit machine. */
3090
3091 if (CONST_INT_P (scalar_op1))
3092 {
3093 coeff = INTVAL (scalar_op1);
3094 is_neg = coeff < 0;
3095 }
3096 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3097 {
3098 /* If we are multiplying in DImode, it may still be a win
3099 to try to work with shifts and adds. */
3100 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3101 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3102 {
3103 coeff = CONST_DOUBLE_LOW (scalar_op1);
3104 is_neg = false;
3105 }
3106 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3107 {
3108 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3109 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3110 {
3111 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3112 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3113 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3114 return expand_shift (LSHIFT_EXPR, mode, op0,
3115 shift, target, unsignedp);
3116 }
3117 goto skip_synth;
3118 }
3119 else
3120 goto skip_synth;
3121 }
3122 else
3123 goto skip_synth;
3124
3125 /* We used to test optimize here, on the grounds that it's better to
3126 produce a smaller program when -O is not used. But this causes
3127 such a terrible slowdown sometimes that it seems better to always
3128 use synth_mult. */
3129
3130 /* Special case powers of two. */
3131 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3132 return expand_shift (LSHIFT_EXPR, mode, op0,
3133 floor_log2 (coeff), target, unsignedp);
3134
3135 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3136
3137 /* Attempt to handle multiplication of DImode values by negative
3138 coefficients, by performing the multiplication by a positive
3139 multiplier and then negating the result. */
3140 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3141 {
3142 /* It's safe to use -coeff even for INT_MIN, as the
3143 result is interpreted as an unsigned coefficient.
3144 Exclude cost of op0 from max_cost to match the cost
3145 calculation of the synth_mult. */
3146 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3147 - neg_cost(speed, mode));
3148 if (max_cost > 0
3149 && choose_mult_variant (mode, -coeff, &algorithm,
3150 &variant, max_cost))
3151 {
3152 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3153 &algorithm, variant);
3154 return expand_unop (mode, neg_optab, temp, target, 0);
3155 }
3156 goto skip_synth;
3157 }
3158
3159 /* Exclude cost of op0 from max_cost to match the cost
3160 calculation of the synth_mult. */
3161 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3162 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3163 return expand_mult_const (mode, op0, coeff, target,
3164 &algorithm, variant);
3165 }
3166 skip_synth:
3167
3168 /* Expand x*2.0 as x+x. */
3169 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3170 {
3171 REAL_VALUE_TYPE d;
3172 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3173
3174 if (REAL_VALUES_EQUAL (d, dconst2))
3175 {
3176 op0 = force_reg (GET_MODE (op0), op0);
3177 return expand_binop (mode, add_optab, op0, op0,
3178 target, unsignedp, OPTAB_LIB_WIDEN);
3179 }
3180 }
3181 skip_scalar:
3182
3183 /* This used to use umul_optab if unsigned, but for non-widening multiply
3184 there is no difference between signed and unsigned. */
3185 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3186 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3187 gcc_assert (op0);
3188 return op0;
3189 }
3190
3191 /* Return a cost estimate for multiplying a register by the given
3192 COEFFicient in the given MODE and SPEED. */
3193
3194 int
3195 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3196 {
3197 int max_cost;
3198 struct algorithm algorithm;
3199 enum mult_variant variant;
3200
3201 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3202 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3203 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3204 return algorithm.cost.cost;
3205 else
3206 return max_cost;
3207 }
3208
3209 /* Perform a widening multiplication and return an rtx for the result.
3210 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3211 TARGET is a suggestion for where to store the result (an rtx).
3212 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3213 or smul_widen_optab.
3214
3215 We check specially for a constant integer as OP1, comparing the
3216 cost of a widening multiply against the cost of a sequence of shifts
3217 and adds. */
3218
3219 rtx
3220 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3221 int unsignedp, optab this_optab)
3222 {
3223 bool speed = optimize_insn_for_speed_p ();
3224 rtx cop1;
3225
3226 if (CONST_INT_P (op1)
3227 && GET_MODE (op0) != VOIDmode
3228 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3229 this_optab == umul_widen_optab))
3230 && CONST_INT_P (cop1)
3231 && (INTVAL (cop1) >= 0
3232 || HWI_COMPUTABLE_MODE_P (mode)))
3233 {
3234 HOST_WIDE_INT coeff = INTVAL (cop1);
3235 int max_cost;
3236 enum mult_variant variant;
3237 struct algorithm algorithm;
3238
3239 /* Special case powers of two. */
3240 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3241 {
3242 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3243 return expand_shift (LSHIFT_EXPR, mode, op0,
3244 floor_log2 (coeff), target, unsignedp);
3245 }
3246
3247 /* Exclude cost of op0 from max_cost to match the cost
3248 calculation of the synth_mult. */
3249 max_cost = mul_widen_cost (speed, mode);
3250 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3251 max_cost))
3252 {
3253 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3254 return expand_mult_const (mode, op0, coeff, target,
3255 &algorithm, variant);
3256 }
3257 }
3258 return expand_binop (mode, this_optab, op0, op1, target,
3259 unsignedp, OPTAB_LIB_WIDEN);
3260 }
3261 \f
3262 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3263 replace division by D, and put the least significant N bits of the result
3264 in *MULTIPLIER_PTR and return the most significant bit.
3265
3266 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3267 needed precision is in PRECISION (should be <= N).
3268
3269 PRECISION should be as small as possible so this function can choose
3270 multiplier more freely.
3271
3272 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3273 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3274
3275 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3276 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
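/* Worked example (illustrative, with N = PRECISION = 32): for D = 5 this
   returns 0 and sets *MULTIPLIER_PTR = 0xcccccccd, *POST_SHIFT_PTR = 2 and
   *LGUP_PTR = 3, so that x / 5 == ((x * 0xcccccccd) >> 32) >> 2 for every
   32-bit unsigned x.  */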
3277
3278 unsigned HOST_WIDE_INT
3279 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3280 unsigned HOST_WIDE_INT *multiplier_ptr,
3281 int *post_shift_ptr, int *lgup_ptr)
3282 {
3283 double_int mhigh, mlow;
3284 int lgup, post_shift;
3285 int pow, pow2;
3286
3287 /* lgup = ceil(log2(divisor)); */
3288 lgup = ceil_log2 (d);
3289
3290 gcc_assert (lgup <= n);
3291
3292 pow = n + lgup;
3293 pow2 = n + lgup - precision;
3294
3295 /* We could handle this with some effort, but this case is much
3296 better handled directly with a scc insn, so rely on caller using
3297 that. */
3298 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3299
3300 /* mlow = 2^(N + lgup)/d */
3301 double_int val = double_int_zero.set_bit (pow);
3302 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3303
3304 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3305 val |= double_int_zero.set_bit (pow2);
3306 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3307
3308 gcc_assert (!mhigh.high || val.high - d < d);
3309 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3310 /* Assert that mlow < mhigh. */
3311 gcc_assert (mlow.ult (mhigh));
3312
3313 /* If precision == N, then mlow, mhigh exceed 2^N
3314 (but they do not exceed 2^(N+1)). */
3315
3316 /* Reduce to lowest terms. */
3317 for (post_shift = lgup; post_shift > 0; post_shift--)
3318 {
3319 int shft = HOST_BITS_PER_WIDE_INT - 1;
3320 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3321 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3322 if (ml_lo >= mh_lo)
3323 break;
3324
3325 mlow = double_int::from_uhwi (ml_lo);
3326 mhigh = double_int::from_uhwi (mh_lo);
3327 }
3328
3329 *post_shift_ptr = post_shift;
3330 *lgup_ptr = lgup;
3331 if (n < HOST_BITS_PER_WIDE_INT)
3332 {
3333 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3334 *multiplier_ptr = mhigh.low & mask;
3335 return mhigh.low >= mask;
3336 }
3337 else
3338 {
3339 *multiplier_ptr = mhigh.low;
3340 return mhigh.high;
3341 }
3342 }
3343
3344 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3345 congruent to 1 (mod 2**N). */
3346
3347 static unsigned HOST_WIDE_INT
3348 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3349 {
3350 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3351
3352 /* The algorithm notes that the choice y = x satisfies
3353 x*y == 1 mod 2^3, since x is assumed odd.
3354 Each iteration doubles the number of bits of significance in y. */
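  /* Worked example (illustrative): for x = 3 and n = 8, y starts at 3
     (correct mod 2^3), becomes 235 after one iteration (correct mod 2^6)
     and 171 after the second (3 * 171 == 513 == 2 * 256 + 1), which is
     the inverse of 3 mod 2^8.  */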
3355
3356 unsigned HOST_WIDE_INT mask;
3357 unsigned HOST_WIDE_INT y = x;
3358 int nbit = 3;
3359
3360 mask = (n == HOST_BITS_PER_WIDE_INT
3361 ? ~(unsigned HOST_WIDE_INT) 0
3362 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3363
3364 while (nbit < n)
3365 {
3366 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3367 nbit *= 2;
3368 }
3369 return y;
3370 }
3371
3372 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3373    with the wrong signedness flavor.  ADJ_OPERAND is already the high half
3374    of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3375    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3376    product to become signed.
3377
3378 The result is put in TARGET if that is convenient.
3379
3380 MODE is the mode of operation. */
3381
3382 rtx
3383 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3384 rtx op1, rtx target, int unsignedp)
3385 {
3386 rtx tem;
3387 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3388
3389 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3390 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3391 tem = expand_and (mode, tem, op1, NULL_RTX);
3392 adj_operand
3393 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3394 adj_operand);
3395
3396 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3397 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3398 tem = expand_and (mode, tem, op0, NULL_RTX);
3399 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3400 target);
3401
3402 return target;
3403 }
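
/* Illustrative sketch, not part of GCC, of the identity behind the
   adjustment above, written out at a fixed 32-bit width: reinterpreting
   an N-bit operand as unsigned adds 2^N when its sign bit is set, so
   modulo 2^N the unsigned high half exceeds the signed high half by
   (op0 < 0 ? op1 : 0) + (op1 < 0 ? op0 : 0), and conversely the signed
   high half is obtained by subtracting those terms.  The hypothetical
   helper assumes the <stdint.h> fixed-width types and an arithmetic
   right shift of negative signed values.  */
#if 0
static uint32_t
uhigh32_from_shigh32 (int32_t a, int32_t b)
{
  /* Signed high half of the 32x32->64 product.  */
  uint32_t adj = (uint32_t) (((int64_t) a * b) >> 32);

  /* Add B when A is negative and A when B is negative, as the code
     above does with a shift, an AND and an add/subtract per operand.  */
  adj += ((uint32_t) (a >> 31)) & (uint32_t) b;
  adj += ((uint32_t) (b >> 31)) & (uint32_t) a;

  /* ADJ now equals the unsigned high half
     (uint32_t) (((uint64_t) (uint32_t) a * (uint32_t) b) >> 32).  */
  return adj;
}
#endif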
3404
3405 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3406
3407 static rtx
3408 extract_high_half (enum machine_mode mode, rtx op)
3409 {
3410 enum machine_mode wider_mode;
3411
3412 if (mode == word_mode)
3413 return gen_highpart (mode, op);
3414
3415 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3416
3417 wider_mode = GET_MODE_WIDER_MODE (mode);
3418 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3419 GET_MODE_BITSIZE (mode), 0, 1);
3420 return convert_modes (mode, wider_mode, op, 0);
3421 }
3422
3423 /* Like expmed_mult_highpart, but only consider using a multiplication
3424 optab. OP1 is an rtx for the constant operand. */
3425
3426 static rtx
3427 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3428 rtx target, int unsignedp, int max_cost)
3429 {
3430 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3431 enum machine_mode wider_mode;
3432 optab moptab;
3433 rtx tem;
3434 int size;
3435 bool speed = optimize_insn_for_speed_p ();
3436
3437 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3438
3439 wider_mode = GET_MODE_WIDER_MODE (mode);
3440 size = GET_MODE_BITSIZE (mode);
3441
3442 /* Firstly, try using a multiplication insn that only generates the needed
3443 high part of the product, and in the sign flavor of unsignedp. */
3444 if (mul_highpart_cost (speed, mode) < max_cost)
3445 {
3446 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3447 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 unsignedp, OPTAB_DIRECT);
3449 if (tem)
3450 return tem;
3451 }
3452
3453 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3454 Need to adjust the result after the multiplication. */
3455 if (size - 1 < BITS_PER_WORD
3456 && (mul_highpart_cost (speed, mode)
3457 + 2 * shift_cost (speed, mode, size-1)
3458 + 4 * add_cost (speed, mode) < max_cost))
3459 {
3460 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3461 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3462 unsignedp, OPTAB_DIRECT);
3463 if (tem)
3464 /* We used the wrong signedness. Adjust the result. */
3465 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3466 tem, unsignedp);
3467 }
3468
3469 /* Try widening multiplication. */
3470 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3471 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3472 && mul_widen_cost (speed, wider_mode) < max_cost)
3473 {
3474 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3475 unsignedp, OPTAB_WIDEN);
3476 if (tem)
3477 return extract_high_half (mode, tem);
3478 }
3479
3480   /* Try widening the mode and performing a non-widening multiplication.  */
3481 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3482 && size - 1 < BITS_PER_WORD
3483 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3484 < max_cost))
3485 {
3486 rtx insns, wop0, wop1;
3487
3488 /* We need to widen the operands, for example to ensure the
3489 constant multiplier is correctly sign or zero extended.
3490          Use a sequence to clean up any instructions emitted by
3491 the conversions if things don't work out. */
3492 start_sequence ();
3493 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3494 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3495 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3496 unsignedp, OPTAB_WIDEN);
3497 insns = get_insns ();
3498 end_sequence ();
3499
3500 if (tem)
3501 {
3502 emit_insn (insns);
3503 return extract_high_half (mode, tem);
3504 }
3505 }
3506
3507 /* Try widening multiplication of opposite signedness, and adjust. */
3508 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3509 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3510 && size - 1 < BITS_PER_WORD
3511 && (mul_widen_cost (speed, wider_mode)
3512 + 2 * shift_cost (speed, mode, size-1)
3513 + 4 * add_cost (speed, mode) < max_cost))
3514 {
3515 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3516 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3517 if (tem != 0)
3518 {
3519 tem = extract_high_half (mode, tem);
3520 /* We used the wrong signedness. Adjust the result. */
3521 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3522 target, unsignedp);
3523 }
3524 }
3525
3526 return 0;
3527 }
3528
3529 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3530 putting the high half of the result in TARGET if that is convenient,
3531    and return where the result is.  If the operation cannot be performed,
3532 0 is returned.
3533
3534 MODE is the mode of operation and result.
3535
3536 UNSIGNEDP nonzero means unsigned multiply.
3537
3538 MAX_COST is the total allowed cost for the expanded RTL. */
3539
3540 static rtx
3541 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3542 rtx target, int unsignedp, int max_cost)
3543 {
3544 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3545 unsigned HOST_WIDE_INT cnst1;
3546 int extra_cost;
3547 bool sign_adjust = false;
3548 enum mult_variant variant;
3549 struct algorithm alg;
3550 rtx tem;
3551 bool speed = optimize_insn_for_speed_p ();
3552
3553 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3554   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3555 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3556
3557 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3558
3559 /* We can't optimize modes wider than BITS_PER_WORD.
3560 ??? We might be able to perform double-word arithmetic if
3561 mode == word_mode, however all the cost calculations in
3562 synth_mult etc. assume single-word operations. */
3563 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3564 return expmed_mult_highpart_optab (mode, op0, op1, target,
3565 unsignedp, max_cost);
3566
3567 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3568
3569 /* Check whether we try to multiply by a negative constant. */
3570 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3571 {
3572 sign_adjust = true;
3573 extra_cost += add_cost (speed, mode);
3574 }
3575
3576 /* See whether shift/add multiplication is cheap enough. */
3577 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3578 max_cost - extra_cost))
3579 {
3580 /* See whether the specialized multiplication optabs are
3581 cheaper than the shift/add version. */
3582 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3583 alg.cost.cost + extra_cost);
3584 if (tem)
3585 return tem;
3586
3587 tem = convert_to_mode (wider_mode, op0, unsignedp);
3588 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3589 tem = extract_high_half (mode, tem);
3590
3591 /* Adjust result for signedness. */
3592 if (sign_adjust)
3593 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3594
3595 return tem;
3596 }
3597 return expmed_mult_highpart_optab (mode, op0, op1, target,
3598 unsignedp, max_cost);
3599 }
3600
3601
3602 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3603
3604 static rtx
3605 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3606 {
3607 unsigned HOST_WIDE_INT masklow, maskhigh;
3608 rtx result, temp, shift, label;
3609 int logd;
3610
3611 logd = floor_log2 (d);
3612 result = gen_reg_rtx (mode);
3613
3614 /* Avoid conditional branches when they're expensive. */
3615 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3616 && optimize_insn_for_speed_p ())
3617 {
3618 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3619 mode, 0, -1);
3620 if (signmask)
3621 {
3622 signmask = force_reg (mode, signmask);
3623 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3624 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3625
3626 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3627 which instruction sequence to use. If logical right shifts
3628             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3629 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3630
3631 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3632 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3633 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3634 > COSTS_N_INSNS (2)))
3635 {
3636 temp = expand_binop (mode, xor_optab, op0, signmask,
3637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3638 temp = expand_binop (mode, sub_optab, temp, signmask,
3639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3641 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3642 temp = expand_binop (mode, xor_optab, temp, signmask,
3643 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644 temp = expand_binop (mode, sub_optab, temp, signmask,
3645 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646 }
3647 else
3648 {
3649 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651 signmask = force_reg (mode, signmask);
3652
3653 temp = expand_binop (mode, add_optab, op0, signmask,
3654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3655 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 temp = expand_binop (mode, sub_optab, temp, signmask,
3658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659 }
3660 return temp;
3661 }
3662 }
3663
3664 /* Mask contains the mode's signbit and the significant bits of the
3665 modulus. By including the signbit in the operation, many targets
3666 can avoid an explicit compare operation in the following comparison
3667 against zero. */
3668
3669 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3670 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3671 {
3672 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3673 maskhigh = -1;
3674 }
3675 else
3676 maskhigh = (HOST_WIDE_INT) -1
3677 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3678
3679 temp = expand_binop (mode, and_optab, op0,
3680 immed_double_const (masklow, maskhigh, mode),
3681 result, 1, OPTAB_LIB_WIDEN);
3682 if (temp != result)
3683 emit_move_insn (result, temp);
3684
3685 label = gen_label_rtx ();
3686 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3687
3688 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3689 0, OPTAB_LIB_WIDEN);
3690 masklow = (HOST_WIDE_INT) -1 << logd;
3691 maskhigh = -1;
3692 temp = expand_binop (mode, ior_optab, temp,
3693 immed_double_const (masklow, maskhigh, mode),
3694 result, 1, OPTAB_LIB_WIDEN);
3695 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3696 0, OPTAB_LIB_WIDEN);
3697 if (temp != result)
3698 emit_move_insn (result, temp);
3699 emit_label (label);
3700 return result;
3701 }
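
/* Illustrative sketch, not part of GCC, of the cheaper of the two
   branch-free sequences above (logical shift, add, AND, subtract) at a
   fixed 32-bit width, computing the truncating C remainder x % 8.  The
   hypothetical helper assumes the <stdint.h> fixed-width types and an
   arithmetic right shift of negative signed values.  */
#if 0
static int32_t
smod8 (int32_t x)
{
  /* All ones when X is negative, zero otherwise.  */
  uint32_t signmask = (uint32_t) (x >> 31);
  /* The logical right shift turns that into the bias 7 for negative X.  */
  uint32_t bias = signmask >> (32 - 3);

  return (int32_t) (((uint32_t) x + bias) & 7u) - (int32_t) bias;
}
#endif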
3702
3703 /* Expand signed division of OP0 by a power of two D in mode MODE.
3704 This routine is only called for positive values of D. */
3705
3706 static rtx
3707 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3708 {
3709 rtx temp, label;
3710 int logd;
3711
3712 logd = floor_log2 (d);
3713
3714 if (d == 2
3715 && BRANCH_COST (optimize_insn_for_speed_p (),
3716 false) >= 1)
3717 {
3718 temp = gen_reg_rtx (mode);
3719 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3720 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3721 0, OPTAB_LIB_WIDEN);
3722 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3723 }
3724
3725 #ifdef HAVE_conditional_move
3726 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3727 >= 2)
3728 {
3729 rtx temp2;
3730
3731 /* ??? emit_conditional_move forces a stack adjustment via
3732 compare_from_rtx so, if the sequence is discarded, it will
3733 be lost. Do it now instead. */
3734 do_pending_stack_adjust ();
3735
3736 start_sequence ();
3737 temp2 = copy_to_mode_reg (mode, op0);
3738 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3739 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3740 temp = force_reg (mode, temp);
3741
3742 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3743 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3744 mode, temp, temp2, mode, 0);
3745 if (temp2)
3746 {
3747 rtx seq = get_insns ();
3748 end_sequence ();
3749 emit_insn (seq);
3750 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3751 }
3752 end_sequence ();
3753 }
3754 #endif
3755
3756 if (BRANCH_COST (optimize_insn_for_speed_p (),
3757 false) >= 2)
3758 {
3759 int ushift = GET_MODE_BITSIZE (mode) - logd;
3760
3761 temp = gen_reg_rtx (mode);
3762 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3763 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3764 > COSTS_N_INSNS (1))
3765 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3766 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3767 else
3768 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3769 ushift, NULL_RTX, 1);
3770 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3771 0, OPTAB_LIB_WIDEN);
3772 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3773 }
3774
3775 label = gen_label_rtx ();
3776 temp = copy_to_mode_reg (mode, op0);
3777 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3778 expand_inc (temp, GEN_INT (d - 1));
3779 emit_label (label);
3780 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3781 }
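
/* Illustrative sketch, not part of GCC, of the store-flag variant above
   at a fixed 32-bit width: truncating signed division by 8 without a
   branch, biasing negative operands by 7 before the arithmetic shift.
   The hypothetical helper assumes the <stdint.h> fixed-width types and
   an arithmetic right shift of negative signed values.  */
#if 0
static int32_t
sdiv8 (int32_t x)
{
  /* 7 when X is negative, zero otherwise: the sign mask from the
     arithmetic shift, shifted down logically as in the code above.  */
  int32_t bias = (int32_t) (((uint32_t) (x >> 31)) >> (32 - 3));

  return (x + bias) >> 3;
}
#endif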
3782 \f
3783 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3784 if that is convenient, and returning where the result is.
3785 You may request either the quotient or the remainder as the result;
3786 specify REM_FLAG nonzero to get the remainder.
3787
3788 CODE is the expression code for which kind of division this is;
3789 it controls how rounding is done. MODE is the machine mode to use.
3790 UNSIGNEDP nonzero means do unsigned division. */
3791
3792 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3793 and then correct it by or'ing in missing high bits
3794 if result of ANDI is nonzero.
3795 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3796 This could optimize to a bfexts instruction.
3797 But C doesn't use these operations, so their optimizations are
3798 left for later. */
3799 /* ??? For modulo, we don't actually need the highpart of the first product,
3800 the low part will do nicely. And for small divisors, the second multiply
3801 can also be a low-part only multiply or even be completely left out.
3802 E.g. to calculate the remainder of a division by 3 with a 32 bit
3803 multiply, multiply with 0x55555556 and extract the upper two bits;
3804 the result is exact for inputs up to 0x1fffffff.
3805 The input range can be reduced by using cross-sum rules.
3806 For odd divisors >= 3, the following table gives right shift counts
3807 so that if a number is shifted by an integer multiple of the given
3808 amount, the remainder stays the same:
3809 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3810 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3811 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3812 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3813 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3814
3815 Cross-sum rules for even numbers can be derived by leaving as many bits
3816 to the right alone as the divisor has zeros to the right.
3817 E.g. if x is an unsigned 32 bit number:
3818 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3819 */
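
/* Illustrative sketch, not part of GCC, of the division-by-3 remainder
   trick described just above, assuming the 32-bit uint32_t from
   <stdint.h>: the low 32 bits of x * 0x55555556 carry x mod 3 in their
   top two bits, exactly for all x up to 0x1fffffff.  */
#if 0
static uint32_t
rem3 (uint32_t x)
{
  /* 0x55555556 is 2^32 / 3 rounded up, so the low word of the product
     approximates the fractional part of x / 3 scaled by 2^32, and its
     top two bits are the remainder.  */
  return (x * 0x55555556u) >> 30;
}
#endif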
3820
3821 rtx
3822 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3823 rtx op0, rtx op1, rtx target, int unsignedp)
3824 {
3825 enum machine_mode compute_mode;
3826 rtx tquotient;
3827 rtx quotient = 0, remainder = 0;
3828 rtx last;
3829 int size;
3830 rtx insn;
3831 optab optab1, optab2;
3832 int op1_is_constant, op1_is_pow2 = 0;
3833 int max_cost, extra_cost;
3834 static HOST_WIDE_INT last_div_const = 0;
3835 static HOST_WIDE_INT ext_op1;
3836 bool speed = optimize_insn_for_speed_p ();
3837
3838 op1_is_constant = CONST_INT_P (op1);
3839 if (op1_is_constant)
3840 {
3841 ext_op1 = INTVAL (op1);
3842 if (unsignedp)
3843 ext_op1 &= GET_MODE_MASK (mode);
3844 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3845 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3846 }
3847
3848 /*
3849 This is the structure of expand_divmod:
3850
3851 First comes code to fix up the operands so we can perform the operations
3852 correctly and efficiently.
3853
3854 Second comes a switch statement with code specific for each rounding mode.
3855 For some special operands this code emits all RTL for the desired
3856 operation, for other cases, it generates only a quotient and stores it in
3857 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3858 to indicate that it has not done anything.
3859
3860 Last comes code that finishes the operation. If QUOTIENT is set and
3861 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3862 QUOTIENT is not set, it is computed using trunc rounding.
3863
3864 We try to generate special code for division and remainder when OP1 is a
3865 constant. If |OP1| = 2**n we can use shifts and some other fast
3866 operations. For other values of OP1, we compute a carefully selected
3867 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3868 by m.
3869
3870 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3871 half of the product. Different strategies for generating the product are
3872 implemented in expmed_mult_highpart.
3873
3874 If what we actually want is the remainder, we generate that by another
3875 by-constant multiplication and a subtraction. */
3876
3877 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3878 code below will malfunction if we are, so check here and handle
3879 the special case if so. */
3880 if (op1 == const1_rtx)
3881 return rem_flag ? const0_rtx : op0;
3882
3883 /* When dividing by -1, we could get an overflow.
3884 negv_optab can handle overflows. */
3885 if (! unsignedp && op1 == constm1_rtx)
3886 {
3887 if (rem_flag)
3888 return const0_rtx;
3889 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3890 ? negv_optab : neg_optab, op0, target, 0);
3891 }
3892
3893 if (target
3894 /* Don't use the function value register as a target
3895 since we have to read it as well as write it,
3896 and function-inlining gets confused by this. */
3897 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3898 /* Don't clobber an operand while doing a multi-step calculation. */
3899 || ((rem_flag || op1_is_constant)
3900 && (reg_mentioned_p (target, op0)
3901 || (MEM_P (op0) && MEM_P (target))))
3902 || reg_mentioned_p (target, op1)
3903 || (MEM_P (op1) && MEM_P (target))))
3904 target = 0;
3905
3906 /* Get the mode in which to perform this computation. Normally it will
3907 be MODE, but sometimes we can't do the desired operation in MODE.
3908 If so, pick a wider mode in which we can do the operation. Convert
3909 to that mode at the start to avoid repeated conversions.
3910
3911 First see what operations we need. These depend on the expression
3912 we are evaluating. (We assume that divxx3 insns exist under the
3913      same conditions as modxx3 insns do, and that these insns don't normally
3914 fail. If these assumptions are not correct, we may generate less
3915 efficient code in some cases.)
3916
3917 Then see if we find a mode in which we can open-code that operation
3918 (either a division, modulus, or shift). Finally, check for the smallest
3919 mode for which we can do the operation with a library call. */
3920
3921 /* We might want to refine this now that we have division-by-constant
3922 optimization. Since expmed_mult_highpart tries so many variants, it is
3923 not straightforward to generalize this. Maybe we should make an array
3924 of possible modes in init_expmed? Save this for GCC 2.7. */
3925
3926 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3927 ? (unsignedp ? lshr_optab : ashr_optab)
3928 : (unsignedp ? udiv_optab : sdiv_optab));
3929 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3930 ? optab1
3931 : (unsignedp ? udivmod_optab : sdivmod_optab));
3932
3933 for (compute_mode = mode; compute_mode != VOIDmode;
3934 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3935 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3936 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3937 break;
3938
3939 if (compute_mode == VOIDmode)
3940 for (compute_mode = mode; compute_mode != VOIDmode;
3941 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3942 if (optab_libfunc (optab1, compute_mode)
3943 || optab_libfunc (optab2, compute_mode))
3944 break;
3945
3946 /* If we still couldn't find a mode, use MODE, but expand_binop will
3947 probably die. */
3948 if (compute_mode == VOIDmode)
3949 compute_mode = mode;
3950
3951 if (target && GET_MODE (target) == compute_mode)
3952 tquotient = target;
3953 else
3954 tquotient = gen_reg_rtx (compute_mode);
3955
3956 size = GET_MODE_BITSIZE (compute_mode);
3957 #if 0
3958 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3959 (mode), and thereby get better code when OP1 is a constant. Do that
3960 later. It will require going over all usages of SIZE below. */
3961 size = GET_MODE_BITSIZE (mode);
3962 #endif
3963
3964 /* Only deduct something for a REM if the last divide done was
3965 for a different constant. Then set the constant of the last
3966 divide. */
3967 max_cost = (unsignedp
3968 ? udiv_cost (speed, compute_mode)
3969 : sdiv_cost (speed, compute_mode));
3970 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3971 && INTVAL (op1) == last_div_const))
3972 max_cost -= (mul_cost (speed, compute_mode)
3973 + add_cost (speed, compute_mode));
3974
3975 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3976
3977 /* Now convert to the best mode to use. */
3978 if (compute_mode != mode)
3979 {
3980 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3981 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3982
3983 /* convert_modes may have placed op1 into a register, so we
3984 must recompute the following. */
3985 op1_is_constant = CONST_INT_P (op1);
3986 op1_is_pow2 = (op1_is_constant
3987 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3988 || (! unsignedp
3989 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3990 }
3991
3992 /* If one of the operands is a volatile MEM, copy it into a register. */
3993
3994 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3995 op0 = force_reg (compute_mode, op0);
3996 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3997 op1 = force_reg (compute_mode, op1);
3998
3999 /* If we need the remainder or if OP1 is constant, we need to
4000 put OP0 in a register in case it has any queued subexpressions. */
4001 if (rem_flag || op1_is_constant)
4002 op0 = force_reg (compute_mode, op0);
4003
4004 last = get_last_insn ();
4005
4006 /* Promote floor rounding to trunc rounding for unsigned operations. */
4007 if (unsignedp)
4008 {
4009 if (code == FLOOR_DIV_EXPR)
4010 code = TRUNC_DIV_EXPR;
4011 if (code == FLOOR_MOD_EXPR)
4012 code = TRUNC_MOD_EXPR;
4013 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4014 code = TRUNC_DIV_EXPR;
4015 }
4016
4017 if (op1 != const0_rtx)
4018 switch (code)
4019 {
4020 case TRUNC_MOD_EXPR:
4021 case TRUNC_DIV_EXPR:
4022 if (op1_is_constant)
4023 {
4024 if (unsignedp)
4025 {
4026 unsigned HOST_WIDE_INT mh, ml;
4027 int pre_shift, post_shift;
4028 int dummy;
4029 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4030 & GET_MODE_MASK (compute_mode));
4031
4032 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4033 {
4034 pre_shift = floor_log2 (d);
4035 if (rem_flag)
4036 {
4037 remainder
4038 = expand_binop (compute_mode, and_optab, op0,
4039 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4040 remainder, 1,
4041 OPTAB_LIB_WIDEN);
4042 if (remainder)
4043 return gen_lowpart (mode, remainder);
4044 }
4045 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4046 pre_shift, tquotient, 1);
4047 }
4048 else if (size <= HOST_BITS_PER_WIDE_INT)
4049 {
4050 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4051 {
4052 /* Most significant bit of divisor is set; emit an scc
4053 insn. */
4054 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4055 compute_mode, 1, 1);
4056 }
4057 else
4058 {
4059 /* Find a suitable multiplier and right shift count
4060 instead of multiplying with D. */
4061
4062 mh = choose_multiplier (d, size, size,
4063 &ml, &post_shift, &dummy);
4064
4065 /* If the suggested multiplier is more than SIZE bits,
4066 we can do better for even divisors, using an
4067 initial right shift. */
4068 if (mh != 0 && (d & 1) == 0)
4069 {
4070 pre_shift = floor_log2 (d & -d);
4071 mh = choose_multiplier (d >> pre_shift, size,
4072 size - pre_shift,
4073 &ml, &post_shift, &dummy);
4074 gcc_assert (!mh);
4075 }
4076 else
4077 pre_shift = 0;
4078
4079 if (mh != 0)
4080 {
4081 rtx t1, t2, t3, t4;
4082
4083 if (post_shift - 1 >= BITS_PER_WORD)
4084 goto fail1;
4085
4086 extra_cost
4087 = (shift_cost (speed, compute_mode, post_shift - 1)
4088 + shift_cost (speed, compute_mode, 1)
4089 + 2 * add_cost (speed, compute_mode));
4090 t1 = expmed_mult_highpart (compute_mode, op0,
4091 GEN_INT (ml),
4092 NULL_RTX, 1,
4093 max_cost - extra_cost);
4094 if (t1 == 0)
4095 goto fail1;
4096 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4097 op0, t1),
4098 NULL_RTX);
4099 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4100 t2, 1, NULL_RTX, 1);
4101 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4102 t1, t3),
4103 NULL_RTX);
4104 quotient = expand_shift
4105 (RSHIFT_EXPR, compute_mode, t4,
4106 post_shift - 1, tquotient, 1);
4107 }
4108 else
4109 {
4110 rtx t1, t2;
4111
4112 if (pre_shift >= BITS_PER_WORD
4113 || post_shift >= BITS_PER_WORD)
4114 goto fail1;
4115
4116 t1 = expand_shift
4117 (RSHIFT_EXPR, compute_mode, op0,
4118 pre_shift, NULL_RTX, 1);
4119 extra_cost
4120 = (shift_cost (speed, compute_mode, pre_shift)
4121 + shift_cost (speed, compute_mode, post_shift));
4122 t2 = expmed_mult_highpart (compute_mode, t1,
4123 GEN_INT (ml),
4124 NULL_RTX, 1,
4125 max_cost - extra_cost);
4126 if (t2 == 0)
4127 goto fail1;
4128 quotient = expand_shift
4129 (RSHIFT_EXPR, compute_mode, t2,
4130 post_shift, tquotient, 1);
4131 }
4132 }
4133 }
4134 else /* Too wide mode to use tricky code */
4135 break;
4136
4137 insn = get_last_insn ();
4138 if (insn != last)
4139 set_dst_reg_note (insn, REG_EQUAL,
4140 gen_rtx_UDIV (compute_mode, op0, op1),
4141 quotient);
4142 }
4143 else /* TRUNC_DIV, signed */
4144 {
4145 unsigned HOST_WIDE_INT ml;
4146 int lgup, post_shift;
4147 rtx mlr;
4148 HOST_WIDE_INT d = INTVAL (op1);
4149 unsigned HOST_WIDE_INT abs_d;
4150
4151 /* Since d might be INT_MIN, we have to cast to
4152 unsigned HOST_WIDE_INT before negating to avoid
4153 undefined signed overflow. */
4154 abs_d = (d >= 0
4155 ? (unsigned HOST_WIDE_INT) d
4156 : - (unsigned HOST_WIDE_INT) d);
4157
4158 /* n rem d = n rem -d */
4159 if (rem_flag && d < 0)
4160 {
4161 d = abs_d;
4162 op1 = gen_int_mode (abs_d, compute_mode);
4163 }
4164
4165 if (d == 1)
4166 quotient = op0;
4167 else if (d == -1)
4168 quotient = expand_unop (compute_mode, neg_optab, op0,
4169 tquotient, 0);
4170 else if (HOST_BITS_PER_WIDE_INT >= size
4171 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4172 {
4173 /* This case is not handled correctly below. */
4174 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4175 compute_mode, 1, 1);
4176 if (quotient == 0)
4177 goto fail1;
4178 }
4179 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4180 && (rem_flag
4181 ? smod_pow2_cheap (speed, compute_mode)
4182 : sdiv_pow2_cheap (speed, compute_mode))
4183 /* We assume that cheap metric is true if the
4184 optab has an expander for this mode. */
4185 && ((optab_handler ((rem_flag ? smod_optab
4186 : sdiv_optab),
4187 compute_mode)
4188 != CODE_FOR_nothing)
4189 || (optab_handler (sdivmod_optab,
4190 compute_mode)
4191 != CODE_FOR_nothing)))
4192 ;
4193 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4194 {
4195 if (rem_flag)
4196 {
4197 remainder = expand_smod_pow2 (compute_mode, op0, d);
4198 if (remainder)
4199 return gen_lowpart (mode, remainder);
4200 }
4201
4202 if (sdiv_pow2_cheap (speed, compute_mode)
4203 && ((optab_handler (sdiv_optab, compute_mode)
4204 != CODE_FOR_nothing)
4205 || (optab_handler (sdivmod_optab, compute_mode)
4206 != CODE_FOR_nothing)))
4207 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4208 compute_mode, op0,
4209 gen_int_mode (abs_d,
4210 compute_mode),
4211 NULL_RTX, 0);
4212 else
4213 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4214
4215 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4216 negate the quotient. */
4217 if (d < 0)
4218 {
4219 insn = get_last_insn ();
4220 if (insn != last
4221 && abs_d < ((unsigned HOST_WIDE_INT) 1
4222 << (HOST_BITS_PER_WIDE_INT - 1)))
4223 set_dst_reg_note (insn, REG_EQUAL,
4224 gen_rtx_DIV (compute_mode, op0,
4225 gen_int_mode
4226 (abs_d,
4227 compute_mode)),
4228 quotient);
4229
4230 quotient = expand_unop (compute_mode, neg_optab,
4231 quotient, quotient, 0);
4232 }
4233 }
4234 else if (size <= HOST_BITS_PER_WIDE_INT)
4235 {
4236 choose_multiplier (abs_d, size, size - 1,
4237 &ml, &post_shift, &lgup);
4238 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4239 {
4240 rtx t1, t2, t3;
4241
4242 if (post_shift >= BITS_PER_WORD
4243 || size - 1 >= BITS_PER_WORD)
4244 goto fail1;
4245
4246 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4247 + shift_cost (speed, compute_mode, size - 1)
4248 + add_cost (speed, compute_mode));
4249 t1 = expmed_mult_highpart (compute_mode, op0,
4250 GEN_INT (ml), NULL_RTX, 0,
4251 max_cost - extra_cost);
4252 if (t1 == 0)
4253 goto fail1;
4254 t2 = expand_shift
4255 (RSHIFT_EXPR, compute_mode, t1,
4256 post_shift, NULL_RTX, 0);
4257 t3 = expand_shift
4258 (RSHIFT_EXPR, compute_mode, op0,
4259 size - 1, NULL_RTX, 0);
4260 if (d < 0)
4261 quotient
4262 = force_operand (gen_rtx_MINUS (compute_mode,
4263 t3, t2),
4264 tquotient);
4265 else
4266 quotient
4267 = force_operand (gen_rtx_MINUS (compute_mode,
4268 t2, t3),
4269 tquotient);
4270 }
4271 else
4272 {
4273 rtx t1, t2, t3, t4;
4274
4275 if (post_shift >= BITS_PER_WORD
4276 || size - 1 >= BITS_PER_WORD)
4277 goto fail1;
4278
4279 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4280 mlr = gen_int_mode (ml, compute_mode);
4281 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4282 + shift_cost (speed, compute_mode, size - 1)
4283 + 2 * add_cost (speed, compute_mode));
4284 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4285 NULL_RTX, 0,
4286 max_cost - extra_cost);
4287 if (t1 == 0)
4288 goto fail1;
4289 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4290 t1, op0),
4291 NULL_RTX);
4292 t3 = expand_shift
4293 (RSHIFT_EXPR, compute_mode, t2,
4294 post_shift, NULL_RTX, 0);
4295 t4 = expand_shift
4296 (RSHIFT_EXPR, compute_mode, op0,
4297 size - 1, NULL_RTX, 0);
4298 if (d < 0)
4299 quotient
4300 = force_operand (gen_rtx_MINUS (compute_mode,
4301 t4, t3),
4302 tquotient);
4303 else
4304 quotient
4305 = force_operand (gen_rtx_MINUS (compute_mode,
4306 t3, t4),
4307 tquotient);
4308 }
4309 }
4310 else /* Too wide mode to use tricky code */
4311 break;
4312
4313 insn = get_last_insn ();
4314 if (insn != last)
4315 set_dst_reg_note (insn, REG_EQUAL,
4316 gen_rtx_DIV (compute_mode, op0, op1),
4317 quotient);
4318 }
4319 break;
4320 }
4321 fail1:
4322 delete_insns_since (last);
4323 break;
4324
4325 case FLOOR_DIV_EXPR:
4326 case FLOOR_MOD_EXPR:
4327 /* We will come here only for signed operations. */
4328 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4329 {
4330 unsigned HOST_WIDE_INT mh, ml;
4331 int pre_shift, lgup, post_shift;
4332 HOST_WIDE_INT d = INTVAL (op1);
4333
4334 if (d > 0)
4335 {
4336 /* We could just as easily deal with negative constants here,
4337 but it does not seem worth the trouble for GCC 2.6. */
4338 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4339 {
4340 pre_shift = floor_log2 (d);
4341 if (rem_flag)
4342 {
4343 remainder = expand_binop (compute_mode, and_optab, op0,
4344 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4345 remainder, 0, OPTAB_LIB_WIDEN);
4346 if (remainder)
4347 return gen_lowpart (mode, remainder);
4348 }
4349 quotient = expand_shift
4350 (RSHIFT_EXPR, compute_mode, op0,
4351 pre_shift, tquotient, 0);
4352 }
4353 else
4354 {
4355 rtx t1, t2, t3, t4;
4356
4357 mh = choose_multiplier (d, size, size - 1,
4358 &ml, &post_shift, &lgup);
4359 gcc_assert (!mh);
4360
4361 if (post_shift < BITS_PER_WORD
4362 && size - 1 < BITS_PER_WORD)
4363 {
4364 t1 = expand_shift
4365 (RSHIFT_EXPR, compute_mode, op0,
4366 size - 1, NULL_RTX, 0);
4367 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4368 NULL_RTX, 0, OPTAB_WIDEN);
4369 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4370 + shift_cost (speed, compute_mode, size - 1)
4371 + 2 * add_cost (speed, compute_mode));
4372 t3 = expmed_mult_highpart (compute_mode, t2,
4373 GEN_INT (ml), NULL_RTX, 1,
4374 max_cost - extra_cost);
4375 if (t3 != 0)
4376 {
4377 t4 = expand_shift
4378 (RSHIFT_EXPR, compute_mode, t3,
4379 post_shift, NULL_RTX, 1);
4380 quotient = expand_binop (compute_mode, xor_optab,
4381 t4, t1, tquotient, 0,
4382 OPTAB_WIDEN);
4383 }
4384 }
4385 }
4386 }
4387 else
4388 {
4389 rtx nsign, t1, t2, t3, t4;
4390 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4391 op0, constm1_rtx), NULL_RTX);
4392 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4393 0, OPTAB_WIDEN);
4394 nsign = expand_shift
4395 (RSHIFT_EXPR, compute_mode, t2,
4396 size - 1, NULL_RTX, 0);
4397 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4398 NULL_RTX);
4399 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4400 NULL_RTX, 0);
4401 if (t4)
4402 {
4403 rtx t5;
4404 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4405 NULL_RTX, 0);
4406 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4407 t4, t5),
4408 tquotient);
4409 }
4410 }
4411 }
4412
4413 if (quotient != 0)
4414 break;
4415 delete_insns_since (last);
4416
4417 /* Try using an instruction that produces both the quotient and
4418 remainder, using truncation. We can easily compensate the quotient
4419 or remainder to get floor rounding, once we have the remainder.
4420          Notice that we also compute the final remainder value here,
4421 and return the result right away. */
4422 if (target == 0 || GET_MODE (target) != compute_mode)
4423 target = gen_reg_rtx (compute_mode);
4424
4425 if (rem_flag)
4426 {
4427 remainder
4428 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4429 quotient = gen_reg_rtx (compute_mode);
4430 }
4431 else
4432 {
4433 quotient
4434 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4435 remainder = gen_reg_rtx (compute_mode);
4436 }
4437
4438 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4439 quotient, remainder, 0))
4440 {
4441 /* This could be computed with a branch-less sequence.
4442 Save that for later. */
4443 rtx tem;
4444 rtx label = gen_label_rtx ();
4445 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4446 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4447 NULL_RTX, 0, OPTAB_WIDEN);
4448 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4449 expand_dec (quotient, const1_rtx);
4450 expand_inc (remainder, op1);
4451 emit_label (label);
4452 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4453 }
4454
4455 /* No luck with division elimination or divmod. Have to do it
4456 by conditionally adjusting op0 *and* the result. */
4457 {
4458 rtx label1, label2, label3, label4, label5;
4459 rtx adjusted_op0;
4460 rtx tem;
4461
4462 quotient = gen_reg_rtx (compute_mode);
4463 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4464 label1 = gen_label_rtx ();
4465 label2 = gen_label_rtx ();
4466 label3 = gen_label_rtx ();
4467 label4 = gen_label_rtx ();
4468 label5 = gen_label_rtx ();
4469 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4470 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4471 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4472 quotient, 0, OPTAB_LIB_WIDEN);
4473 if (tem != quotient)
4474 emit_move_insn (quotient, tem);
4475 emit_jump_insn (gen_jump (label5));
4476 emit_barrier ();
4477 emit_label (label1);
4478 expand_inc (adjusted_op0, const1_rtx);
4479 emit_jump_insn (gen_jump (label4));
4480 emit_barrier ();
4481 emit_label (label2);
4482 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4483 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4484 quotient, 0, OPTAB_LIB_WIDEN);
4485 if (tem != quotient)
4486 emit_move_insn (quotient, tem);
4487 emit_jump_insn (gen_jump (label5));
4488 emit_barrier ();
4489 emit_label (label3);
4490 expand_dec (adjusted_op0, const1_rtx);
4491 emit_label (label4);
4492 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4493 quotient, 0, OPTAB_LIB_WIDEN);
4494 if (tem != quotient)
4495 emit_move_insn (quotient, tem);
4496 expand_dec (quotient, const1_rtx);
4497 emit_label (label5);
4498 }
4499 break;
4500
4501 case CEIL_DIV_EXPR:
4502 case CEIL_MOD_EXPR:
4503 if (unsignedp)
4504 {
4505 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4506 {
4507 rtx t1, t2, t3;
4508 unsigned HOST_WIDE_INT d = INTVAL (op1);
4509 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4510 floor_log2 (d), tquotient, 1);
4511 t2 = expand_binop (compute_mode, and_optab, op0,
4512 GEN_INT (d - 1),
4513 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514 t3 = gen_reg_rtx (compute_mode);
4515 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516 compute_mode, 1, 1);
4517 if (t3 == 0)
4518 {
4519 rtx lab;
4520 lab = gen_label_rtx ();
4521 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522 expand_inc (t1, const1_rtx);
4523 emit_label (lab);
4524 quotient = t1;
4525 }
4526 else
4527 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4528 t1, t3),
4529 tquotient);
4530 break;
4531 }
4532
4533 /* Try using an instruction that produces both the quotient and
4534 remainder, using truncation. We can easily compensate the
4535 quotient or remainder to get ceiling rounding, once we have the
4536              remainder.  Notice that we also compute the final remainder
4537 value here, and return the result right away. */
4538 if (target == 0 || GET_MODE (target) != compute_mode)
4539 target = gen_reg_rtx (compute_mode);
4540
4541 if (rem_flag)
4542 {
4543 remainder = (REG_P (target)
4544 ? target : gen_reg_rtx (compute_mode));
4545 quotient = gen_reg_rtx (compute_mode);
4546 }
4547 else
4548 {
4549 quotient = (REG_P (target)
4550 ? target : gen_reg_rtx (compute_mode));
4551 remainder = gen_reg_rtx (compute_mode);
4552 }
4553
4554 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4555 remainder, 1))
4556 {
4557 /* This could be computed with a branch-less sequence.
4558 Save that for later. */
4559 rtx label = gen_label_rtx ();
4560 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561 compute_mode, label);
4562 expand_inc (quotient, const1_rtx);
4563 expand_dec (remainder, op1);
4564 emit_label (label);
4565 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4566 }
4567
4568 /* No luck with division elimination or divmod. Have to do it
4569 by conditionally adjusting op0 *and* the result. */
4570 {
4571 rtx label1, label2;
4572 rtx adjusted_op0, tem;
4573
4574 quotient = gen_reg_rtx (compute_mode);
4575 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4576 label1 = gen_label_rtx ();
4577 label2 = gen_label_rtx ();
4578 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4579 compute_mode, label1);
4580 emit_move_insn (quotient, const0_rtx);
4581 emit_jump_insn (gen_jump (label2));
4582 emit_barrier ();
4583 emit_label (label1);
4584 expand_dec (adjusted_op0, const1_rtx);
4585 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4586 quotient, 1, OPTAB_LIB_WIDEN);
4587 if (tem != quotient)
4588 emit_move_insn (quotient, tem);
4589 expand_inc (quotient, const1_rtx);
4590 emit_label (label2);
4591 }
4592 }
4593 else /* signed */
4594 {
4595 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4596 && INTVAL (op1) >= 0)
4597 {
4598 /* This is extremely similar to the code for the unsigned case
4599 above. For 2.7 we should merge these variants, but for
4600 2.6.1 I don't want to touch the code for unsigned since that
4601                 gets used in C.  The signed case will only be used by other
4602 languages (Ada). */
4603
4604 rtx t1, t2, t3;
4605 unsigned HOST_WIDE_INT d = INTVAL (op1);
4606 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4607 floor_log2 (d), tquotient, 0);
4608 t2 = expand_binop (compute_mode, and_optab, op0,
4609 GEN_INT (d - 1),
4610 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4611 t3 = gen_reg_rtx (compute_mode);
4612 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4613 compute_mode, 1, 1);
4614 if (t3 == 0)
4615 {
4616 rtx lab;
4617 lab = gen_label_rtx ();
4618 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4619 expand_inc (t1, const1_rtx);
4620 emit_label (lab);
4621 quotient = t1;
4622 }
4623 else
4624 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4625 t1, t3),
4626 tquotient);
4627 break;
4628 }
4629
4630 /* Try using an instruction that produces both the quotient and
4631 remainder, using truncation. We can easily compensate the
4632 quotient or remainder to get ceiling rounding, once we have the
4633              remainder.  Notice that we also compute the final remainder
4634 value here, and return the result right away. */
4635 if (target == 0 || GET_MODE (target) != compute_mode)
4636 target = gen_reg_rtx (compute_mode);
4637 if (rem_flag)
4638 {
4639              remainder = (REG_P (target)
4640 ? target : gen_reg_rtx (compute_mode));
4641 quotient = gen_reg_rtx (compute_mode);
4642 }
4643 else
4644 {
4645 quotient = (REG_P (target)
4646 ? target : gen_reg_rtx (compute_mode));
4647 remainder = gen_reg_rtx (compute_mode);
4648 }
4649
4650 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4651 remainder, 0))
4652 {
4653 /* This could be computed with a branch-less sequence.
4654 Save that for later. */
4655 rtx tem;
4656 rtx label = gen_label_rtx ();
4657 do_cmp_and_jump (remainder, const0_rtx, EQ,
4658 compute_mode, label);
4659 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4660 NULL_RTX, 0, OPTAB_WIDEN);
4661 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4662 expand_inc (quotient, const1_rtx);
4663 expand_dec (remainder, op1);
4664 emit_label (label);
4665 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4666 }
4667
4668 /* No luck with division elimination or divmod. Have to do it
4669 by conditionally adjusting op0 *and* the result. */
4670 {
4671 rtx label1, label2, label3, label4, label5;
4672 rtx adjusted_op0;
4673 rtx tem;
4674
4675 quotient = gen_reg_rtx (compute_mode);
4676 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4677 label1 = gen_label_rtx ();
4678 label2 = gen_label_rtx ();
4679 label3 = gen_label_rtx ();
4680 label4 = gen_label_rtx ();
4681 label5 = gen_label_rtx ();
4682 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4683 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4684 compute_mode, label1);
4685 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4686 quotient, 0, OPTAB_LIB_WIDEN);
4687 if (tem != quotient)
4688 emit_move_insn (quotient, tem);
4689 emit_jump_insn (gen_jump (label5));
4690 emit_barrier ();
4691 emit_label (label1);
4692 expand_dec (adjusted_op0, const1_rtx);
4693 emit_jump_insn (gen_jump (label4));
4694 emit_barrier ();
4695 emit_label (label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4697 compute_mode, label3);
4698 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 quotient, 0, OPTAB_LIB_WIDEN);
4700 if (tem != quotient)
4701 emit_move_insn (quotient, tem);
4702 emit_jump_insn (gen_jump (label5));
4703 emit_barrier ();
4704 emit_label (label3);
4705 expand_inc (adjusted_op0, const1_rtx);
4706 emit_label (label4);
4707 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 quotient, 0, OPTAB_LIB_WIDEN);
4709 if (tem != quotient)
4710 emit_move_insn (quotient, tem);
4711 expand_inc (quotient, const1_rtx);
4712 emit_label (label5);
4713 }
4714 }
4715 break;
4716
4717 case EXACT_DIV_EXPR:
4718 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4719 {
4720 HOST_WIDE_INT d = INTVAL (op1);
4721 unsigned HOST_WIDE_INT ml;
4722 int pre_shift;
4723 rtx t1;
4724
4725 pre_shift = floor_log2 (d & -d);
4726 ml = invert_mod2n (d >> pre_shift, size);
4727 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4728 pre_shift, NULL_RTX, unsignedp);
4729 quotient = expand_mult (compute_mode, t1,
4730 gen_int_mode (ml, compute_mode),
4731 NULL_RTX, 1);
4732
4733 insn = get_last_insn ();
4734 set_dst_reg_note (insn, REG_EQUAL,
4735 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4736 compute_mode, op0, op1),
4737 quotient);
4738 }
4739 break;
4740
4741 case ROUND_DIV_EXPR:
4742 case ROUND_MOD_EXPR:
4743 if (unsignedp)
4744 {
4745 rtx tem;
4746 rtx label;
4747 label = gen_label_rtx ();
4748 quotient = gen_reg_rtx (compute_mode);
4749 remainder = gen_reg_rtx (compute_mode);
4750 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4751 {
4752 rtx tem;
4753 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4754 quotient, 1, OPTAB_LIB_WIDEN);
4755 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4756 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4757 remainder, 1, OPTAB_LIB_WIDEN);
4758 }
4759 tem = plus_constant (compute_mode, op1, -1);
4760 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4761 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4762 expand_inc (quotient, const1_rtx);
4763 expand_dec (remainder, op1);
4764 emit_label (label);
4765 }
4766 else
4767 {
4768 rtx abs_rem, abs_op1, tem, mask;
4769 rtx label;
4770 label = gen_label_rtx ();
4771 quotient = gen_reg_rtx (compute_mode);
4772 remainder = gen_reg_rtx (compute_mode);
4773 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4774 {
4775 rtx tem;
4776 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4777 quotient, 0, OPTAB_LIB_WIDEN);
4778 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4779 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4780 remainder, 0, OPTAB_LIB_WIDEN);
4781 }
4782 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4783 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4784 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4785 1, NULL_RTX, 1);
4786 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4787 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4788 NULL_RTX, 0, OPTAB_WIDEN);
4789 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4790 size - 1, NULL_RTX, 0);
4791 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4792 NULL_RTX, 0, OPTAB_WIDEN);
4793 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4794 NULL_RTX, 0, OPTAB_WIDEN);
4795 expand_inc (quotient, tem);
4796 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4797 NULL_RTX, 0, OPTAB_WIDEN);
4798 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4799 NULL_RTX, 0, OPTAB_WIDEN);
4800 expand_dec (remainder, tem);
4801 emit_label (label);
4802 }
4803 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4804
4805 default:
4806 gcc_unreachable ();
4807 }
4808
4809 if (quotient == 0)
4810 {
4811 if (target && GET_MODE (target) != compute_mode)
4812 target = 0;
4813
4814 if (rem_flag)
4815 {
4816 /* Try to produce the remainder without producing the quotient.
4817 If we seem to have a divmod pattern that does not require widening,
4818 don't try widening here. We should really have a WIDEN argument
4819 to expand_twoval_binop, since what we'd really like to do here is
4820 1) try a mod insn in compute_mode
4821 2) try a divmod insn in compute_mode
4822 3) try a div insn in compute_mode and multiply-subtract to get
4823 remainder
4824 4) try the same things with widening allowed. */
4825 remainder
4826 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4827 op0, op1, target,
4828 unsignedp,
4829 ((optab_handler (optab2, compute_mode)
4830 != CODE_FOR_nothing)
4831 ? OPTAB_DIRECT : OPTAB_WIDEN));
4832 if (remainder == 0)
4833 {
4834 /* No luck there. Can we do remainder and divide at once
4835 without a library call? */
4836 remainder = gen_reg_rtx (compute_mode);
4837 if (! expand_twoval_binop ((unsignedp
4838 ? udivmod_optab
4839 : sdivmod_optab),
4840 op0, op1,
4841 NULL_RTX, remainder, unsignedp))
4842 remainder = 0;
4843 }
4844
4845 if (remainder)
4846 return gen_lowpart (mode, remainder);
4847 }
4848
4849 /* Produce the quotient. Try a quotient insn, but not a library call.
4850 If we have a divmod in this mode, use it in preference to widening
4851 the div (for this test we assume it will not fail). Note that optab2
4852 is set to the one of the two optabs that the call below will use. */
4853 quotient
4854 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4855 op0, op1, rem_flag ? NULL_RTX : target,
4856 unsignedp,
4857 ((optab_handler (optab2, compute_mode)
4858 != CODE_FOR_nothing)
4859 ? OPTAB_DIRECT : OPTAB_WIDEN));
4860
4861 if (quotient == 0)
4862 {
4863 /* No luck there. Try a quotient-and-remainder insn,
4864 keeping the quotient alone. */
4865 quotient = gen_reg_rtx (compute_mode);
4866 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4867 op0, op1,
4868 quotient, NULL_RTX, unsignedp))
4869 {
4870 quotient = 0;
4871 if (! rem_flag)
4872 /* Still no luck. If we are not computing the remainder,
4873 use a library call for the quotient. */
4874 quotient = sign_expand_binop (compute_mode,
4875 udiv_optab, sdiv_optab,
4876 op0, op1, target,
4877 unsignedp, OPTAB_LIB_WIDEN);
4878 }
4879 }
4880 }
4881
4882 if (rem_flag)
4883 {
4884 if (target && GET_MODE (target) != compute_mode)
4885 target = 0;
4886
4887 if (quotient == 0)
4888 {
4889 /* No divide instruction either. Use library for remainder. */
4890 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4891 op0, op1, target,
4892 unsignedp, OPTAB_LIB_WIDEN);
4893 /* No remainder function. Try a quotient-and-remainder
4894 function, keeping the remainder. */
4895 if (!remainder)
4896 {
4897 remainder = gen_reg_rtx (compute_mode);
4898 if (!expand_twoval_binop_libfunc
4899 (unsignedp ? udivmod_optab : sdivmod_optab,
4900 op0, op1,
4901 NULL_RTX, remainder,
4902 unsignedp ? UMOD : MOD))
4903 remainder = NULL_RTX;
4904 }
4905 }
4906 else
4907 {
4908 /* We divided. Now finish doing X - Y * (X / Y). */
4909 remainder = expand_mult (compute_mode, quotient, op1,
4910 NULL_RTX, unsignedp);
4911 remainder = expand_binop (compute_mode, sub_optab, op0,
4912 remainder, target, unsignedp,
4913 OPTAB_LIB_WIDEN);
4914 }
4915 }
4916
4917 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4918 }
4919 \f
4920 /* Return a tree node with data type TYPE, describing the value of X.
4921    Usually this is a VAR_DECL, if there is no obvious better choice.
4922    X may be an expression; however, we only support those expressions
4923    generated by loop.c.  */
4924
4925 tree
4926 make_tree (tree type, rtx x)
4927 {
4928 tree t;
4929
4930 switch (GET_CODE (x))
4931 {
4932 case CONST_INT:
4933 {
4934 HOST_WIDE_INT hi = 0;
4935
4936 if (INTVAL (x) < 0
4937 && !(TYPE_UNSIGNED (type)
4938 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4939 < HOST_BITS_PER_WIDE_INT)))
4940 hi = -1;
4941
4942 t = build_int_cst_wide (type, INTVAL (x), hi);
4943
4944 return t;
4945 }
4946
4947 case CONST_DOUBLE:
4948 if (GET_MODE (x) == VOIDmode)
4949 t = build_int_cst_wide (type,
4950 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4951 else
4952 {
4953 REAL_VALUE_TYPE d;
4954
4955 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4956 t = build_real (type, d);
4957 }
4958
4959 return t;
4960
4961 case CONST_VECTOR:
4962 {
4963 int units = CONST_VECTOR_NUNITS (x);
4964 tree itype = TREE_TYPE (type);
4965 tree *elts;
4966 int i;
4967
4968 /* Build a tree with vector elements. */
4969 elts = XALLOCAVEC (tree, units);
4970 for (i = units - 1; i >= 0; --i)
4971 {
4972 rtx elt = CONST_VECTOR_ELT (x, i);
4973 elts[i] = make_tree (itype, elt);
4974 }
4975
4976 return build_vector (type, elts);
4977 }
4978
4979 case PLUS:
4980 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4981 make_tree (type, XEXP (x, 1)));
4982
4983 case MINUS:
4984 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1)));
4986
4987 case NEG:
4988 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4989
4990 case MULT:
4991 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4992 make_tree (type, XEXP (x, 1)));
4993
4994 case ASHIFT:
4995 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4996 make_tree (type, XEXP (x, 1)));
4997
4998 case LSHIFTRT:
4999 t = unsigned_type_for (type);
5000 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5001 make_tree (t, XEXP (x, 0)),
5002 make_tree (type, XEXP (x, 1))));
5003
5004 case ASHIFTRT:
5005 t = signed_type_for (type);
5006 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5007 make_tree (t, XEXP (x, 0)),
5008 make_tree (type, XEXP (x, 1))));
5009
5010 case DIV:
5011 if (TREE_CODE (type) != REAL_TYPE)
5012 t = signed_type_for (type);
5013 else
5014 t = type;
5015
5016 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5017 make_tree (t, XEXP (x, 0)),
5018 make_tree (t, XEXP (x, 1))));
5019 case UDIV:
5020 t = unsigned_type_for (type);
5021 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5022 make_tree (t, XEXP (x, 0)),
5023 make_tree (t, XEXP (x, 1))));
5024
5025 case SIGN_EXTEND:
5026 case ZERO_EXTEND:
5027 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5028 GET_CODE (x) == ZERO_EXTEND);
5029 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5030
5031 case CONST:
5032 return make_tree (type, XEXP (x, 0));
5033
5034 case SYMBOL_REF:
5035 t = SYMBOL_REF_DECL (x);
5036 if (t)
5037 return fold_convert (type, build_fold_addr_expr (t));
5038 /* else fall through. */
5039
5040 default:
5041 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5042
5043 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5044 address mode to pointer mode. */
5045 if (POINTER_TYPE_P (type))
5046 x = convert_memory_address_addr_space
5047 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5048
5049 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5050 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5051 t->decl_with_rtl.rtl = x;
5052
5053 return t;
5054 }
5055 }
5056 \f
5057 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5058 and returning TARGET.
5059
5060 If TARGET is 0, a pseudo-register or constant is returned. */
5061
5062 rtx
5063 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5064 {
5065 rtx tem = 0;
5066
5067 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5068 tem = simplify_binary_operation (AND, mode, op0, op1);
5069 if (tem == 0)
5070 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5071
5072 if (target == 0)
5073 target = tem;
5074 else if (tem != target)
5075 emit_move_insn (target, tem);
5076 return target;
5077 }
5078
5079 /* Helper function for emit_store_flag. */
5080 static rtx
5081 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5082 enum machine_mode mode, enum machine_mode compare_mode,
5083 int unsignedp, rtx x, rtx y, int normalizep,
5084 enum machine_mode target_mode)
5085 {
5086 struct expand_operand ops[4];
5087 rtx op0, last, comparison, subtarget;
5088 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5089
5090 last = get_last_insn ();
5091 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5092 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5093 if (!x || !y)
5094 {
5095 delete_insns_since (last);
5096 return NULL_RTX;
5097 }
5098
5099 if (target_mode == VOIDmode)
5100 target_mode = result_mode;
5101 if (!target)
5102 target = gen_reg_rtx (target_mode);
5103
5104 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5105
5106 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5107 create_fixed_operand (&ops[1], comparison);
5108 create_fixed_operand (&ops[2], x);
5109 create_fixed_operand (&ops[3], y);
5110 if (!maybe_expand_insn (icode, 4, ops))
5111 {
5112 delete_insns_since (last);
5113 return NULL_RTX;
5114 }
5115 subtarget = ops[0].value;
5116
5117 /* If we are converting to a wider mode, first convert to
5118 TARGET_MODE, then normalize. This produces better combining
5119 opportunities on machines that have a SIGN_EXTRACT when we are
5120 testing a single bit. This mostly benefits the 68k.
5121
5122 If STORE_FLAG_VALUE does not have the sign bit set when
5123 interpreted in MODE, we can do this conversion as unsigned, which
5124 is usually more efficient. */
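/* Illustrative case: on a machine where STORE_FLAG_VALUE is 1 and the
   cstore pattern produces a QImode result, the sign bit is known
   clear in QImode, so the widening convert_move below can zero-extend
   rather than sign-extend.  */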
5125 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5126 {
5127 convert_move (target, subtarget,
5128 val_signbit_known_clear_p (result_mode,
5129 STORE_FLAG_VALUE));
5130 op0 = target;
5131 result_mode = target_mode;
5132 }
5133 else
5134 op0 = subtarget;
5135
5136 /* If we want to keep subexpressions around, don't reuse our last
5137 target. */
5138 if (optimize)
5139 subtarget = 0;
5140
5141 /* Now normalize to the proper value in MODE. Sometimes we don't
5142 have to do anything. */
5143 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5144 ;
5145 /* STORE_FLAG_VALUE might be the most negative number, so write
5146 the comparison this way to avoid a compile-time warning. */
5147 else if (- normalizep == STORE_FLAG_VALUE)
5148 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5149
5150 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5151 it hard to use a value of just the sign bit due to ANSI integer
5152 constant typing rules. */
5153 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5154 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5155 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5156 normalizep == 1);
5157 else
5158 {
5159 gcc_assert (STORE_FLAG_VALUE & 1);
5160
5161 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5162 if (normalizep == -1)
5163 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5164 }
5165
5166 /* If we were converting to a smaller mode, do the conversion now. */
5167 if (target_mode != result_mode)
5168 {
5169 convert_move (target, op0, 0);
5170 return target;
5171 }
5172 else
5173 return op0;
5174 }
5175
5176
5177 /* A subroutine of emit_store_flag only including "tricks" that do not
5178 need a recursive call. These are kept separate to avoid infinite
5179 loops. */
5180
5181 static rtx
5182 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5183 enum machine_mode mode, int unsignedp, int normalizep,
5184 enum machine_mode target_mode)
5185 {
5186 rtx subtarget;
5187 enum insn_code icode;
5188 enum machine_mode compare_mode;
5189 enum mode_class mclass;
5190 enum rtx_code scode;
5191 rtx tem;
5192
5193 if (unsignedp)
5194 code = unsigned_condition (code);
5195 scode = swap_condition (code);
5196
5197 /* If one operand is constant, make it the second one. Only do this
5198 if the other operand is not constant as well. */
5199
5200 if (swap_commutative_operands_p (op0, op1))
5201 {
5202 tem = op0;
5203 op0 = op1;
5204 op1 = tem;
5205 code = swap_condition (code);
5206 }
5207
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (op0);
5210
5211 /* For some comparisons with 1 and -1, we can convert this to
5212 comparisons with zero. This will often produce more opportunities for
5213 store-flag insns. */
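/* A worked instance of the rewrites below (illustrative values):
   "x < 1" becomes "x <= 0", "x > -1" becomes "x >= 0", and in the
   unsigned cases "x >= 1" (GEU) becomes "x != 0" while "x < 1" (LTU)
   becomes "x == 0".  */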
5214
5215 switch (code)
5216 {
5217 case LT:
5218 if (op1 == const1_rtx)
5219 op1 = const0_rtx, code = LE;
5220 break;
5221 case LE:
5222 if (op1 == constm1_rtx)
5223 op1 = const0_rtx, code = LT;
5224 break;
5225 case GE:
5226 if (op1 == const1_rtx)
5227 op1 = const0_rtx, code = GT;
5228 break;
5229 case GT:
5230 if (op1 == constm1_rtx)
5231 op1 = const0_rtx, code = GE;
5232 break;
5233 case GEU:
5234 if (op1 == const1_rtx)
5235 op1 = const0_rtx, code = NE;
5236 break;
5237 case LTU:
5238 if (op1 == const1_rtx)
5239 op1 = const0_rtx, code = EQ;
5240 break;
5241 default:
5242 break;
5243 }
5244
5245 /* If we are comparing a double-word integer with zero or -1, we can
5246 convert the comparison into one involving a single word. */
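/* Sketch of the word-level rewrites used below, assuming a 32-bit
   word: a DImode "x == 0" becomes "(x_low | x_high) == 0",
   "x == -1" becomes "(x_low & x_high) == -1", and "x < 0" only
   needs the sign bit of x_high.  */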
5247 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5248 && GET_MODE_CLASS (mode) == MODE_INT
5249 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5250 {
5251 if ((code == EQ || code == NE)
5252 && (op1 == const0_rtx || op1 == constm1_rtx))
5253 {
5254 rtx op00, op01;
5255
5256 /* Do a logical OR or AND of the two words and compare the
5257 result. */
5258 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5259 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5260 tem = expand_binop (word_mode,
5261 op1 == const0_rtx ? ior_optab : and_optab,
5262 op00, op01, NULL_RTX, unsignedp,
5263 OPTAB_DIRECT);
5264
5265 if (tem != 0)
5266 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5267 unsignedp, normalizep);
5268 }
5269 else if ((code == LT || code == GE) && op1 == const0_rtx)
5270 {
5271 rtx op0h;
5272
5273 /* If testing the sign bit, we can just test the high word. */
5274 op0h = simplify_gen_subreg (word_mode, op0, mode,
5275 subreg_highpart_offset (word_mode,
5276 mode));
5277 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5278 unsignedp, normalizep);
5279 }
5280 else
5281 tem = NULL_RTX;
5282
5283 if (tem)
5284 {
5285 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5286 return tem;
5287 if (!target)
5288 target = gen_reg_rtx (target_mode);
5289
5290 convert_move (target, tem,
5291 !val_signbit_known_set_p (word_mode,
5292 (normalizep ? normalizep
5293 : STORE_FLAG_VALUE)));
5294 return target;
5295 }
5296 }
5297
5298 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5299 complement of A (for GE) and shifting the sign bit to the low bit. */
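/* In C terms the idea is roughly (sketch, 32-bit int assumed):
     (A < 0)  is  ((unsigned) A) >> 31       -- logical shift, 0/1
     (A >= 0) is  ((unsigned) ~A) >> 31      -- complement first
   and an arithmetic shift yields the -1/0 form instead.  */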
5300 if (op1 == const0_rtx && (code == LT || code == GE)
5301 && GET_MODE_CLASS (mode) == MODE_INT
5302 && (normalizep || STORE_FLAG_VALUE == 1
5303 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5304 {
5305 subtarget = target;
5306
5307 if (!target)
5308 target_mode = mode;
5309
5310 /* If the result is to be wider than OP0, it is best to convert it
5311 first. If it is to be narrower, it is *incorrect* to convert it
5312 first. */
5313 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5314 {
5315 op0 = convert_modes (target_mode, mode, op0, 0);
5316 mode = target_mode;
5317 }
5318
5319 if (target_mode != mode)
5320 subtarget = 0;
5321
5322 if (code == GE)
5323 op0 = expand_unop (mode, one_cmpl_optab, op0,
5324 ((STORE_FLAG_VALUE == 1 || normalizep)
5325 ? 0 : subtarget), 0);
5326
5327 if (STORE_FLAG_VALUE == 1 || normalizep)
5328 /* If we are supposed to produce a 0/1 value, we want to do
5329 a logical shift from the sign bit to the low-order bit; for
5330 a -1/0 value, we do an arithmetic shift. */
5331 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5332 GET_MODE_BITSIZE (mode) - 1,
5333 subtarget, normalizep != -1);
5334
5335 if (mode != target_mode)
5336 op0 = convert_modes (target_mode, mode, op0, 0);
5337
5338 return op0;
5339 }
5340
5341 mclass = GET_MODE_CLASS (mode);
5342 for (compare_mode = mode; compare_mode != VOIDmode;
5343 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5344 {
5345 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5346 icode = optab_handler (cstore_optab, optab_mode);
5347 if (icode != CODE_FOR_nothing)
5348 {
5349 do_pending_stack_adjust ();
5350 tem = emit_cstore (target, icode, code, mode, compare_mode,
5351 unsignedp, op0, op1, normalizep, target_mode);
5352 if (tem)
5353 return tem;
5354
5355 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5356 {
5357 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5358 unsignedp, op1, op0, normalizep, target_mode);
5359 if (tem)
5360 return tem;
5361 }
5362 break;
5363 }
5364 }
5365
5366 return 0;
5367 }
5368
5369 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5370 and store the result in TARGET. Normally return TARGET.
5371 Return 0 if that cannot be done.
5372
5373 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5374 it is VOIDmode, they cannot both be CONST_INT.
5375
5376 UNSIGNEDP is for the case where we have to widen the operands
5377 to perform the operation. It says to use zero-extension.
5378
5379 NORMALIZEP is 1 if we should convert the result to be either zero
5380 or one. NORMALIZEP is -1 if we should convert the result to be
5381 either zero or -1. If NORMALIZEP is zero, the result will be left
5382 "raw" out of the scc insn. */
5383
5384 rtx
5385 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5386 enum machine_mode mode, int unsignedp, int normalizep)
5387 {
5388 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5389 enum rtx_code rcode;
5390 rtx subtarget;
5391 rtx tem, last, trueval;
5392
5393 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5394 target_mode);
5395 if (tem)
5396 return tem;
5397
5398 /* If we reached here, we can't do this with a scc insn; however, there
5399 are some comparisons that can be done in other ways. Don't do any
5400 of these cases if branches are very cheap. */
5401 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5402 return 0;
5403
5404 /* See what we need to return. We can only return a 1, -1, or the
5405 sign bit. */
5406
5407 if (normalizep == 0)
5408 {
5409 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5410 normalizep = STORE_FLAG_VALUE;
5411
5412 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5413 ;
5414 else
5415 return 0;
5416 }
5417
5418 last = get_last_insn ();
5419
5420 /* If optimizing, use different pseudo registers for each insn, instead
5421 of reusing the same pseudo. This leads to better CSE, but slows
5422 down the compiler, since there are more pseudos. */
5423 subtarget = (!optimize
5424 && (target_mode == mode)) ? target : NULL_RTX;
5425 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5426
5427 /* For floating-point comparisons, try the reverse comparison or try
5428 changing the "orderedness" of the comparison. */
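/* Worked example of the add/xor fixups used below (illustrative):
   with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, compute the
   reversed comparison as 0/1 and add -1, giving -1 when the original
   comparison holds and 0 when it does not.  Otherwise, XOR the
   reversed result with the desired "true" value, which flips it back
   to the un-reversed sense.  */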
5429 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5430 {
5431 enum rtx_code first_code;
5432 bool and_them;
5433
5434 rcode = reverse_condition_maybe_unordered (code);
5435 if (can_compare_p (rcode, mode, ccp_store_flag)
5436 && (code == ORDERED || code == UNORDERED
5437 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5438 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5439 {
5440 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5441 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5442
5443 /* For the reverse comparison, use either an addition or an XOR. */
5444 if (want_add
5445 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5446 optimize_insn_for_speed_p ()) == 0)
5447 {
5448 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5449 STORE_FLAG_VALUE, target_mode);
5450 if (tem)
5451 return expand_binop (target_mode, add_optab, tem,
5452 GEN_INT (normalizep),
5453 target, 0, OPTAB_WIDEN);
5454 }
5455 else if (!want_add
5456 && rtx_cost (trueval, XOR, 1,
5457 optimize_insn_for_speed_p ()) == 0)
5458 {
5459 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5460 normalizep, target_mode);
5461 if (tem)
5462 return expand_binop (target_mode, xor_optab, tem, trueval,
5463 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5464 }
5465 }
5466
5467 delete_insns_since (last);
5468
5469 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5470 if (code == ORDERED || code == UNORDERED)
5471 return 0;
5472
5473 and_them = split_comparison (code, mode, &first_code, &code);
5474
5475 /* If there are no NaNs, the first comparison should always fall through.
5476 Effectively change the comparison to the other one. */
5477 if (!HONOR_NANS (mode))
5478 {
5479 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5480 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5481 target_mode);
5482 }
5483
5484 #ifdef HAVE_conditional_move
5485 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5486 conditional move. */
5487 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5488 normalizep, target_mode);
5489 if (tem == 0)
5490 return 0;
5491
5492 if (and_them)
5493 tem = emit_conditional_move (target, code, op0, op1, mode,
5494 tem, const0_rtx, GET_MODE (tem), 0);
5495 else
5496 tem = emit_conditional_move (target, code, op0, op1, mode,
5497 trueval, tem, GET_MODE (tem), 0);
5498
5499 if (tem == 0)
5500 delete_insns_since (last);
5501 return tem;
5502 #else
5503 return 0;
5504 #endif
5505 }
5506
5507 /* The remaining tricks only apply to integer comparisons. */
5508
5509 if (GET_MODE_CLASS (mode) != MODE_INT)
5510 return 0;
5511
5512 /* If this is an equality comparison of integers, we can try to exclusive-or
5513 (or subtract) the two operands and use a recursive call to try the
5514 comparison with zero. Don't do any of these cases if branches are
5515 very cheap. */
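/* Concretely (sketch): "a == b" is tried as "(a ^ b) == 0", or as
   "(a - b) == 0" when no XOR pattern is available; the recursive
   emit_store_flag call then handles the simpler comparison with
   zero.  */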
5516
5517 if ((code == EQ || code == NE) && op1 != const0_rtx)
5518 {
5519 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5520 OPTAB_WIDEN);
5521
5522 if (tem == 0)
5523 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5524 OPTAB_WIDEN);
5525 if (tem != 0)
5526 tem = emit_store_flag (target, code, tem, const0_rtx,
5527 mode, unsignedp, normalizep);
5528 if (tem != 0)
5529 return tem;
5530
5531 delete_insns_since (last);
5532 }
5533
5534 /* For integer comparisons, try the reverse comparison. However, for
5535 small X, if we'd have to extend it anyway, implementing "X != 0"
5536 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5537 rcode = reverse_condition (code);
5538 if (can_compare_p (rcode, mode, ccp_store_flag)
5539 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5540 && code == NE
5541 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5542 && op1 == const0_rtx))
5543 {
5544 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5545 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5546
5547 /* Again, for the reverse comparison, use either an addition or an XOR. */
5548 if (want_add
5549 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5550 optimize_insn_for_speed_p ()) == 0)
5551 {
5552 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5553 STORE_FLAG_VALUE, target_mode);
5554 if (tem != 0)
5555 tem = expand_binop (target_mode, add_optab, tem,
5556 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5557 }
5558 else if (!want_add
5559 && rtx_cost (trueval, XOR, 1,
5560 optimize_insn_for_speed_p ()) == 0)
5561 {
5562 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5563 normalizep, target_mode);
5564 if (tem != 0)
5565 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5566 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5567 }
5568
5569 if (tem != 0)
5570 return tem;
5571 delete_insns_since (last);
5572 }
5573
5574 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5575 the constant zero. Reject all other comparisons at this point. Only
5576 do LE and GT if branches are expensive since they are expensive on
5577 2-operand machines. */
5578
5579 if (op1 != const0_rtx
5580 || (code != EQ && code != NE
5581 && (BRANCH_COST (optimize_insn_for_speed_p (),
5582 false) <= 1 || (code != LE && code != GT))))
5583 return 0;
5584
5585 /* Try to put the result of the comparison in the sign bit. Assume we can't
5586 do the necessary operation below. */
5587
5588 tem = 0;
5589
5590 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5591 the sign bit set. */
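/* Illustrative check of the identity (32-bit values): for A == 7,
   A | (A - 1) == 7, sign bit clear; for A == 0, A | -1 == -1, sign
   bit set; for A == -5 the sign bit of A is already set.  So the
   sign bit of (A | (A - 1)) is exactly "A <= 0".  */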
5592
5593 if (code == LE)
5594 {
5595 /* This is destructive, so SUBTARGET can't be OP0. */
5596 if (rtx_equal_p (subtarget, op0))
5597 subtarget = 0;
5598
5599 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5600 OPTAB_WIDEN);
5601 if (tem)
5602 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5603 OPTAB_WIDEN);
5604 }
5605
5606 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5607 number of bits in the mode of OP0, minus one. */
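/* Illustrative check (32-bit values): (A >> 31) is 0 for A >= 0 and
   -1 for A < 0, so (A >> 31) - A is -A (negative) when A > 0, 0 when
   A == 0, and -1 - A (non-negative) when A < 0; its sign bit is
   therefore exactly "A > 0".  */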
5608
5609 if (code == GT)
5610 {
5611 if (rtx_equal_p (subtarget, op0))
5612 subtarget = 0;
5613
5614 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5615 GET_MODE_BITSIZE (mode) - 1,
5616 subtarget, 0);
5617 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5618 OPTAB_WIDEN);
5619 }
5620
5621 if (code == EQ || code == NE)
5622 {
5623 /* For EQ or NE, one way to do the comparison is to apply an operation
5624 that converts the operand into a positive number if it is nonzero
5625 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5626 for NE we negate. This puts the result in the sign bit. Then we
5627 normalize with a shift, if needed.
5628
5629 Two operations that can do the above actions are ABS and FFS, so try
5630 them. If that doesn't work, and MODE is smaller than a full word,
5631 we can use zero-extension to the wider mode (an unsigned conversion)
5632 as the operation. */
5633
5634 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5635 that is compensated by the subsequent overflow when subtracting
5636 one / negating. */
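/* Sketch of the EQ case with ABS (illustrative): abs (A) - 1 is -1
   (sign bit set) only when A == 0 and is non-negative otherwise; for
   A == INT_MIN, abs (A) wraps back to INT_MIN and the subtraction
   still yields the non-negative INT_MAX.  For NE, -abs (A) has the
   sign bit set exactly when A != 0.  */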
5637
5638 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5639 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5640 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5641 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5642 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5643 {
5644 tem = convert_modes (word_mode, mode, op0, 1);
5645 mode = word_mode;
5646 }
5647
5648 if (tem != 0)
5649 {
5650 if (code == EQ)
5651 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5652 0, OPTAB_WIDEN);
5653 else
5654 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5655 }
5656
5657 /* If we couldn't do it that way, for NE we can "or" the two's complement
5658 of the value with itself. For EQ, we take the one's complement of
5659 that "or", which is an extra insn, so we only handle EQ if branches
5660 are expensive. */
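/* Illustrative check: (-A | A) has the sign bit set for every nonzero
   A (for positive A the negation is negative, for negative A the
   value itself is) and is 0 only for A == 0; complementing it gives
   the EQ test.  */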
5661
5662 if (tem == 0
5663 && (code == NE
5664 || BRANCH_COST (optimize_insn_for_speed_p (),
5665 false) > 1))
5666 {
5667 if (rtx_equal_p (subtarget, op0))
5668 subtarget = 0;
5669
5670 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5671 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5672 OPTAB_WIDEN);
5673
5674 if (tem && code == EQ)
5675 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5676 }
5677 }
5678
5679 if (tem && normalizep)
5680 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5681 GET_MODE_BITSIZE (mode) - 1,
5682 subtarget, normalizep == 1);
5683
5684 if (tem)
5685 {
5686 if (!target)
5687 ;
5688 else if (GET_MODE (tem) != target_mode)
5689 {
5690 convert_move (target, tem, 0);
5691 tem = target;
5692 }
5693 else if (!subtarget)
5694 {
5695 emit_move_insn (target, tem);
5696 tem = target;
5697 }
5698 }
5699 else
5700 delete_insns_since (last);
5701
5702 return tem;
5703 }
5704
5705 /* Like emit_store_flag, but always succeeds. */
5706
5707 rtx
5708 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5709 enum machine_mode mode, int unsignedp, int normalizep)
5710 {
5711 rtx tem, label;
5712 rtx trueval, falseval;
5713
5714 /* First see if emit_store_flag can do the job. */
5715 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5716 if (tem != 0)
5717 return tem;
5718
5719 if (!target)
5720 target = gen_reg_rtx (word_mode);
5721
5722 /* If this failed, we have to do this with set/compare/jump/set code.
5723 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
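/* The shape of the code emitted for that special case is roughly
   (sketch in C terms):

     if (target == 0)
       goto done;         -- comparison false, 0 is already there
     target = trueval;    -- comparison true
   done:;                                                           */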
5724 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5725 if (code == NE
5726 && GET_MODE_CLASS (mode) == MODE_INT
5727 && REG_P (target)
5728 && op0 == target
5729 && op1 == const0_rtx)
5730 {
5731 label = gen_label_rtx ();
5732 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5733 mode, NULL_RTX, NULL_RTX, label, -1);
5734 emit_move_insn (target, trueval);
5735 emit_label (label);
5736 return target;
5737 }
5738
5739 if (!REG_P (target)
5740 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5741 target = gen_reg_rtx (GET_MODE (target));
5742
5743 /* Jump in the right direction if the target cannot implement CODE
5744 but can jump on its reverse condition. */
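/* For example (illustrative): if the machine cannot branch on LTGT
   but can branch on its reverse, UNEQ, the roles are swapped below:
   TARGET is first set to 0, the branch is taken on UNEQ (skipping
   the second store), and the fall-through store supplies the
   original true value.  */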
5745 falseval = const0_rtx;
5746 if (! can_compare_p (code, mode, ccp_jump)
5747 && (! FLOAT_MODE_P (mode)
5748 || code == ORDERED || code == UNORDERED
5749 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5750 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5751 {
5752 enum rtx_code rcode;
5753 if (FLOAT_MODE_P (mode))
5754 rcode = reverse_condition_maybe_unordered (code);
5755 else
5756 rcode = reverse_condition (code);
5757
5758 /* Canonicalize to UNORDERED for the libcall. */
5759 if (can_compare_p (rcode, mode, ccp_jump)
5760 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5761 {
5762 falseval = trueval;
5763 trueval = const0_rtx;
5764 code = rcode;
5765 }
5766 }
5767
5768 emit_move_insn (target, trueval);
5769 label = gen_label_rtx ();
5770 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5771 NULL_RTX, label, -1);
5772
5773 emit_move_insn (target, falseval);
5774 emit_label (label);
5775
5776 return target;
5777 }
5778 \f
5779 /* Perform possibly multi-word comparison and conditional jump to LABEL
5780 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5781 now a thin wrapper around do_compare_rtx_and_jump. */
5782
5783 static void
5784 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5785 rtx label)
5786 {
5787 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5788 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5789 NULL_RTX, NULL_RTX, label, -1);
5790 }