Fix gcc.c-torture/compile/pr53410-2.c on sparc.
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
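/* E.g. 8 & 7 and 0 & (0 - 1) are both 0, so 8 and 0 pass the test,
   while 6 & 5 is 4, so 6 does not.  */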
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
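/* With the fallback definitions above, the code below can refer to
   HAVE_insv, CODE_FOR_insv and gen_insv (and likewise for extv and extzv)
   unconditionally, even on targets whose machine description does not
   provide those patterns.  */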
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
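/* The members above are stack-allocated RTL templates; init_expmed wires
   them into expressions such as (plus (reg) (reg)) with PUT_CODE/PUT_MODE/
   XEXP and queries their rtx cost for every mode and speed setting,
   presumably so that no GC-managed rtxes need to be allocated while the
   cost tables are filled in.  */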
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
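/* Record, for each shift count M below, the cost of a shift by M and of
   shift-and-add / shift-and-subtract sequences, reusing the preallocated
   constants in CINT as shift amounts and POW2 as the corresponding
   multipliers.  */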
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
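/* For example, mode_for_extraction (EP_insv, 3) yields the mode the insv
   pattern wants for the value operand, or MAX_MACHINE_MODE if the target
   has no insv pattern; store_bit_field_1 below uses exactly this query.  */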
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
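/* For example, on a little-endian 32-bit target a field starting at bit 32
   of a DImode value begins at the least significant bit of the second word,
   so it is reachable as a lowpart subreg at byte offset 4.  */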
393
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
398 {
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
405 }
406 \f
407 /* Try to use an insv pattern to store VALUE into a field of OP0.
408 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
409 as for store_bit_field. */
410
411 static bool
412 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
413 unsigned HOST_WIDE_INT bitnum, rtx value,
414 enum machine_mode op_mode)
415 {
416 struct expand_operand ops[4];
417 rtx value1;
418 rtx xop0 = op0;
419 rtx last = get_last_insn ();
420 bool copy_back = false;
421
422 unsigned int unit = GET_MODE_BITSIZE (op_mode);
423 if (bitsize == 0 || bitsize > unit)
424 return false;
425
426 if (MEM_P (xop0))
427 {
428 /* Get a reference to the first byte of the field. */
429 xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT);
430 bitnum %= BITS_PER_UNIT;
431 }
432 else
433 {
434 /* Convert from counting within OP0 to counting in OP_MODE. */
435 if (BYTES_BIG_ENDIAN)
436 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
437
438 /* If xop0 is a register, we need it in OP_MODE
439 to make it acceptable to the format of insv. */
440 if (GET_CODE (xop0) == SUBREG)
441 /* We can't just change the mode, because this might clobber op0,
442 and we will need the original value of op0 if insv fails. */
443 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
444 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
445 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
446 }
447
448 /* If the destination is a paradoxical subreg such that we need a
449 truncate to the inner mode, perform the insertion on a temporary and
450 truncate the result to the original destination. Note that we can't
451 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
452 X) 0)) is (reg:N X). */
453 if (GET_CODE (xop0) == SUBREG
454 && REG_P (SUBREG_REG (xop0))
455 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
456 op_mode))
457 {
458 rtx tem = gen_reg_rtx (op_mode);
459 emit_move_insn (tem, xop0);
460 xop0 = tem;
461 copy_back = true;
462 }
463
464 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
465 "backwards" from the size of the unit we are inserting into.
466 Otherwise, we count bits from the most significant on a
467 BYTES/BITS_BIG_ENDIAN machine. */
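/* For example, with a 32-bit insertion unit, an 8-bit field whose
   lsb-relative position is 0 becomes position 24 once bits are counted
   from the msb.  */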
468
469 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
470 bitnum = unit - bitsize - bitnum;
471
472 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
473 value1 = value;
474 if (GET_MODE (value) != op_mode)
475 {
476 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
477 {
478 /* Optimization: Don't bother really extending VALUE
479 if it has all the bits we will actually use. However,
480 if we must narrow it, be sure we do it correctly. */
481
482 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
483 {
484 rtx tmp;
485
486 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
487 if (! tmp)
488 tmp = simplify_gen_subreg (op_mode,
489 force_reg (GET_MODE (value),
490 value1),
491 GET_MODE (value), 0);
492 value1 = tmp;
493 }
494 else
495 value1 = gen_lowpart (op_mode, value1);
496 }
497 else if (CONST_INT_P (value))
498 value1 = gen_int_mode (INTVAL (value), op_mode);
499 else
500 /* Parse phase is supposed to make VALUE's data type
501 match that of the component reference, which is a type
502 at least as wide as the field; so VALUE should have
503 a mode that corresponds to that type. */
504 gcc_assert (CONSTANT_P (value));
505 }
506
507 create_fixed_operand (&ops[0], xop0);
508 create_integer_operand (&ops[1], bitsize);
509 create_integer_operand (&ops[2], bitnum);
510 create_input_operand (&ops[3], value1, op_mode);
511 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
512 {
513 if (copy_back)
514 convert_move (op0, xop0, true);
515 return true;
516 }
517 delete_insns_since (last);
518 return false;
519 }
520
521 /* A subroutine of store_bit_field, with the same arguments. Return true
522 if the operation could be implemented.
523
524 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
525 no other way of implementing the operation. If FALLBACK_P is false,
526 return false instead. */
527
528 static bool
529 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum,
531 unsigned HOST_WIDE_INT bitregion_start,
532 unsigned HOST_WIDE_INT bitregion_end,
533 enum machine_mode fieldmode,
534 rtx value, bool fallback_p)
535 {
536 rtx op0 = str_rtx;
537 rtx orig_value;
538
539 while (GET_CODE (op0) == SUBREG)
540 {
541 /* The following line once was done only if WORDS_BIG_ENDIAN,
542 but I think that is a mistake. WORDS_BIG_ENDIAN is
543 meaningful at a much higher level; when structures are copied
544 between memory and regs, the higher-numbered regs
545 always get higher addresses. */
546 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
547 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
548 int byte_offset = 0;
549
550 /* Paradoxical subregs need special handling on big endian machines. */
551 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
552 {
553 int difference = inner_mode_size - outer_mode_size;
554
555 if (WORDS_BIG_ENDIAN)
556 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
557 if (BYTES_BIG_ENDIAN)
558 byte_offset += difference % UNITS_PER_WORD;
559 }
560 else
561 byte_offset = SUBREG_BYTE (op0);
562
563 bitnum += byte_offset * BITS_PER_UNIT;
564 op0 = SUBREG_REG (op0);
565 }
566
567 /* No action is needed if the target is a register and if the field
568 lies completely outside that register. This can occur if the source
569 code contains an out-of-bounds access to a small array. */
570 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
571 return true;
572
573 /* Use vec_set patterns for inserting parts of vectors whenever
574 available. */
575 if (VECTOR_MODE_P (GET_MODE (op0))
576 && !MEM_P (op0)
577 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
578 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
579 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
580 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
581 {
582 struct expand_operand ops[3];
583 enum machine_mode outermode = GET_MODE (op0);
584 enum machine_mode innermode = GET_MODE_INNER (outermode);
585 enum insn_code icode = optab_handler (vec_set_optab, outermode);
586 int pos = bitnum / GET_MODE_BITSIZE (innermode);
587
588 create_fixed_operand (&ops[0], op0);
589 create_input_operand (&ops[1], value, innermode);
590 create_integer_operand (&ops[2], pos);
591 if (maybe_expand_insn (icode, 3, ops))
592 return true;
593 }
594
595 /* If the target is a register, overwriting the entire object, or storing
596 a full-word or multi-word field can be done with just a SUBREG. */
597 if (!MEM_P (op0)
598 && bitsize == GET_MODE_BITSIZE (fieldmode)
599 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
600 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
601 {
602 /* Use the subreg machinery either to narrow OP0 to the required
603 words or to cope with mode punning between equal-sized modes. */
604 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
605 bitnum / BITS_PER_UNIT);
606 if (sub)
607 {
608 emit_move_insn (sub, value);
609 return true;
610 }
611 }
612
613 /* If the target is memory, storing any naturally aligned field can be
614 done with a simple store. For targets that support fast unaligned
615 memory, any naturally sized, unit aligned field can be done directly. */
616 if (MEM_P (op0)
617 && bitnum % BITS_PER_UNIT == 0
618 && bitsize == GET_MODE_BITSIZE (fieldmode)
619 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
620 || (bitnum % bitsize == 0
621 && MEM_ALIGN (op0) % bitsize == 0)))
622 {
623 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
624 emit_move_insn (op0, value);
625 return true;
626 }
627
628 /* Make sure we are playing with integral modes. Pun with subregs
629 if we aren't. This must come after the entire register case above,
630 since that case is valid for any mode. The following cases are only
631 valid for integral modes. */
632 {
633 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
634 if (imode != GET_MODE (op0))
635 {
636 if (MEM_P (op0))
637 op0 = adjust_bitfield_address (op0, imode, 0);
638 else
639 {
640 gcc_assert (imode != BLKmode);
641 op0 = gen_lowpart (imode, op0);
642 }
643 }
644 }
645
646 /* Storing an lsb-aligned field in a register
647 can be done with a movstrict instruction. */
648
649 if (!MEM_P (op0)
650 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
651 && bitsize == GET_MODE_BITSIZE (fieldmode)
652 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
653 {
654 struct expand_operand ops[2];
655 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
656 rtx arg0 = op0;
657 unsigned HOST_WIDE_INT subreg_off;
658
659 if (GET_CODE (arg0) == SUBREG)
660 {
661 /* Else we've got some float mode source being extracted into
662 a different float mode destination -- this combination of
663 subregs results in Severe Tire Damage. */
664 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
665 || GET_MODE_CLASS (fieldmode) == MODE_INT
666 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
667 arg0 = SUBREG_REG (arg0);
668 }
669
670 subreg_off = bitnum / BITS_PER_UNIT;
671 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
672 {
673 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
674
675 create_fixed_operand (&ops[0], arg0);
676 /* Shrink the source operand to FIELDMODE. */
677 create_convert_operand_to (&ops[1], value, fieldmode, false);
678 if (maybe_expand_insn (icode, 2, ops))
679 return true;
680 }
681 }
682
683 /* Handle fields bigger than a word. */
684
685 if (bitsize > BITS_PER_WORD)
686 {
687 /* Here we transfer the words of the field
688 in the order least significant first.
689 This is because the most significant word is the one which may
690 be less than full.
691 However, only do that if the value is not BLKmode. */
692
693 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
694 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
695 unsigned int i;
696 rtx last;
697
698 /* This is the mode we must force value to, so that there will be enough
699 subwords to extract. Note that fieldmode will often (always?) be
700 VOIDmode, because that is what store_field uses to indicate that this
701 is a bit field, but passing VOIDmode to operand_subword_force
702 is not allowed. */
703 fieldmode = GET_MODE (value);
704 if (fieldmode == VOIDmode)
705 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
706
707 last = get_last_insn ();
708 for (i = 0; i < nwords; i++)
709 {
710 /* If I is 0, use the low-order word in both field and target;
711 if I is 1, use the next to lowest word; and so on. */
712 unsigned int wordnum = (backwards
713 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
714 - i - 1
715 : i);
716 unsigned int bit_offset = (backwards
717 ? MAX ((int) bitsize - ((int) i + 1)
718 * BITS_PER_WORD,
719 0)
720 : (int) i * BITS_PER_WORD);
721 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
722 unsigned HOST_WIDE_INT new_bitsize =
723 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
724
725 /* If the remaining chunk doesn't have full wordsize we have
726 to make sure that for big endian machines the higher order
727 bits are used. */
728 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
729 value_word = simplify_expand_binop (word_mode, lshr_optab,
730 value_word,
731 GEN_INT (BITS_PER_WORD
732 - new_bitsize),
733 NULL_RTX, true,
734 OPTAB_LIB_WIDEN);
735
736 if (!store_bit_field_1 (op0, new_bitsize,
737 bitnum + bit_offset,
738 bitregion_start, bitregion_end,
739 word_mode,
740 value_word, fallback_p))
741 {
742 delete_insns_since (last);
743 return false;
744 }
745 }
746 return true;
747 }
748
749 /* If VALUE has a floating-point or complex mode, access it as an
750 integer of the corresponding size. This can occur on a machine
751 with 64 bit registers that uses SFmode for float. It can also
752 occur for unaligned float or complex fields. */
753 orig_value = value;
754 if (GET_MODE (value) != VOIDmode
755 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
756 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
757 {
758 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
759 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
760 }
761
762 /* If OP0 is a multi-word register, narrow it to the affected word.
763 If the region spans two words, defer to store_split_bit_field. */
764 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
765 {
766 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
767 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
768 gcc_assert (op0);
769 bitnum %= BITS_PER_WORD;
770 if (bitnum + bitsize > BITS_PER_WORD)
771 {
772 if (!fallback_p)
773 return false;
774
775 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
776 bitregion_end, value);
777 return true;
778 }
779 }
780
781 /* From here on we can assume that the field to be stored in fits
782 within a word. If the destination is a register, it too fits
783 in a word. */
784
785 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
786 if (op_mode != MAX_MACHINE_MODE
787 && !MEM_P (op0)
788 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
789 return true;
790
791 /* If OP0 is a memory, try copying it to a register and seeing if a
792 cheap register alternative is available. */
793 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
794 {
795 enum machine_mode bestmode;
796 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
797
798 /* Do not use insv for volatile bitfields when
799 -fstrict-volatile-bitfields is in effect. */
800 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
801 /* Do not use insv if the bit region is restricted and
802 an op_mode integer doesn't fit into the restricted region. */
803 && !(bitregion_end
804 && (bitnum - (bitnum % BITS_PER_UNIT)
805 + GET_MODE_BITSIZE (op_mode)
806 > bitregion_end + 1))
807 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
808 return true;
809
810 if (bitregion_end)
811 maxbits = bitregion_end - bitregion_start + 1;
812
813 /* Get the mode to use for inserting into this field. If OP0 is
814 BLKmode, get the smallest mode consistent with the alignment. If
815 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
816 mode. Otherwise, use the smallest mode containing the field. */
817
818 if (GET_MODE (op0) == BLKmode
819 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
820 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
821 bestmode = get_best_mode (bitsize, bitnum,
822 bitregion_start, bitregion_end,
823 MEM_ALIGN (op0), op_mode,
824 MEM_VOLATILE_P (op0));
825 else
826 bestmode = GET_MODE (op0);
827
828 if (bestmode != VOIDmode
829 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
830 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
831 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
832 {
833 rtx last, tempreg, xop0;
834 unsigned int unit;
835 unsigned HOST_WIDE_INT offset, bitpos;
836
837 last = get_last_insn ();
838
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 bitpos = bitnum % unit;
845 xop0 = adjust_bitfield_address (op0, bestmode, offset);
846
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, bitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
853 {
854 emit_move_insn (xop0, tempreg);
855 return true;
856 }
857 delete_insns_since (last);
858 }
859 }
860
861 if (!fallback_p)
862 return false;
863
864 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
865 bitregion_end, value);
866 return true;
867 }
868
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
872
873    BITREGION_START is the bitpos of the first bit in this region.
874    BITREGION_END is the bitpos of the last bit in this region.
875    These two fields are 0 if the C++ memory model does not apply,
876 or we are not interested in keeping track of bitfield regions.
877
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
879
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
887 {
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
892 {
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
896
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
900
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
902
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
913 }
914
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
919 }
920 \f
921 /* Use shifts and boolean operations to store VALUE into a bit field of
922 width BITSIZE in OP0, starting at bit BITNUM. */
923
924 static void
925 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
926 unsigned HOST_WIDE_INT bitnum,
927 unsigned HOST_WIDE_INT bitregion_start,
928 unsigned HOST_WIDE_INT bitregion_end,
929 rtx value)
930 {
931 enum machine_mode mode;
932 rtx temp;
933 int all_zero = 0;
934 int all_one = 0;
935
936 /* There is a case not handled here:
937 a structure with a known alignment of just a halfword
938 and a field split across two aligned halfwords within the structure.
939 Or likewise a structure with a known alignment of just a byte
940 and a field split across two bytes.
941 Such cases are not supposed to be able to occur. */
942
943 if (MEM_P (op0))
944 {
945 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
946
947 if (bitregion_end)
948 maxbits = bitregion_end - bitregion_start + 1;
949
950 /* Get the proper mode to use for this field. We want a mode that
951 includes the entire field. If such a mode would be larger than
952 a word, we won't be doing the extraction the normal way.
953 We don't want a mode bigger than the destination. */
954
955 mode = GET_MODE (op0);
956 if (GET_MODE_BITSIZE (mode) == 0
957 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
958 mode = word_mode;
959
960 if (MEM_VOLATILE_P (op0)
961 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
962 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
963 && flag_strict_volatile_bitfields > 0)
964 mode = GET_MODE (op0);
965 else
966 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
967 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
968
969 if (mode == VOIDmode)
970 {
971 /* The only way this should occur is if the field spans word
972 boundaries. */
973 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
974 bitregion_end, value);
975 return;
976 }
977
978 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
979 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
980 bitnum -= bit_offset;
981 }
982
983 mode = GET_MODE (op0);
984 gcc_assert (SCALAR_INT_MODE_P (mode));
985
986 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
987 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
988
989 if (BYTES_BIG_ENDIAN)
990 /* BITNUM is the distance between our msb
991 and that of the containing datum.
992 Convert it to the distance from the lsb. */
993 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
994
995 /* Now BITNUM is always the distance between our lsb
996 and that of OP0. */
997
998 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
999 we must first convert its mode to MODE. */
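/* For example, storing the constant 5 into a 3-bit field at lsb-relative
   bit 4 of an SImode word ANDs the word with ~(7 << 4) to clear the field
   and then IORs in (5 << 4).  */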
1000
1001 if (CONST_INT_P (value))
1002 {
1003 HOST_WIDE_INT v = INTVAL (value);
1004
1005 if (bitsize < HOST_BITS_PER_WIDE_INT)
1006 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1007
1008 if (v == 0)
1009 all_zero = 1;
1010 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1011 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1012 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1013 all_one = 1;
1014
1015 value = lshift_value (mode, value, bitnum, bitsize);
1016 }
1017 else
1018 {
1019 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1020 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1021
1022 if (GET_MODE (value) != mode)
1023 value = convert_to_mode (mode, value, 1);
1024
1025 if (must_and)
1026 value = expand_binop (mode, and_optab, value,
1027 mask_rtx (mode, 0, bitsize, 0),
1028 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1029 if (bitnum > 0)
1030 value = expand_shift (LSHIFT_EXPR, mode, value,
1031 bitnum, NULL_RTX, 1);
1032 }
1033
1034 /* Now clear the chosen bits in OP0,
1035 except that if VALUE is -1 we need not bother. */
1036 /* We keep the intermediates in registers to allow CSE to combine
1037 consecutive bitfield assignments. */
1038
1039 temp = force_reg (mode, op0);
1040
1041 if (! all_one)
1042 {
1043 temp = expand_binop (mode, and_optab, temp,
1044 mask_rtx (mode, bitnum, bitsize, 1),
1045 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1046 temp = force_reg (mode, temp);
1047 }
1048
1049 /* Now logical-or VALUE into OP0, unless it is zero. */
1050
1051 if (! all_zero)
1052 {
1053 temp = expand_binop (mode, ior_optab, temp, value,
1054 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1055 temp = force_reg (mode, temp);
1056 }
1057
1058 if (op0 != temp)
1059 {
1060 op0 = copy_rtx (op0);
1061 emit_move_insn (op0, temp);
1062 }
1063 }
1064 \f
1065 /* Store a bit field that is split across multiple accessible memory objects.
1066
1067 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1068 BITSIZE is the field width; BITPOS the position of its first bit
1069 (within the word).
1070 VALUE is the value to store.
1071
1072 This does not yet handle fields wider than BITS_PER_WORD. */
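/* For example, on a target with 32-bit words, a 20-bit field that starts
   at bit 26 of a word-aligned MEM is stored as a 6-bit piece in the first
   word followed by a 14-bit piece in the second.  */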
1073
1074 static void
1075 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1076 unsigned HOST_WIDE_INT bitpos,
1077 unsigned HOST_WIDE_INT bitregion_start,
1078 unsigned HOST_WIDE_INT bitregion_end,
1079 rtx value)
1080 {
1081 unsigned int unit;
1082 unsigned int bitsdone = 0;
1083
1084 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1085 much at a time. */
1086 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1087 unit = BITS_PER_WORD;
1088 else
1089 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1090
1091 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1092 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1093 that VALUE might be a floating-point constant. */
1094 if (CONSTANT_P (value) && !CONST_INT_P (value))
1095 {
1096 rtx word = gen_lowpart_common (word_mode, value);
1097
1098 if (word && (value != word))
1099 value = word;
1100 else
1101 value = gen_lowpart_common (word_mode,
1102 force_reg (GET_MODE (value) != VOIDmode
1103 ? GET_MODE (value)
1104 : word_mode, value));
1105 }
1106
1107 while (bitsdone < bitsize)
1108 {
1109 unsigned HOST_WIDE_INT thissize;
1110 rtx part, word;
1111 unsigned HOST_WIDE_INT thispos;
1112 unsigned HOST_WIDE_INT offset;
1113
1114 offset = (bitpos + bitsdone) / unit;
1115 thispos = (bitpos + bitsdone) % unit;
1116
1117       /* When the region of bytes we can touch is restricted, decrease
1118 UNIT close to the end of the region as needed. */
1119 if (bitregion_end
1120 && unit > BITS_PER_UNIT
1121 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1122 {
1123 unit = unit / 2;
1124 continue;
1125 }
1126
1127 /* THISSIZE must not overrun a word boundary. Otherwise,
1128 store_fixed_bit_field will call us again, and we will mutually
1129 recurse forever. */
1130 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1131 thissize = MIN (thissize, unit - thispos);
1132
1133 if (BYTES_BIG_ENDIAN)
1134 {
1135 /* Fetch successively less significant portions. */
1136 if (CONST_INT_P (value))
1137 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1138 >> (bitsize - bitsdone - thissize))
1139 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1140 else
1141 {
1142 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1143 /* The args are chosen so that the last part includes the
1144 lsb. Give extract_bit_field the value it needs (with
1145 endianness compensation) to fetch the piece we want. */
1146 part = extract_fixed_bit_field (word_mode, value, thissize,
1147 total_bits - bitsize + bitsdone,
1148 NULL_RTX, 1, false);
1149 }
1150 }
1151 else
1152 {
1153 /* Fetch successively more significant portions. */
1154 if (CONST_INT_P (value))
1155 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1156 >> bitsdone)
1157 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1158 else
1159 part = extract_fixed_bit_field (word_mode, value, thissize,
1160 bitsdone, NULL_RTX, 1, false);
1161 }
1162
1163 /* If OP0 is a register, then handle OFFSET here.
1164
1165 When handling multiword bitfields, extract_bit_field may pass
1166 down a word_mode SUBREG of a larger REG for a bitfield that actually
1167 crosses a word boundary. Thus, for a SUBREG, we must find
1168 the current word starting from the base register. */
1169 if (GET_CODE (op0) == SUBREG)
1170 {
1171 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1172 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1173 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1174 word = word_offset ? const0_rtx : op0;
1175 else
1176 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1177 GET_MODE (SUBREG_REG (op0)));
1178 offset = 0;
1179 }
1180 else if (REG_P (op0))
1181 {
1182 enum machine_mode op0_mode = GET_MODE (op0);
1183 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1184 word = offset ? const0_rtx : op0;
1185 else
1186 word = operand_subword_force (op0, offset, GET_MODE (op0));
1187 offset = 0;
1188 }
1189 else
1190 word = op0;
1191
1192 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1193 it is just an out-of-bounds access. Ignore it. */
1194 if (word != const0_rtx)
1195 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1196 bitregion_start, bitregion_end, part);
1197 bitsdone += thissize;
1198 }
1199 }
1200 \f
1201 /* A subroutine of extract_bit_field_1 that converts return value X
1202 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1203 to extract_bit_field. */
1204
1205 static rtx
1206 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1207 enum machine_mode tmode, bool unsignedp)
1208 {
1209 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1210 return x;
1211
1212   /* If TMODE is not a scalar integer mode, first convert to the
1213      integer mode of that size and then access the result as a value
1214      of TMODE (e.g. a floating-point value) via a SUBREG.  */
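/* For example, on a target where SFmode is 32 bits wide, a request for an
   SFmode result is first produced in SImode and then reinterpreted as
   SFmode with gen_lowpart.  */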
1215 if (!SCALAR_INT_MODE_P (tmode))
1216 {
1217 enum machine_mode smode;
1218
1219 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1220 x = convert_to_mode (smode, x, unsignedp);
1221 x = force_reg (smode, x);
1222 return gen_lowpart (tmode, x);
1223 }
1224
1225 return convert_to_mode (tmode, x, unsignedp);
1226 }
1227
1228 /* Try to use an ext(z)v pattern to extract a field from OP0.
1229 Return the extracted value on success, otherwise return null.
1230 EXT_MODE is the mode of the extraction and the other arguments
1231 are as for extract_bit_field. */
1232
1233 static rtx
1234 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1235 unsigned HOST_WIDE_INT bitnum,
1236 int unsignedp, rtx target,
1237 enum machine_mode mode, enum machine_mode tmode,
1238 enum machine_mode ext_mode)
1239 {
1240 struct expand_operand ops[4];
1241 rtx spec_target = target;
1242 rtx spec_target_subreg = 0;
1243 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1244
1245 if (bitsize == 0 || unit < bitsize)
1246 return NULL_RTX;
1247
1248 if (MEM_P (op0))
1249 {
1250 /* Get a reference to the first byte of the field. */
1251 op0 = adjust_bitfield_address (op0, byte_mode, bitnum / BITS_PER_UNIT);
1252 bitnum %= BITS_PER_UNIT;
1253 }
1254 else
1255 {
1256 /* Convert from counting within OP0 to counting in EXT_MODE. */
1257 if (BYTES_BIG_ENDIAN)
1258 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1259
1260 /* If op0 is a register, we need it in EXT_MODE to make it
1261 acceptable to the format of ext(z)v. */
1262 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1263 return NULL_RTX;
1264 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1265 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1266 }
1267
1268 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1269 "backwards" from the size of the unit we are extracting from.
1270 Otherwise, we count bits from the most significant on a
1271 BYTES/BITS_BIG_ENDIAN machine. */
1272
1273 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1274 bitnum = unit - bitsize - bitnum;
1275
1276 if (target == 0)
1277 target = spec_target = gen_reg_rtx (tmode);
1278
1279 if (GET_MODE (target) != ext_mode)
1280 {
1281 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1282          between the mode of the extraction (EXT_MODE) and the target
1283 mode. Instead, create a temporary and use convert_move to set
1284 the target. */
1285 if (REG_P (target)
1286 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1287 {
1288 target = gen_lowpart (ext_mode, target);
1289 if (GET_MODE_PRECISION (ext_mode)
1290 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1291 spec_target_subreg = target;
1292 }
1293 else
1294 target = gen_reg_rtx (ext_mode);
1295 }
1296
1297 create_output_operand (&ops[0], target, ext_mode);
1298 create_fixed_operand (&ops[1], op0);
1299 create_integer_operand (&ops[2], bitsize);
1300 create_integer_operand (&ops[3], bitnum);
1301 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1302 {
1303 target = ops[0].value;
1304 if (target == spec_target)
1305 return target;
1306 if (target == spec_target_subreg)
1307 return spec_target;
1308 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1309 }
1310 return NULL_RTX;
1311 }
1312
1313 /* A subroutine of extract_bit_field, with the same arguments.
1314 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1315 if we can find no other means of implementing the operation.
1316    If FALLBACK_P is false, return NULL instead.  */
1317
1318 static rtx
1319 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1320 unsigned HOST_WIDE_INT bitnum,
1321 int unsignedp, bool packedp, rtx target,
1322 enum machine_mode mode, enum machine_mode tmode,
1323 bool fallback_p)
1324 {
1325 rtx op0 = str_rtx;
1326 enum machine_mode int_mode;
1327 enum machine_mode ext_mode;
1328 enum machine_mode mode1;
1329
1330 if (tmode == VOIDmode)
1331 tmode = mode;
1332
1333 while (GET_CODE (op0) == SUBREG)
1334 {
1335 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1336 op0 = SUBREG_REG (op0);
1337 }
1338
1339 /* If we have an out-of-bounds access to a register, just return an
1340 uninitialized register of the required mode. This can occur if the
1341 source code contains an out-of-bounds access to a small array. */
1342 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1343 return gen_reg_rtx (tmode);
1344
1345 if (REG_P (op0)
1346 && mode == GET_MODE (op0)
1347 && bitnum == 0
1348 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1349 {
1350 /* We're trying to extract a full register from itself. */
1351 return op0;
1352 }
1353
1354 /* See if we can get a better vector mode before extracting. */
1355 if (VECTOR_MODE_P (GET_MODE (op0))
1356 && !MEM_P (op0)
1357 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1358 {
1359 enum machine_mode new_mode;
1360
1361 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1362 new_mode = MIN_MODE_VECTOR_FLOAT;
1363 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1364 new_mode = MIN_MODE_VECTOR_FRACT;
1365 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1366 new_mode = MIN_MODE_VECTOR_UFRACT;
1367 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1368 new_mode = MIN_MODE_VECTOR_ACCUM;
1369 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1370 new_mode = MIN_MODE_VECTOR_UACCUM;
1371 else
1372 new_mode = MIN_MODE_VECTOR_INT;
1373
1374 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1375 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1376 && targetm.vector_mode_supported_p (new_mode))
1377 break;
1378 if (new_mode != VOIDmode)
1379 op0 = gen_lowpart (new_mode, op0);
1380 }
1381
1382 /* Use vec_extract patterns for extracting parts of vectors whenever
1383 available. */
1384 if (VECTOR_MODE_P (GET_MODE (op0))
1385 && !MEM_P (op0)
1386 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1387 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1388 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1389 {
1390 struct expand_operand ops[3];
1391 enum machine_mode outermode = GET_MODE (op0);
1392 enum machine_mode innermode = GET_MODE_INNER (outermode);
1393 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1394 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1395
1396 create_output_operand (&ops[0], target, innermode);
1397 create_input_operand (&ops[1], op0, outermode);
1398 create_integer_operand (&ops[2], pos);
1399 if (maybe_expand_insn (icode, 3, ops))
1400 {
1401 target = ops[0].value;
1402 if (GET_MODE (target) != mode)
1403 return gen_lowpart (tmode, target);
1404 return target;
1405 }
1406 }
1407
1408 /* Make sure we are playing with integral modes. Pun with subregs
1409 if we aren't. */
1410 {
1411 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1412 if (imode != GET_MODE (op0))
1413 {
1414 if (MEM_P (op0))
1415 op0 = adjust_bitfield_address (op0, imode, 0);
1416 else if (imode != BLKmode)
1417 {
1418 op0 = gen_lowpart (imode, op0);
1419
1420 /* If we got a SUBREG, force it into a register since we
1421 aren't going to be able to do another SUBREG on it. */
1422 if (GET_CODE (op0) == SUBREG)
1423 op0 = force_reg (imode, op0);
1424 }
1425 else if (REG_P (op0))
1426 {
1427 rtx reg, subreg;
1428 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1429 MODE_INT);
1430 reg = gen_reg_rtx (imode);
1431 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1432 emit_move_insn (subreg, op0);
1433 op0 = reg;
1434 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1435 }
1436 else
1437 {
1438 rtx mem = assign_stack_temp (GET_MODE (op0),
1439 GET_MODE_SIZE (GET_MODE (op0)));
1440 emit_move_insn (mem, op0);
1441 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1442 }
1443 }
1444 }
1445
1446 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1447 If that's wrong, the solution is to test for it and set TARGET to 0
1448 if needed. */
1449
1450 /* If the bitfield is volatile, we need to make sure the access
1451 remains on a type-aligned boundary. */
1452 if (GET_CODE (op0) == MEM
1453 && MEM_VOLATILE_P (op0)
1454 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1455 && flag_strict_volatile_bitfields > 0)
1456 goto no_subreg_mode_swap;
1457
1458 /* Only scalar integer modes can be converted via subregs. There is an
1459 additional problem for FP modes here in that they can have a precision
1460 which is different from the size. mode_for_size uses precision, but
1461 we want a mode based on the size, so we must avoid calling it for FP
1462 modes. */
1463 mode1 = mode;
1464 if (SCALAR_INT_MODE_P (tmode))
1465 {
1466 enum machine_mode try_mode = mode_for_size (bitsize,
1467 GET_MODE_CLASS (tmode), 0);
1468 if (try_mode != BLKmode)
1469 mode1 = try_mode;
1470 }
1471 gcc_assert (mode1 != BLKmode);
1472
1473 /* Extraction of a full MODE1 value can be done with a subreg as long
1474 as the least significant bit of the value is the least significant
1475 bit of either OP0 or a word of OP0. */
1476 if (!MEM_P (op0)
1477 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1478 && bitsize == GET_MODE_BITSIZE (mode1)
1479 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1480 {
1481 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1482 bitnum / BITS_PER_UNIT);
1483 if (sub)
1484 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1485 }
1486
1487 /* Extraction of a full MODE1 value can be done with a load as long as
1488 the field is on a byte boundary and is sufficiently aligned. */
1489 if (MEM_P (op0)
1490 && bitnum % BITS_PER_UNIT == 0
1491 && bitsize == GET_MODE_BITSIZE (mode1)
1492 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1493 || (bitnum % bitsize == 0
1494 && MEM_ALIGN (op0) % bitsize == 0)))
1495 {
1496 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1497 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1498 }
1499
1500 no_subreg_mode_swap:
1501
1502 /* Handle fields bigger than a word. */
1503
1504 if (bitsize > BITS_PER_WORD)
1505 {
1506 /* Here we transfer the words of the field
1507 in the order least significant first.
1508 This is because the most significant word is the one which may
1509 be less than full. */
1510
1511 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1512 unsigned int i;
1513 rtx last;
1514
1515 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1516 target = gen_reg_rtx (mode);
1517
1518 /* Indicate for flow that the entire target reg is being set. */
1519 emit_clobber (target);
1520
1521 last = get_last_insn ();
1522 for (i = 0; i < nwords; i++)
1523 {
1524 /* If I is 0, use the low-order word in both field and target;
1525 if I is 1, use the next to lowest word; and so on. */
1526 /* Word number in TARGET to use. */
1527 unsigned int wordnum
1528 = (WORDS_BIG_ENDIAN
1529 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1530 : i);
1531 /* Offset from start of field in OP0. */
1532 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1533 ? MAX (0, ((int) bitsize - ((int) i + 1)
1534 * (int) BITS_PER_WORD))
1535 : (int) i * BITS_PER_WORD);
1536 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1537 rtx result_part
1538 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1539 bitsize - i * BITS_PER_WORD),
1540 bitnum + bit_offset, 1, false, target_part,
1541 mode, word_mode, fallback_p);
1542
1543 gcc_assert (target_part);
1544 if (!result_part)
1545 {
1546 delete_insns_since (last);
1547 return NULL;
1548 }
1549
1550 if (result_part != target_part)
1551 emit_move_insn (target_part, result_part);
1552 }
1553
1554 if (unsignedp)
1555 {
1556 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1557 need to be zero'd out. */
1558 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1559 {
1560 unsigned int i, total_words;
1561
1562 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1563 for (i = nwords; i < total_words; i++)
1564 emit_move_insn
1565 (operand_subword (target,
1566 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1567 1, VOIDmode),
1568 const0_rtx);
1569 }
1570 return target;
1571 }
1572
1573 /* Signed bit field: sign-extend with two arithmetic shifts. */
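/* E.g. a 40-bit signed field extracted into a 64-bit DImode result is
   shifted left by 24 and then arithmetically right by 24, replicating the
   field's sign bit through the upper 24 bits.  */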
1574 target = expand_shift (LSHIFT_EXPR, mode, target,
1575 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1576 return expand_shift (RSHIFT_EXPR, mode, target,
1577 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1578 }
1579
1580 /* If OP0 is a multi-word register, narrow it to the affected word.
1581 If the region spans two words, defer to extract_split_bit_field. */
1582 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1583 {
1584 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1585 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1586 bitnum %= BITS_PER_WORD;
1587 if (bitnum + bitsize > BITS_PER_WORD)
1588 {
1589 if (!fallback_p)
1590 return NULL_RTX;
1591 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1592 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1593 }
1594 }
1595
1596 /* From here on we know the desired field is smaller than a word.
1597 If OP0 is a register, it too fits within a word. */
1598
1599 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1600 if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0))
1601 {
1602 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1603 unsignedp, target, mode,
1604 tmode, ext_mode);
1605 if (result)
1606 return result;
1607 }
1608
1609 /* If OP0 is a memory, try copying it to a register and seeing if a
1610 cheap register alternative is available. */
1611 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1612 {
1613 enum machine_mode bestmode;
1614
1615 /* Do not use extv/extzv for volatile bitfields when
1616 -fstrict-volatile-bitfields is in effect. */
1617 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0))
1618 {
1619 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1620 unsignedp, target, mode,
1621 tmode, ext_mode);
1622 if (result)
1623 return result;
1624 }
1625
1626       /* Get the mode to use for extracting this field.  If
1627 OP0 is BLKmode, get the smallest mode consistent with the
1628 alignment. If OP0 is a non-BLKmode object that is no
1629 wider than EXT_MODE, use its mode. Otherwise, use the
1630 smallest mode containing the field. */
1631
1632 if (GET_MODE (op0) == BLKmode
1633 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1634 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1635 ext_mode, MEM_VOLATILE_P (op0));
1636 else
1637 bestmode = GET_MODE (op0);
1638
1639 if (bestmode != VOIDmode
1640 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1641 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1642 {
1643 unsigned HOST_WIDE_INT offset, bitpos;
1644
1645 /* Compute the offset as a multiple of this unit,
1646 counting in bytes. */
1647 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1648 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1649 bitpos = bitnum % unit;
1650
1651 /* Make sure the register is big enough for the whole field. */
1652 if (bitpos + bitsize <= unit)
1653 {
1654 rtx last, result, xop0;
1655
1656 last = get_last_insn ();
1657
1658 /* Fetch it to a register in that size. */
1659 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1660 xop0 = force_reg (bestmode, xop0);
1661 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1662 unsignedp, packedp, target,
1663 mode, tmode, false);
1664 if (result)
1665 return result;
1666
1667 delete_insns_since (last);
1668 }
1669 }
1670 }
1671
1672 if (!fallback_p)
1673 return NULL;
1674
1675 /* Find a correspondingly-sized integer field, so we can apply
1676 shifts and masks to it. */
1677 int_mode = int_mode_for_mode (tmode);
1678 if (int_mode == BLKmode)
1679 int_mode = int_mode_for_mode (mode);
1680 /* Should probably push op0 out to memory and then do a load. */
1681 gcc_assert (int_mode != BLKmode);
1682
1683 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1684 target, unsignedp, packedp);
1685 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687
1688 /* Generate code to extract a bit-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1699
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1704
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1712 }
1713 \f
1714 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1715 from bit BITNUM of OP0.
1716
1717 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1718 PACKEDP is true if the field has the packed attribute.
1719
1720 If TARGET is nonzero, attempts to store the value there
1721 and return TARGET, but this is not guaranteed.
1722 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
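/* As a rough sketch of what this amounts to (illustrative only, assuming a
   little-endian target and an unsigned field): extracting a 3-bit field that
   starts at bit 5 of a word-sized OP0 is essentially

       (op0 >> 5) & 0x7

   i.e. a logical right shift by BITNUM followed by an AND with a mask of
   BITSIZE low-order ones; the signed case instead uses the two-shift
   sequence described further below.  */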
1723
1724 static rtx
1725 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1726 unsigned HOST_WIDE_INT bitsize,
1727 unsigned HOST_WIDE_INT bitnum, rtx target,
1728 int unsignedp, bool packedp)
1729 {
1730 enum machine_mode mode;
1731
1732 if (MEM_P (op0))
1733 {
1734 /* Get the proper mode to use for this field. We want a mode that
1735 includes the entire field. If such a mode would be larger than
1736 a word, we won't be doing the extraction the normal way. */
1737
1738 if (MEM_VOLATILE_P (op0)
1739 && flag_strict_volatile_bitfields > 0)
1740 {
1741 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1742 mode = GET_MODE (op0);
1743 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1744 mode = GET_MODE (target);
1745 else
1746 mode = tmode;
1747 }
1748 else
1749 mode = get_best_mode (bitsize, bitnum, 0, 0,
1750 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1751
1752 if (mode == VOIDmode)
1753 /* The only way this should occur is if the field spans word
1754 boundaries. */
1755 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1756
1757 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1758 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1759
1760 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1761 if it results in a multi-word access where we otherwise wouldn't
1762 have one. So, check for that case here. */
1763 if (MEM_P (op0)
1764 && MEM_VOLATILE_P (op0)
1765 && flag_strict_volatile_bitfields > 0
1766 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1767 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1768 {
1769 if (STRICT_ALIGNMENT)
1770 {
1771 static bool informed_about_misalignment = false;
1772
1773 if (packedp)
1774 {
1775 if (bitsize == total_bits)
1776 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1777 "multiple accesses to volatile structure"
1778 " member because of packed attribute");
1779 else
1780 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1781 "multiple accesses to volatile structure"
1782 " bitfield because of packed attribute");
1783
1784 return extract_split_bit_field (op0, bitsize, bitnum,
1785 unsignedp);
1786 }
1787
1788 if (bitsize == total_bits)
1789 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1790 "mis-aligned access used for structure member");
1791 else
1792 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1793 "mis-aligned access used for structure bitfield");
1794
1795 if (! informed_about_misalignment)
1796 {
1797 informed_about_misalignment = true;
1798 inform (input_location,
1799 "when a volatile object spans multiple type-sized"
1800 " locations, the compiler must choose between using"
1801 " a single mis-aligned access to preserve the"
1802 " volatility, or using multiple aligned accesses"
1803 " to avoid runtime faults; this code may fail at"
1804 " runtime if the hardware does not allow this"
1805 " access");
1806 }
1807 }
1808 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1809 }
1810 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1811 bitnum -= bit_offset;
1812 }
1813
1814 mode = GET_MODE (op0);
1815 gcc_assert (SCALAR_INT_MODE_P (mode));
1816
1817 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1818 for invalid input, such as extract equivalent of f5 from
1819 gcc.dg/pr48335-2.c. */
1820
1821 if (BYTES_BIG_ENDIAN)
1822 /* BITNUM is the distance between our msb and that of OP0.
1823 Convert it to the distance from the lsb. */
1824 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1825
1826 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1827 We have reduced the big-endian case to the little-endian case. */
1828
1829 if (unsignedp)
1830 {
1831 if (bitnum)
1832 {
1833 /* If the field does not already start at the lsb,
1834 shift it so it does. */
1835 /* Maybe propagate the target for the shift. */
1836 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1837 if (tmode != mode)
1838 subtarget = 0;
1839 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1840 }
1841 /* Convert the value to the desired mode. */
1842 if (mode != tmode)
1843 op0 = convert_to_mode (tmode, op0, 1);
1844
1845 /* Unless the msb of the field used to be the msb when we shifted,
1846 mask out the upper bits. */
1847
1848 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1849 return expand_binop (GET_MODE (op0), and_optab, op0,
1850 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851 target, 1, OPTAB_LIB_WIDEN);
1852 return op0;
1853 }
1854
1855 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856 then arithmetic-shift its lsb to the lsb of the word. */
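/* For instance (purely illustrative): a 5-bit signed field whose lsb is
   bit 3 of a 32-bit word comes out as roughly

       ((int) (op0 << 24)) >> 27

   the left shift of 32 - (3 + 5) bits puts the field's sign bit in the
   word's msb, and the arithmetic right shift of 32 - 5 bits moves the
   field back to the lsb while sign-extending it.  */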
1857 op0 = force_reg (mode, op0);
1858
1859 /* Find the narrowest integer mode that contains the field. */
1860
1861 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862 mode = GET_MODE_WIDER_MODE (mode))
1863 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1864 {
1865 op0 = convert_to_mode (mode, op0, 0);
1866 break;
1867 }
1868
1869 if (mode != tmode)
1870 target = 0;
1871
1872 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1873 {
1874 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1875 /* Maybe propagate the target for the shift. */
1876 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1877 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1878 }
1879
1880 return expand_shift (RSHIFT_EXPR, mode, op0,
1881 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1882 }
1883 \f
1884 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1885 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1886 complement of that if COMPLEMENT. The mask is truncated if
1887 necessary to the width of mode MODE. The mask is zero-extended if
1888 BITSIZE+BITPOS is too small for MODE. */
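/* For example (layout only, not target-specific): BITSIZE = 4, BITPOS = 2
   gives the SImode mask 0x3c (binary ...111100), or 0xffffffc3 when
   COMPLEMENT is nonzero.  */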
1889
1890 static rtx
1891 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1892 {
1893 double_int mask;
1894
1895 mask = double_int::mask (bitsize);
1896 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1897
1898 if (complement)
1899 mask = ~mask;
1900
1901 return immed_double_int_const (mask, mode);
1902 }
1903
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1905 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1906
1907 static rtx
1908 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1909 {
1910 double_int val;
1911
1912 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1913 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1914
1915 return immed_double_int_const (val, mode);
1916 }
1917 \f
1918 /* Extract a bit field that is split across two words
1919 and return an RTX for the result.
1920
1921 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1922 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1923 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
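/* As an illustration of the split (assuming 32-bit words): a 20-bit field
   whose first bit is bit 25 of the first word is assembled from the 7 bits
   remaining in that word and the low 13 bits of the following word, each
   piece extracted as an unsigned fixed bit field and then IORed into
   place.  */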
1924
1925 static rtx
1926 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1927 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1928 {
1929 unsigned int unit;
1930 unsigned int bitsdone = 0;
1931 rtx result = NULL_RTX;
1932 int first = 1;
1933
1934 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1935 much at a time. */
1936 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1937 unit = BITS_PER_WORD;
1938 else
1939 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1940
1941 while (bitsdone < bitsize)
1942 {
1943 unsigned HOST_WIDE_INT thissize;
1944 rtx part, word;
1945 unsigned HOST_WIDE_INT thispos;
1946 unsigned HOST_WIDE_INT offset;
1947
1948 offset = (bitpos + bitsdone) / unit;
1949 thispos = (bitpos + bitsdone) % unit;
1950
1951 /* THISSIZE must not overrun a word boundary. Otherwise,
1952 extract_fixed_bit_field will call us again, and we will mutually
1953 recurse forever. */
1954 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1955 thissize = MIN (thissize, unit - thispos);
1956
1957 /* If OP0 is a register, then handle OFFSET here.
1958
1959 When handling multiword bitfields, extract_bit_field may pass
1960 down a word_mode SUBREG of a larger REG for a bitfield that actually
1961 crosses a word boundary. Thus, for a SUBREG, we must find
1962 the current word starting from the base register. */
1963 if (GET_CODE (op0) == SUBREG)
1964 {
1965 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1966 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1967 GET_MODE (SUBREG_REG (op0)));
1968 offset = 0;
1969 }
1970 else if (REG_P (op0))
1971 {
1972 word = operand_subword_force (op0, offset, GET_MODE (op0));
1973 offset = 0;
1974 }
1975 else
1976 word = op0;
1977
1978 /* Extract the parts in bit-counting order,
1979 whose meaning is determined by BYTES_PER_UNIT.
1980 OFFSET is in UNITs, and UNIT is in bits. */
1981 part = extract_fixed_bit_field (word_mode, word, thissize,
1982 offset * unit + thispos, 0, 1, false);
1983 bitsdone += thissize;
1984
1985 /* Shift this part into place for the result. */
1986 if (BYTES_BIG_ENDIAN)
1987 {
1988 if (bitsize != bitsdone)
1989 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1990 bitsize - bitsdone, 0, 1);
1991 }
1992 else
1993 {
1994 if (bitsdone != thissize)
1995 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1996 bitsdone - thissize, 0, 1);
1997 }
1998
1999 if (first)
2000 result = part;
2001 else
2002 /* Combine the parts with bitwise or. This works
2003 because we extracted each part as an unsigned bit field. */
2004 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2005 OPTAB_LIB_WIDEN);
2006
2007 first = 0;
2008 }
2009
2010 /* Unsigned bit field: we are done. */
2011 if (unsignedp)
2012 return result;
2013 /* Signed bit field: sign-extend with two arithmetic shifts. */
2014 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2015 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2016 return expand_shift (RSHIFT_EXPR, word_mode, result,
2017 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2018 }
2019 \f
2020 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2021 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2022 MODE, fill the upper bits with zeros. Fail if the layout of either
2023 mode is unknown (as for CC modes) or if the extraction would involve
2024 unprofitable mode punning. Return the value on success, otherwise
2025 return null.
2026
2027 This is different from gen_lowpart* in these respects:
2028
2029 - the returned value must always be considered an rvalue
2030
2031 - when MODE is wider than SRC_MODE, the extraction involves
2032 a zero extension
2033
2034 - when MODE is smaller than SRC_MODE, the extraction involves
2035 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2036
2037 In other words, this routine performs a computation, whereas the
2038 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2039 operations. */
2040
2041 rtx
2042 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2043 {
2044 enum machine_mode int_mode, src_int_mode;
2045
2046 if (mode == src_mode)
2047 return src;
2048
2049 if (CONSTANT_P (src))
2050 {
2051 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2052 fails, it will happily create (subreg (symbol_ref)) or similar
2053 invalid SUBREGs. */
2054 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2055 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2056 if (ret)
2057 return ret;
2058
2059 if (GET_MODE (src) == VOIDmode
2060 || !validate_subreg (mode, src_mode, src, byte))
2061 return NULL_RTX;
2062
2063 src = force_reg (GET_MODE (src), src);
2064 return gen_rtx_SUBREG (mode, src, byte);
2065 }
2066
2067 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2068 return NULL_RTX;
2069
2070 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2071 && MODES_TIEABLE_P (mode, src_mode))
2072 {
2073 rtx x = gen_lowpart_common (mode, src);
2074 if (x)
2075 return x;
2076 }
2077
2078 src_int_mode = int_mode_for_mode (src_mode);
2079 int_mode = int_mode_for_mode (mode);
2080 if (src_int_mode == BLKmode || int_mode == BLKmode)
2081 return NULL_RTX;
2082
2083 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2084 return NULL_RTX;
2085 if (!MODES_TIEABLE_P (int_mode, mode))
2086 return NULL_RTX;
2087
2088 src = gen_lowpart (src_int_mode, src);
2089 src = convert_modes (int_mode, src_int_mode, src, true);
2090 src = gen_lowpart (mode, src);
2091 return src;
2092 }
2093 \f
2094 /* Add INC into TARGET. */
2095
2096 void
2097 expand_inc (rtx target, rtx inc)
2098 {
2099 rtx value = expand_binop (GET_MODE (target), add_optab,
2100 target, inc,
2101 target, 0, OPTAB_LIB_WIDEN);
2102 if (value != target)
2103 emit_move_insn (target, value);
2104 }
2105
2106 /* Subtract DEC from TARGET. */
2107
2108 void
2109 expand_dec (rtx target, rtx dec)
2110 {
2111 rtx value = expand_binop (GET_MODE (target), sub_optab,
2112 target, dec,
2113 target, 0, OPTAB_LIB_WIDEN);
2114 if (value != target)
2115 emit_move_insn (target, value);
2116 }
2117 \f
2118 /* Output a shift instruction for expression code CODE,
2119 with SHIFTED being the rtx for the value to shift,
2120 and AMOUNT the rtx for the amount to shift by.
2121 Store the result in the rtx TARGET, if that is convenient.
2122 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2123 Return the rtx for where the value is. */
2124
2125 static rtx
2126 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2127 rtx amount, rtx target, int unsignedp)
2128 {
2129 rtx op1, temp = 0;
2130 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2131 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2132 optab lshift_optab = ashl_optab;
2133 optab rshift_arith_optab = ashr_optab;
2134 optab rshift_uns_optab = lshr_optab;
2135 optab lrotate_optab = rotl_optab;
2136 optab rrotate_optab = rotr_optab;
2137 enum machine_mode op1_mode;
2138 int attempt;
2139 bool speed = optimize_insn_for_speed_p ();
2140
2141 op1 = amount;
2142 op1_mode = GET_MODE (op1);
2143
2144 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2145 shift amount is a vector, use the vector/vector shift patterns. */
2146 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2147 {
2148 lshift_optab = vashl_optab;
2149 rshift_arith_optab = vashr_optab;
2150 rshift_uns_optab = vlshr_optab;
2151 lrotate_optab = vrotl_optab;
2152 rrotate_optab = vrotr_optab;
2153 }
2154
2155 /* We used to detect shift counts computed by NEGATE_EXPR
2156 and shift in the other direction, but that does not work
2157 on all machines. */
2158
2159 if (SHIFT_COUNT_TRUNCATED)
2160 {
2161 if (CONST_INT_P (op1)
2162 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2163 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2164 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2165 % GET_MODE_BITSIZE (mode));
2166 else if (GET_CODE (op1) == SUBREG
2167 && subreg_lowpart_p (op1)
2168 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2169 && INTEGRAL_MODE_P (GET_MODE (op1)))
2170 op1 = SUBREG_REG (op1);
2171 }
2172
2173 if (op1 == const0_rtx)
2174 return shifted;
2175
2176 /* Check whether it's cheaper to implement a left shift by a constant
2177 bit count by a sequence of additions. */
2178 if (code == LSHIFT_EXPR
2179 && CONST_INT_P (op1)
2180 && INTVAL (op1) > 0
2181 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2182 && INTVAL (op1) < MAX_BITS_PER_WORD
2183 && (shift_cost (speed, mode, INTVAL (op1))
2184 > INTVAL (op1) * add_cost (speed, mode))
2185 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2186 {
2187 int i;
2188 for (i = 0; i < INTVAL (op1); i++)
2189 {
2190 temp = force_reg (mode, shifted);
2191 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2192 unsignedp, OPTAB_LIB_WIDEN);
2193 }
2194 return shifted;
2195 }
2196
2197 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2198 {
2199 enum optab_methods methods;
2200
2201 if (attempt == 0)
2202 methods = OPTAB_DIRECT;
2203 else if (attempt == 1)
2204 methods = OPTAB_WIDEN;
2205 else
2206 methods = OPTAB_LIB_WIDEN;
2207
2208 if (rotate)
2209 {
2210 /* Widening does not work for rotation. */
2211 if (methods == OPTAB_WIDEN)
2212 continue;
2213 else if (methods == OPTAB_LIB_WIDEN)
2214 {
2215 /* If we have been unable to open-code this by a rotation,
2216 do it as the IOR of two shifts. I.e., to rotate A
2217 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2218 where C is the bitsize of A.
2219
2220 It is theoretically possible that the target machine might
2221 not be able to perform either shift and hence we would
2222 be making two libcalls rather than just the one for the
2223 shift (similarly if IOR could not be done). We will allow
2224 this extremely unlikely lossage to avoid complicating the
2225 code below. */
2226
2227 rtx subtarget = target == shifted ? 0 : target;
2228 rtx new_amount, other_amount;
2229 rtx temp1;
2230
2231 new_amount = op1;
2232 if (CONST_INT_P (op1))
2233 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2234 - INTVAL (op1));
2235 else
2236 other_amount
2237 = simplify_gen_binary (MINUS, GET_MODE (op1),
2238 GEN_INT (GET_MODE_PRECISION (mode)),
2239 op1);
2240
2241 shifted = force_reg (mode, shifted);
2242
2243 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2244 mode, shifted, new_amount, 0, 1);
2245 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2246 mode, shifted, other_amount,
2247 subtarget, 1);
2248 return expand_binop (mode, ior_optab, temp, temp1, target,
2249 unsignedp, methods);
2250 }
2251
2252 temp = expand_binop (mode,
2253 left ? lrotate_optab : rrotate_optab,
2254 shifted, op1, target, unsignedp, methods);
2255 }
2256 else if (unsignedp)
2257 temp = expand_binop (mode,
2258 left ? lshift_optab : rshift_uns_optab,
2259 shifted, op1, target, unsignedp, methods);
2260
2261 /* Do arithmetic shifts.
2262 Also, if we are going to widen the operand, we can just as well
2263 use an arithmetic right-shift instead of a logical one. */
2264 if (temp == 0 && ! rotate
2265 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2266 {
2267 enum optab_methods methods1 = methods;
2268
2269 /* If trying to widen a log shift to an arithmetic shift,
2270 don't accept an arithmetic shift of the same size. */
2271 if (unsignedp)
2272 methods1 = OPTAB_MUST_WIDEN;
2273
2274 /* Arithmetic shift */
2275
2276 temp = expand_binop (mode,
2277 left ? lshift_optab : rshift_arith_optab,
2278 shifted, op1, target, unsignedp, methods1);
2279 }
2280
2281 /* We used to try extzv here for logical right shifts, but that was
2282 only useful for one machine, the VAX, and caused poor code
2283 generation there for lshrdi3, so the code was deleted and a
2284 define_expand for lshrsi3 was added to vax.md. */
2285 }
2286
2287 gcc_assert (temp);
2288 return temp;
2289 }
2290
2291 /* Output a shift instruction for expression code CODE,
2292 with SHIFTED being the rtx for the value to shift,
2293 and AMOUNT the amount to shift by.
2294 Store the result in the rtx TARGET, if that is convenient.
2295 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2296 Return the rtx for where the value is. */
2297
2298 rtx
2299 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2300 int amount, rtx target, int unsignedp)
2301 {
2302 return expand_shift_1 (code, mode,
2303 shifted, GEN_INT (amount), target, unsignedp);
2304 }
2305
2306 /* Output a shift instruction for expression code CODE,
2307 with SHIFTED being the rtx for the value to shift,
2308 and AMOUNT the tree for the amount to shift by.
2309 Store the result in the rtx TARGET, if that is convenient.
2310 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2311 Return the rtx for where the value is. */
2312
2313 rtx
2314 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2315 tree amount, rtx target, int unsignedp)
2316 {
2317 return expand_shift_1 (code, mode,
2318 shifted, expand_normal (amount), target, unsignedp);
2319 }
2320
2321 \f
2322 /* Indicates the type of fixup needed after a constant multiplication.
2323 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2324 the result should be negated, and ADD_VARIANT means that the
2325 multiplicand should be added to the result. */
2326 enum mult_variant {basic_variant, negate_variant, add_variant};
2327
2328 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2329 const struct mult_cost *, enum machine_mode mode);
2330 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2331 struct algorithm *, enum mult_variant *, int);
2332 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2333 const struct algorithm *, enum mult_variant);
2334 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2335 static rtx extract_high_half (enum machine_mode, rtx);
2336 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2337 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2338 int, int);
2339 /* Compute and return the best algorithm for multiplying by T.
2340 The algorithm must cost less than cost_limit.
2341 If retval.cost >= COST_LIMIT, no algorithm was found and all
2342 other fields of the returned struct are undefined.
2343 MODE is the machine mode of the multiplication. */
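/* As a rough example of the kind of decomposition searched for here
   (illustrative; the choice depends on the target's costs): T = 10 can be
   reached as x*10 = ((x << 2) + x) << 1, i.e. a shift-and-add computing
   x*5 followed by an alg_shift step, using two shifts and one add instead
   of a multiply.  */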
2344
2345 static void
2346 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2347 const struct mult_cost *cost_limit, enum machine_mode mode)
2348 {
2349 int m;
2350 struct algorithm *alg_in, *best_alg;
2351 struct mult_cost best_cost;
2352 struct mult_cost new_limit;
2353 int op_cost, op_latency;
2354 unsigned HOST_WIDE_INT orig_t = t;
2355 unsigned HOST_WIDE_INT q;
2356 int maxm, hash_index;
2357 bool cache_hit = false;
2358 enum alg_code cache_alg = alg_zero;
2359 bool speed = optimize_insn_for_speed_p ();
2360 enum machine_mode imode;
2361 struct alg_hash_entry *entry_ptr;
2362
2363 /* Indicate that no algorithm is yet found. If no algorithm
2364 is found, this value will be returned and indicate failure. */
2365 alg_out->cost.cost = cost_limit->cost + 1;
2366 alg_out->cost.latency = cost_limit->latency + 1;
2367
2368 if (cost_limit->cost < 0
2369 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2370 return;
2371
2372 /* Be prepared for vector modes. */
2373 imode = GET_MODE_INNER (mode);
2374 if (imode == VOIDmode)
2375 imode = mode;
2376
2377 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2378
2379 /* Restrict the bits of "t" to the multiplication's mode. */
2380 t &= GET_MODE_MASK (imode);
2381
2382 /* t == 1 can be done in zero cost. */
2383 if (t == 1)
2384 {
2385 alg_out->ops = 1;
2386 alg_out->cost.cost = 0;
2387 alg_out->cost.latency = 0;
2388 alg_out->op[0] = alg_m;
2389 return;
2390 }
2391
2392 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2393 fail now. */
2394 if (t == 0)
2395 {
2396 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2397 return;
2398 else
2399 {
2400 alg_out->ops = 1;
2401 alg_out->cost.cost = zero_cost (speed);
2402 alg_out->cost.latency = zero_cost (speed);
2403 alg_out->op[0] = alg_zero;
2404 return;
2405 }
2406 }
2407
2408 /* We'll be needing a couple extra algorithm structures now. */
2409
2410 alg_in = XALLOCA (struct algorithm);
2411 best_alg = XALLOCA (struct algorithm);
2412 best_cost = *cost_limit;
2413
2414 /* Compute the hash index. */
2415 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2416
2417 /* See if we already know what to do for T. */
2418 entry_ptr = alg_hash_entry_ptr (hash_index);
2419 if (entry_ptr->t == t
2420 && entry_ptr->mode == mode
2422 && entry_ptr->speed == speed
2423 && entry_ptr->alg != alg_unknown)
2424 {
2425 cache_alg = entry_ptr->alg;
2426
2427 if (cache_alg == alg_impossible)
2428 {
2429 /* The cache tells us that it's impossible to synthesize
2430 multiplication by T within entry_ptr->cost. */
2431 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2432 /* COST_LIMIT is at least as restrictive as the one
2433 recorded in the hash table, in which case we have no
2434 hope of synthesizing a multiplication. Just
2435 return. */
2436 return;
2437
2438 /* If we get here, COST_LIMIT is less restrictive than the
2439 one recorded in the hash table, so we may be able to
2440 synthesize a multiplication. Proceed as if we didn't
2441 have the cache entry. */
2442 }
2443 else
2444 {
2445 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2446 /* The cached algorithm shows that this multiplication
2447 requires more cost than COST_LIMIT. Just return. This
2448 way, we don't clobber this cache entry with
2449 alg_impossible but retain useful information. */
2450 return;
2451
2452 cache_hit = true;
2453
2454 switch (cache_alg)
2455 {
2456 case alg_shift:
2457 goto do_alg_shift;
2458
2459 case alg_add_t_m2:
2460 case alg_sub_t_m2:
2461 goto do_alg_addsub_t_m2;
2462
2463 case alg_add_factor:
2464 case alg_sub_factor:
2465 goto do_alg_addsub_factor;
2466
2467 case alg_add_t2_m:
2468 goto do_alg_add_t2_m;
2469
2470 case alg_sub_t2_m:
2471 goto do_alg_sub_t2_m;
2472
2473 default:
2474 gcc_unreachable ();
2475 }
2476 }
2477 }
2478
2479 /* If we have a group of zero bits at the low-order part of T, try
2480 multiplying by the remaining bits and then doing a shift. */
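/* E.g. T = 40 = 5 << 3: synthesize the multiplication by 5 and follow it
   with a left shift of 3 (or, if that is cheaper on the target, three
   self-additions).  */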
2481
2482 if ((t & 1) == 0)
2483 {
2484 do_alg_shift:
2485 m = floor_log2 (t & -t); /* m = number of low zero bits */
2486 if (m < maxm)
2487 {
2488 q = t >> m;
2489 /* The function expand_shift will choose between a shift and
2490 a sequence of additions, so the observed cost is given as
2491 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2492 op_cost = m * add_cost (speed, mode);
2493 if (shift_cost (speed, mode, m) < op_cost)
2494 op_cost = shift_cost (speed, mode, m);
2495 new_limit.cost = best_cost.cost - op_cost;
2496 new_limit.latency = best_cost.latency - op_cost;
2497 synth_mult (alg_in, q, &new_limit, mode);
2498
2499 alg_in->cost.cost += op_cost;
2500 alg_in->cost.latency += op_cost;
2501 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2502 {
2503 struct algorithm *x;
2504 best_cost = alg_in->cost;
2505 x = alg_in, alg_in = best_alg, best_alg = x;
2506 best_alg->log[best_alg->ops] = m;
2507 best_alg->op[best_alg->ops] = alg_shift;
2508 }
2509
2510 /* See if treating ORIG_T as a signed number yields a better
2511 sequence. Try this sequence only for a negative ORIG_T
2512 as it would be useless for a non-negative ORIG_T. */
2513 if ((HOST_WIDE_INT) orig_t < 0)
2514 {
2515 /* Shift ORIG_T as follows because a right shift of a
2516 negative-valued signed type is implementation
2517 defined. */
2518 q = ~(~orig_t >> m);
2519 /* The function expand_shift will choose between a shift
2520 and a sequence of additions, so the observed cost is
2521 given as MIN (m * add_cost(speed, mode),
2522 shift_cost(speed, mode, m)). */
2523 op_cost = m * add_cost (speed, mode);
2524 if (shift_cost (speed, mode, m) < op_cost)
2525 op_cost = shift_cost (speed, mode, m);
2526 new_limit.cost = best_cost.cost - op_cost;
2527 new_limit.latency = best_cost.latency - op_cost;
2528 synth_mult (alg_in, q, &new_limit, mode);
2529
2530 alg_in->cost.cost += op_cost;
2531 alg_in->cost.latency += op_cost;
2532 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2533 {
2534 struct algorithm *x;
2535 best_cost = alg_in->cost;
2536 x = alg_in, alg_in = best_alg, best_alg = x;
2537 best_alg->log[best_alg->ops] = m;
2538 best_alg->op[best_alg->ops] = alg_shift;
2539 }
2540 }
2541 }
2542 if (cache_hit)
2543 goto done;
2544 }
2545
2546 /* If we have an odd number, add or subtract one. */
2547 if ((t & 1) != 0)
2548 {
2549 unsigned HOST_WIDE_INT w;
2550
2551 do_alg_addsub_t_m2:
2552 for (w = 1; (w & t) != 0; w <<= 1)
2553 ;
2554 /* If T was -1, then W will be zero after the loop. This is another
2555 case where T ends with ...111. Handling this with (T + 1) and
2556 subtracting 1 produces slightly better code and results in much
2557 faster algorithm selection than treating it like the ...0111 case
2558 below. */
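/* E.g. T = 15: synthesize x*16 and subtract x once, rather than building
   15 out of its individual set bits.  */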
2559 if (w == 0
2560 || (w > 2
2561 /* Reject the case where t is 3.
2562 Thus we prefer addition in that case. */
2563 && t != 3))
2564 {
2565 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2566
2567 op_cost = add_cost (speed, mode);
2568 new_limit.cost = best_cost.cost - op_cost;
2569 new_limit.latency = best_cost.latency - op_cost;
2570 synth_mult (alg_in, t + 1, &new_limit, mode);
2571
2572 alg_in->cost.cost += op_cost;
2573 alg_in->cost.latency += op_cost;
2574 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2575 {
2576 struct algorithm *x;
2577 best_cost = alg_in->cost;
2578 x = alg_in, alg_in = best_alg, best_alg = x;
2579 best_alg->log[best_alg->ops] = 0;
2580 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2581 }
2582 }
2583 else
2584 {
2585 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2586
2587 op_cost = add_cost (speed, mode);
2588 new_limit.cost = best_cost.cost - op_cost;
2589 new_limit.latency = best_cost.latency - op_cost;
2590 synth_mult (alg_in, t - 1, &new_limit, mode);
2591
2592 alg_in->cost.cost += op_cost;
2593 alg_in->cost.latency += op_cost;
2594 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595 {
2596 struct algorithm *x;
2597 best_cost = alg_in->cost;
2598 x = alg_in, alg_in = best_alg, best_alg = x;
2599 best_alg->log[best_alg->ops] = 0;
2600 best_alg->op[best_alg->ops] = alg_add_t_m2;
2601 }
2602 }
2603
2604 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2605 quickly with a - a * n for some appropriate constant n. */
2606 m = exact_log2 (-orig_t + 1);
2607 if (m >= 0 && m < maxm)
2608 {
2609 op_cost = shiftsub1_cost (speed, mode, m);
2610 new_limit.cost = best_cost.cost - op_cost;
2611 new_limit.latency = best_cost.latency - op_cost;
2612 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2613 &new_limit, mode);
2614
2615 alg_in->cost.cost += op_cost;
2616 alg_in->cost.latency += op_cost;
2617 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2618 {
2619 struct algorithm *x;
2620 best_cost = alg_in->cost;
2621 x = alg_in, alg_in = best_alg, best_alg = x;
2622 best_alg->log[best_alg->ops] = m;
2623 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2624 }
2625 }
2626
2627 if (cache_hit)
2628 goto done;
2629 }
2630
2631 /* Look for factors of t of the form
2632 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2633 If we find such a factor, we can multiply by t using an algorithm that
2634 multiplies by q, shift the result by m and add/subtract it to itself.
2635
2636 We search for large factors first and loop down, even if large factors
2637 are less probable than small; if we find a large factor we will find a
2638 good sequence quickly, and therefore be able to prune (by decreasing
2639 COST_LIMIT) the search. */
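/* For instance, T = 45 factors as 5 * 9 = 5 * (2**3 + 1): synthesize the
   multiplication by 5, then a single shift-and-add step
   ((x*5) << 3) + x*5 yields x*45.  */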
2640
2641 do_alg_addsub_factor:
2642 for (m = floor_log2 (t - 1); m >= 2; m--)
2643 {
2644 unsigned HOST_WIDE_INT d;
2645
2646 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2647 if (t % d == 0 && t > d && m < maxm
2648 && (!cache_hit || cache_alg == alg_add_factor))
2649 {
2650 /* If the target has a cheap shift-and-add instruction use
2651 that in preference to a shift insn followed by an add insn.
2652 Assume that the shift-and-add is "atomic" with a latency
2653 equal to its cost, otherwise assume that on superscalar
2654 hardware the shift may be executed concurrently with the
2655 earlier steps in the algorithm. */
2656 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2657 if (shiftadd_cost (speed, mode, m) < op_cost)
2658 {
2659 op_cost = shiftadd_cost (speed, mode, m);
2660 op_latency = op_cost;
2661 }
2662 else
2663 op_latency = add_cost (speed, mode);
2664
2665 new_limit.cost = best_cost.cost - op_cost;
2666 new_limit.latency = best_cost.latency - op_latency;
2667 synth_mult (alg_in, t / d, &new_limit, mode);
2668
2669 alg_in->cost.cost += op_cost;
2670 alg_in->cost.latency += op_latency;
2671 if (alg_in->cost.latency < op_cost)
2672 alg_in->cost.latency = op_cost;
2673 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2674 {
2675 struct algorithm *x;
2676 best_cost = alg_in->cost;
2677 x = alg_in, alg_in = best_alg, best_alg = x;
2678 best_alg->log[best_alg->ops] = m;
2679 best_alg->op[best_alg->ops] = alg_add_factor;
2680 }
2681 /* Other factors will have been taken care of in the recursion. */
2682 break;
2683 }
2684
2685 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2686 if (t % d == 0 && t > d && m < maxm
2687 && (!cache_hit || cache_alg == alg_sub_factor))
2688 {
2689 /* If the target has a cheap shift-and-subtract insn use
2690 that in preference to a shift insn followed by a sub insn.
2691 Assume that the shift-and-sub is "atomic" with a latency
2692 equal to its cost, otherwise assume that on superscalar
2693 hardware the shift may be executed concurrently with the
2694 earlier steps in the algorithm. */
2695 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2696 if (shiftsub0_cost (speed, mode, m) < op_cost)
2697 {
2698 op_cost = shiftsub0_cost (speed, mode, m);
2699 op_latency = op_cost;
2700 }
2701 else
2702 op_latency = add_cost (speed, mode);
2703
2704 new_limit.cost = best_cost.cost - op_cost;
2705 new_limit.latency = best_cost.latency - op_latency;
2706 synth_mult (alg_in, t / d, &new_limit, mode);
2707
2708 alg_in->cost.cost += op_cost;
2709 alg_in->cost.latency += op_latency;
2710 if (alg_in->cost.latency < op_cost)
2711 alg_in->cost.latency = op_cost;
2712 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2713 {
2714 struct algorithm *x;
2715 best_cost = alg_in->cost;
2716 x = alg_in, alg_in = best_alg, best_alg = x;
2717 best_alg->log[best_alg->ops] = m;
2718 best_alg->op[best_alg->ops] = alg_sub_factor;
2719 }
2720 break;
2721 }
2722 }
2723 if (cache_hit)
2724 goto done;
2725
2726 /* Try shift-and-add (load effective address) instructions,
2727 i.e. do a*3, a*5, a*9. */
2728 if ((t & 1) != 0)
2729 {
2730 do_alg_add_t2_m:
2731 q = t - 1;
2732 q = q & -q;
2733 m = exact_log2 (q);
2734 if (m >= 0 && m < maxm)
2735 {
2736 op_cost = shiftadd_cost (speed, mode, m);
2737 new_limit.cost = best_cost.cost - op_cost;
2738 new_limit.latency = best_cost.latency - op_cost;
2739 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2740
2741 alg_in->cost.cost += op_cost;
2742 alg_in->cost.latency += op_cost;
2743 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2744 {
2745 struct algorithm *x;
2746 best_cost = alg_in->cost;
2747 x = alg_in, alg_in = best_alg, best_alg = x;
2748 best_alg->log[best_alg->ops] = m;
2749 best_alg->op[best_alg->ops] = alg_add_t2_m;
2750 }
2751 }
2752 if (cache_hit)
2753 goto done;
2754
2755 do_alg_sub_t2_m:
2756 q = t + 1;
2757 q = q & -q;
2758 m = exact_log2 (q);
2759 if (m >= 0 && m < maxm)
2760 {
2761 op_cost = shiftsub0_cost (speed, mode, m);
2762 new_limit.cost = best_cost.cost - op_cost;
2763 new_limit.latency = best_cost.latency - op_cost;
2764 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2765
2766 alg_in->cost.cost += op_cost;
2767 alg_in->cost.latency += op_cost;
2768 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 {
2770 struct algorithm *x;
2771 best_cost = alg_in->cost;
2772 x = alg_in, alg_in = best_alg, best_alg = x;
2773 best_alg->log[best_alg->ops] = m;
2774 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2775 }
2776 }
2777 if (cache_hit)
2778 goto done;
2779 }
2780
2781 done:
2782 /* If best_cost has not decreased, we have not found any algorithm. */
2783 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2784 {
2785 /* We failed to find an algorithm. Record alg_impossible for
2786 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2787 we are asked to find an algorithm for T within the same or
2788 lower COST_LIMIT, we can immediately return to the
2789 caller. */
2790 entry_ptr->t = t;
2791 entry_ptr->mode = mode;
2792 entry_ptr->speed = speed;
2793 entry_ptr->alg = alg_impossible;
2794 entry_ptr->cost = *cost_limit;
2795 return;
2796 }
2797
2798 /* Cache the result. */
2799 if (!cache_hit)
2800 {
2801 entry_ptr->t = t;
2802 entry_ptr->mode = mode;
2803 entry_ptr->speed = speed;
2804 entry_ptr->alg = best_alg->op[best_alg->ops];
2805 entry_ptr->cost.cost = best_cost.cost;
2806 entry_ptr->cost.latency = best_cost.latency;
2807 }
2808
2809 /* If we are getting too long a sequence for `struct algorithm'
2810 to record, make this search fail. */
2811 if (best_alg->ops == MAX_BITS_PER_WORD)
2812 return;
2813
2814 /* Copy the algorithm from temporary space to the space at alg_out.
2815 We avoid using structure assignment because the majority of
2816 best_alg is normally undefined, and this is a critical function. */
2817 alg_out->ops = best_alg->ops + 1;
2818 alg_out->cost = best_cost;
2819 memcpy (alg_out->op, best_alg->op,
2820 alg_out->ops * sizeof *alg_out->op);
2821 memcpy (alg_out->log, best_alg->log,
2822 alg_out->ops * sizeof *alg_out->log);
2823 }
2824 \f
2825 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2826 Try three variations:
2827
2828 - a shift/add sequence based on VAL itself
2829 - a shift/add sequence based on -VAL, followed by a negation
2830 - a shift/add sequence based on VAL - 1, followed by an addition.
2831
2832 Return true if the cheapest of these costs less than MULT_COST,
2833 describing the algorithm in *ALG and final fixup in *VARIANT. */
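/* For example (illustrative; the winner depends on the target's costs):
   VAL = -7 is typically cheapest as the negation variant, x*7 = (x << 3) - x
   followed by a negation, while VAL = 31 may come out either directly as
   (x << 5) - x or as the add variant based on VAL - 1 = 30.  */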
2834
2835 static bool
2836 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2837 struct algorithm *alg, enum mult_variant *variant,
2838 int mult_cost)
2839 {
2840 struct algorithm alg2;
2841 struct mult_cost limit;
2842 int op_cost;
2843 bool speed = optimize_insn_for_speed_p ();
2844
2845 /* Fail quickly for impossible bounds. */
2846 if (mult_cost < 0)
2847 return false;
2848
2849 /* Ensure that mult_cost provides a reasonable upper bound.
2850 Any constant multiplication can be performed with less
2851 than 2 * bits additions. */
2852 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2853 if (mult_cost > op_cost)
2854 mult_cost = op_cost;
2855
2856 *variant = basic_variant;
2857 limit.cost = mult_cost;
2858 limit.latency = mult_cost;
2859 synth_mult (alg, val, &limit, mode);
2860
2861 /* This works only if the inverted value actually fits in an
2862 `unsigned int'. */
2863 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2864 {
2865 op_cost = neg_cost(speed, mode);
2866 if (MULT_COST_LESS (&alg->cost, mult_cost))
2867 {
2868 limit.cost = alg->cost.cost - op_cost;
2869 limit.latency = alg->cost.latency - op_cost;
2870 }
2871 else
2872 {
2873 limit.cost = mult_cost - op_cost;
2874 limit.latency = mult_cost - op_cost;
2875 }
2876
2877 synth_mult (&alg2, -val, &limit, mode);
2878 alg2.cost.cost += op_cost;
2879 alg2.cost.latency += op_cost;
2880 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2881 *alg = alg2, *variant = negate_variant;
2882 }
2883
2884 /* This proves very useful for division-by-constant. */
2885 op_cost = add_cost (speed, mode);
2886 if (MULT_COST_LESS (&alg->cost, mult_cost))
2887 {
2888 limit.cost = alg->cost.cost - op_cost;
2889 limit.latency = alg->cost.latency - op_cost;
2890 }
2891 else
2892 {
2893 limit.cost = mult_cost - op_cost;
2894 limit.latency = mult_cost - op_cost;
2895 }
2896
2897 synth_mult (&alg2, val - 1, &limit, mode);
2898 alg2.cost.cost += op_cost;
2899 alg2.cost.latency += op_cost;
2900 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2901 *alg = alg2, *variant = add_variant;
2902
2903 return MULT_COST_LESS (&alg->cost, mult_cost);
2904 }
2905
2906 /* A subroutine of expand_mult, used for constant multiplications.
2907 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2908 convenient. Use the shift/add sequence described by ALG and apply
2909 the final fixup specified by VARIANT. */
2910
2911 static rtx
2912 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2913 rtx target, const struct algorithm *alg,
2914 enum mult_variant variant)
2915 {
2916 HOST_WIDE_INT val_so_far;
2917 rtx insn, accum, tem;
2918 int opno;
2919 enum machine_mode nmode;
2920
2921 /* Avoid referencing memory over and over and invalid sharing
2922 on SUBREGs. */
2923 op0 = force_reg (mode, op0);
2924
2925 /* ACCUM starts out either as OP0 or as a zero, depending on
2926 the first operation. */
2927
2928 if (alg->op[0] == alg_zero)
2929 {
2930 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2931 val_so_far = 0;
2932 }
2933 else if (alg->op[0] == alg_m)
2934 {
2935 accum = copy_to_mode_reg (mode, op0);
2936 val_so_far = 1;
2937 }
2938 else
2939 gcc_unreachable ();
2940
2941 for (opno = 1; opno < alg->ops; opno++)
2942 {
2943 int log = alg->log[opno];
2944 rtx shift_subtarget = optimize ? 0 : accum;
2945 rtx add_target
2946 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2947 && !optimize)
2948 ? target : 0;
2949 rtx accum_target = optimize ? 0 : accum;
2950 rtx accum_inner;
2951
2952 switch (alg->op[opno])
2953 {
2954 case alg_shift:
2955 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2956 /* REG_EQUAL note will be attached to the following insn. */
2957 emit_move_insn (accum, tem);
2958 val_so_far <<= log;
2959 break;
2960
2961 case alg_add_t_m2:
2962 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2963 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2964 add_target ? add_target : accum_target);
2965 val_so_far += (HOST_WIDE_INT) 1 << log;
2966 break;
2967
2968 case alg_sub_t_m2:
2969 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2970 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2971 add_target ? add_target : accum_target);
2972 val_so_far -= (HOST_WIDE_INT) 1 << log;
2973 break;
2974
2975 case alg_add_t2_m:
2976 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2977 log, shift_subtarget, 0);
2978 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2979 add_target ? add_target : accum_target);
2980 val_so_far = (val_so_far << log) + 1;
2981 break;
2982
2983 case alg_sub_t2_m:
2984 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985 log, shift_subtarget, 0);
2986 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2987 add_target ? add_target : accum_target);
2988 val_so_far = (val_so_far << log) - 1;
2989 break;
2990
2991 case alg_add_factor:
2992 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2993 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2994 add_target ? add_target : accum_target);
2995 val_so_far += val_so_far << log;
2996 break;
2997
2998 case alg_sub_factor:
2999 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3000 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3001 (add_target
3002 ? add_target : (optimize ? 0 : tem)));
3003 val_so_far = (val_so_far << log) - val_so_far;
3004 break;
3005
3006 default:
3007 gcc_unreachable ();
3008 }
3009
3010 if (SCALAR_INT_MODE_P (mode))
3011 {
3012 /* Write a REG_EQUAL note on the last insn so that we can cse
3013 multiplication sequences. Note that if ACCUM is a SUBREG,
3014 we've set the inner register and must properly indicate that. */
3015 tem = op0, nmode = mode;
3016 accum_inner = accum;
3017 if (GET_CODE (accum) == SUBREG)
3018 {
3019 accum_inner = SUBREG_REG (accum);
3020 nmode = GET_MODE (accum_inner);
3021 tem = gen_lowpart (nmode, op0);
3022 }
3023
3024 insn = get_last_insn ();
3025 set_dst_reg_note (insn, REG_EQUAL,
3026 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3027 accum_inner);
3028 }
3029 }
3030
3031 if (variant == negate_variant)
3032 {
3033 val_so_far = -val_so_far;
3034 accum = expand_unop (mode, neg_optab, accum, target, 0);
3035 }
3036 else if (variant == add_variant)
3037 {
3038 val_so_far = val_so_far + 1;
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3040 }
3041
3042 /* Compare only the bits of val and val_so_far that are significant
3043 in the result mode, to avoid sign-/zero-extension confusion. */
3044 nmode = GET_MODE_INNER (mode);
3045 if (nmode == VOIDmode)
3046 nmode = mode;
3047 val &= GET_MODE_MASK (nmode);
3048 val_so_far &= GET_MODE_MASK (nmode);
3049 gcc_assert (val == val_so_far);
3050
3051 return accum;
3052 }
3053
3054 /* Perform a multiplication and return an rtx for the result.
3055 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3056 TARGET is a suggestion for where to store the result (an rtx).
3057
3058 We check specially for a constant integer as OP1.
3059 If you want this check for OP0 as well, then before calling
3060 you should swap the two operands if OP0 would be constant. */
3061
3062 rtx
3063 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3064 int unsignedp)
3065 {
3066 enum mult_variant variant;
3067 struct algorithm algorithm;
3068 rtx scalar_op1;
3069 int max_cost;
3070 bool speed = optimize_insn_for_speed_p ();
3071 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3072
3073 if (CONSTANT_P (op0))
3074 {
3075 rtx temp = op0;
3076 op0 = op1;
3077 op1 = temp;
3078 }
3079
3080 /* For vectors, there are several simplifications that can be made if
3081 all elements of the vector constant are identical. */
3082 scalar_op1 = op1;
3083 if (GET_CODE (op1) == CONST_VECTOR)
3084 {
3085 int i, n = CONST_VECTOR_NUNITS (op1);
3086 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3087 for (i = 1; i < n; ++i)
3088 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3089 goto skip_scalar;
3090 }
3091
3092 if (INTEGRAL_MODE_P (mode))
3093 {
3094 rtx fake_reg;
3095 HOST_WIDE_INT coeff;
3096 bool is_neg;
3097 int mode_bitsize;
3098
3099 if (op1 == CONST0_RTX (mode))
3100 return op1;
3101 if (op1 == CONST1_RTX (mode))
3102 return op0;
3103 if (op1 == CONSTM1_RTX (mode))
3104 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3105 op0, target, 0);
3106
3107 if (do_trapv)
3108 goto skip_synth;
3109
3110 /* These are the operations that are potentially turned into
3111 a sequence of shifts and additions. */
3112 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3113
3114 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3115 less than or equal in size to `unsigned int' this doesn't matter.
3116 If the mode is larger than `unsigned int', then synth_mult works
3117 only if the constant value exactly fits in an `unsigned int' without
3118 any truncation. This means that multiplying by negative values does
3119 not work; results are off by 2^32 on a 32 bit machine. */
3120
3121 if (CONST_INT_P (scalar_op1))
3122 {
3123 coeff = INTVAL (scalar_op1);
3124 is_neg = coeff < 0;
3125 }
3126 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3127 {
3128 /* If we are multiplying in DImode, it may still be a win
3129 to try to work with shifts and adds. */
3130 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3131 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3132 {
3133 coeff = CONST_DOUBLE_LOW (scalar_op1);
3134 is_neg = false;
3135 }
3136 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3137 {
3138 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3139 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3140 {
3141 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3142 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3143 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3144 return expand_shift (LSHIFT_EXPR, mode, op0,
3145 shift, target, unsignedp);
3146 }
3147 goto skip_synth;
3148 }
3149 else
3150 goto skip_synth;
3151 }
3152 else
3153 goto skip_synth;
3154
3155 /* We used to test optimize here, on the grounds that it's better to
3156 produce a smaller program when -O is not used. But this causes
3157 such a terrible slowdown sometimes that it seems better to always
3158 use synth_mult. */
3159
3160 /* Special case powers of two. */
3161 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3162 return expand_shift (LSHIFT_EXPR, mode, op0,
3163 floor_log2 (coeff), target, unsignedp);
3164
3165 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3166
3167 /* Attempt to handle multiplication of DImode values by negative
3168 coefficients, by performing the multiplication by a positive
3169 multiplier and then inverting the result. */
3170 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3171 {
3172 /* It's safe to use -coeff even for INT_MIN, as the
3173 result is interpreted as an unsigned coefficient.
3174 Exclude cost of op0 from max_cost to match the cost
3175 calculation of the synth_mult. */
3176 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3177 - neg_cost(speed, mode));
3178 if (max_cost > 0
3179 && choose_mult_variant (mode, -coeff, &algorithm,
3180 &variant, max_cost))
3181 {
3182 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3183 &algorithm, variant);
3184 return expand_unop (mode, neg_optab, temp, target, 0);
3185 }
3186 goto skip_synth;
3187 }
3188
3189 /* Exclude cost of op0 from max_cost to match the cost
3190 calculation of the synth_mult. */
3191 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3192 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3193 return expand_mult_const (mode, op0, coeff, target,
3194 &algorithm, variant);
3195 }
3196 skip_synth:
3197
3198 /* Expand x*2.0 as x+x. */
3199 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3200 {
3201 REAL_VALUE_TYPE d;
3202 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3203
3204 if (REAL_VALUES_EQUAL (d, dconst2))
3205 {
3206 op0 = force_reg (GET_MODE (op0), op0);
3207 return expand_binop (mode, add_optab, op0, op0,
3208 target, unsignedp, OPTAB_LIB_WIDEN);
3209 }
3210 }
3211 skip_scalar:
3212
3213 /* This used to use umul_optab if unsigned, but for non-widening multiply
3214 there is no difference between signed and unsigned. */
3215 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3216 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3217 gcc_assert (op0);
3218 return op0;
3219 }
3220
3221 /* Return a cost estimate for multiplying a register by the given
3222 COEFFicient in the given MODE and SPEED. */
3223
3224 int
3225 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3226 {
3227 int max_cost;
3228 struct algorithm algorithm;
3229 enum mult_variant variant;
3230
3231 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3232 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3233 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3234 return algorithm.cost.cost;
3235 else
3236 return max_cost;
3237 }
3238
3239 /* Perform a widening multiplication and return an rtx for the result.
3240 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3241 TARGET is a suggestion for where to store the result (an rtx).
3242 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3243 or smul_widen_optab.
3244
3245 We check specially for a constant integer as OP1, comparing the
3246 cost of a widening multiply against the cost of a sequence of shifts
3247 and adds. */
3248
3249 rtx
3250 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3251 int unsignedp, optab this_optab)
3252 {
3253 bool speed = optimize_insn_for_speed_p ();
3254 rtx cop1;
3255
3256 if (CONST_INT_P (op1)
3257 && GET_MODE (op0) != VOIDmode
3258 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3259 this_optab == umul_widen_optab))
3260 && CONST_INT_P (cop1)
3261 && (INTVAL (cop1) >= 0
3262 || HWI_COMPUTABLE_MODE_P (mode)))
3263 {
3264 HOST_WIDE_INT coeff = INTVAL (cop1);
3265 int max_cost;
3266 enum mult_variant variant;
3267 struct algorithm algorithm;
3268
3269 /* Special case powers of two. */
3270 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3271 {
3272 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3273 return expand_shift (LSHIFT_EXPR, mode, op0,
3274 floor_log2 (coeff), target, unsignedp);
3275 }
3276
3277 /* Exclude cost of op0 from max_cost to match the cost
3278 calculation of the synth_mult. */
3279 max_cost = mul_widen_cost (speed, mode);
3280 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3281 max_cost))
3282 {
3283 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3284 return expand_mult_const (mode, op0, coeff, target,
3285 &algorithm, variant);
3286 }
3287 }
3288 return expand_binop (mode, this_optab, op0, op1, target,
3289 unsignedp, OPTAB_LIB_WIDEN);
3290 }
3291 \f
3292 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3293 replace division by D, and put the least significant N bits of the result
3294 in *MULTIPLIER_PTR and return the most significant bit.
3295
3296 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3297 needed precision is in PRECISION (should be <= N).
3298
3299 PRECISION should be as small as possible so this function can choose
3300 the multiplier more freely.
3301
3302 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3303 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3304
3305 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3306 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
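/* A familiar instance of the result (assuming N = PRECISION = 32): for
   D = 3 this chooses the multiplier 0xaaaaaaab with *POST_SHIFT_PTR = 1,
   so an unsigned x / 3 becomes the high 32 bits of x * 0xaaaaaaab shifted
   right by 1.  */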
3307
3308 unsigned HOST_WIDE_INT
3309 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3310 unsigned HOST_WIDE_INT *multiplier_ptr,
3311 int *post_shift_ptr, int *lgup_ptr)
3312 {
3313 double_int mhigh, mlow;
3314 int lgup, post_shift;
3315 int pow, pow2;
3316
3317 /* lgup = ceil(log2(divisor)); */
3318 lgup = ceil_log2 (d);
3319
3320 gcc_assert (lgup <= n);
3321
3322 pow = n + lgup;
3323 pow2 = n + lgup - precision;
3324
3325 /* We could handle this with some effort, but this case is much
3326 better handled directly with a scc insn, so rely on caller using
3327 that. */
3328 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3329
3330 /* mlow = 2^(N + lgup)/d */
3331 double_int val = double_int_zero.set_bit (pow);
3332 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3333
3334 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3335 val |= double_int_zero.set_bit (pow2);
3336 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3337
3338 gcc_assert (!mhigh.high || val.high - d < d);
3339 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3340 /* Assert that mlow < mhigh. */
3341 gcc_assert (mlow.ult (mhigh));
3342
3343 /* If precision == N, then mlow, mhigh exceed 2^N
3344 (but they do not exceed 2^(N+1)). */
3345
3346 /* Reduce to lowest terms. */
3347 for (post_shift = lgup; post_shift > 0; post_shift--)
3348 {
3349 int shft = HOST_BITS_PER_WIDE_INT - 1;
3350 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3351 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3352 if (ml_lo >= mh_lo)
3353 break;
3354
3355 mlow = double_int::from_uhwi (ml_lo);
3356 mhigh = double_int::from_uhwi (mh_lo);
3357 }
3358
3359 *post_shift_ptr = post_shift;
3360 *lgup_ptr = lgup;
3361 if (n < HOST_BITS_PER_WIDE_INT)
3362 {
3363 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3364 *multiplier_ptr = mhigh.low & mask;
3365 return mhigh.low >= mask;
3366 }
3367 else
3368 {
3369 *multiplier_ptr = mhigh.low;
3370 return mhigh.high;
3371 }
3372 }
3373
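/* [Editorial sketch, not part of expmed.c.]  A worked instance of the
   function above, assuming a 32-bit unsigned int and d == 7 with
   n == precision == 32: choose_multiplier computes the 33-bit
   multiplier 0x1_24924925, i.e. it stores 0x24924925 in
   *MULTIPLIER_PTR, returns 1 for the most significant bit, and sets
   *POST_SHIFT_PTR to 3.  The caller (the unsigned TRUNC_DIV_EXPR path
   in expand_divmod, "mh != 0" case) then expands x / 7 roughly as the
   multiply-high/subtract/shift sequence below; for example
   sketch_udiv_by_7 (100) == 14.  The sketch_* name is hypothetical.  */

static unsigned int
sketch_udiv_by_7 (unsigned int x)
{
  /* High half of x * 0x24924925 (the low 32 bits of the multiplier).  */
  unsigned int t1 = (unsigned int) (((unsigned long long) x * 0x24924925u) >> 32);
  /* Add back the 33rd multiplier bit without overflowing 32 bits.  */
  unsigned int t2 = (x - t1) >> 1;
  /* Final shift is post_shift - 1 because of the adjustment above.  */
  return (t1 + t2) >> 2;
}
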
3374 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3375 congruent to 1 (mod 2**N). */
3376
3377 static unsigned HOST_WIDE_INT
3378 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3379 {
3380 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3381
3382 /* The algorithm notes that the choice y = x satisfies
3383 x*y == 1 mod 2^3, since x is assumed odd.
3384 Each iteration doubles the number of bits of significance in y. */
3385
3386 unsigned HOST_WIDE_INT mask;
3387 unsigned HOST_WIDE_INT y = x;
3388 int nbit = 3;
3389
3390 mask = (n == HOST_BITS_PER_WIDE_INT
3391 ? ~(unsigned HOST_WIDE_INT) 0
3392 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3393
3394 while (nbit < n)
3395 {
3396 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3397 nbit *= 2;
3398 }
3399 return y;
3400 }
3401
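/* [Editorial sketch, not part of expmed.c.]  The same doubling
   iteration specialized to a (presumed) 32-bit unsigned int.  Starting
   from y == x, which is already correct to 3 bits because x is odd,
   each y *= (2 - x*y) step doubles the number of correct low-order
   bits, so four iterations suffice for 32 bits.  For example the
   inverse of 7 comes out as 0xb6db6db7, and indeed
   7 * 0xb6db6db7 == 5 * 2^32 + 1.  This is the multiplier the
   EXACT_DIV_EXPR case below uses after shifting out the divisor's
   trailing zeros.  The sketch_* name is hypothetical.  */

static unsigned int
sketch_invert_mod2_32 (unsigned int x)	/* X must be odd.  */
{
  unsigned int y = x;
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);		/* unsigned arithmetic is mod 2^32 */
  return y;
}
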
3402 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1 in
3403 the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3404 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3405 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3406 become signed.
3407
3408 The result is put in TARGET if that is convenient.
3409
3410 MODE is the mode of operation. */
3411
3412 rtx
3413 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3414 rtx op1, rtx target, int unsignedp)
3415 {
3416 rtx tem;
3417 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3418
3419 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3420 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3421 tem = expand_and (mode, tem, op1, NULL_RTX);
3422 adj_operand
3423 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3424 adj_operand);
3425
3426 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3427 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3428 tem = expand_and (mode, tem, op0, NULL_RTX);
3429 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3430 target);
3431
3432 return target;
3433 }
3434
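/* [Editorial sketch, not part of expmed.c.]  The identity behind the
   adjustment above, spelled out for 16-bit operands (assuming a 32-bit
   int).  If XU, YU are the operand bits read as unsigned and XS, YS
   the same bits read as signed, then
     XU * YU == XS * YS + 2^16 * (YS*[XS<0] + XS*[YS<0]) + 2^32 * [..],
   so, modulo 2^16, the unsigned high half is the signed high half plus
   Y when X is negative plus X when Y is negative.  UNSIGNEDP above
   merely selects whether the two correction terms are added or
   subtracted.  The sketch_* name is hypothetical.  */

static unsigned short
sketch_umulh16_from_smulh16 (unsigned short x, unsigned short y)
{
  short xs = (short) x, ys = (short) y;
  /* Signed high half; right shift of a negative value is assumed to be
     arithmetic here, as it is in the RTL sequence.  */
  unsigned short high = (unsigned short) (((int) xs * ys) >> 16);
  if (xs < 0)
    high += y;				/* first correction term */
  if (ys < 0)
    high += x;				/* second correction term */
  return high;				/* == ((unsigned int) x * y) >> 16 */
}
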
3435 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3436
3437 static rtx
3438 extract_high_half (enum machine_mode mode, rtx op)
3439 {
3440 enum machine_mode wider_mode;
3441
3442 if (mode == word_mode)
3443 return gen_highpart (mode, op);
3444
3445 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3446
3447 wider_mode = GET_MODE_WIDER_MODE (mode);
3448 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3449 GET_MODE_BITSIZE (mode), 0, 1);
3450 return convert_modes (mode, wider_mode, op, 0);
3451 }
3452
3453 /* Like expmed_mult_highpart, but only consider using a multiplication
3454 optab. OP1 is an rtx for the constant operand. */
3455
3456 static rtx
3457 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3458 rtx target, int unsignedp, int max_cost)
3459 {
3460 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3461 enum machine_mode wider_mode;
3462 optab moptab;
3463 rtx tem;
3464 int size;
3465 bool speed = optimize_insn_for_speed_p ();
3466
3467 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468
3469 wider_mode = GET_MODE_WIDER_MODE (mode);
3470 size = GET_MODE_BITSIZE (mode);
3471
3472 /* First, try using a multiplication insn that generates only the needed
3473 high part of the product, in the sign flavor of unsignedp. */
3474 if (mul_highpart_cost (speed, mode) < max_cost)
3475 {
3476 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3477 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3478 unsignedp, OPTAB_DIRECT);
3479 if (tem)
3480 return tem;
3481 }
3482
3483 /* Second, the same as above, but using the sign flavor opposite to unsignedp.
3484 Need to adjust the result after the multiplication. */
3485 if (size - 1 < BITS_PER_WORD
3486 && (mul_highpart_cost (speed, mode)
3487 + 2 * shift_cost (speed, mode, size-1)
3488 + 4 * add_cost (speed, mode) < max_cost))
3489 {
3490 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3491 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3492 unsignedp, OPTAB_DIRECT);
3493 if (tem)
3494 /* We used the wrong signedness. Adjust the result. */
3495 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3496 tem, unsignedp);
3497 }
3498
3499 /* Try widening multiplication. */
3500 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3501 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3502 && mul_widen_cost (speed, wider_mode) < max_cost)
3503 {
3504 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3505 unsignedp, OPTAB_WIDEN);
3506 if (tem)
3507 return extract_high_half (mode, tem);
3508 }
3509
3510 /* Try widening the mode and performing a non-widening multiplication. */
3511 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3512 && size - 1 < BITS_PER_WORD
3513 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3514 < max_cost))
3515 {
3516 rtx insns, wop0, wop1;
3517
3518 /* We need to widen the operands, for example to ensure the
3519 constant multiplier is correctly sign or zero extended.
3520 Use a sequence to clean up any instructions emitted by
3521 the conversions if things don't work out. */
3522 start_sequence ();
3523 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3524 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3525 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3526 unsignedp, OPTAB_WIDEN);
3527 insns = get_insns ();
3528 end_sequence ();
3529
3530 if (tem)
3531 {
3532 emit_insn (insns);
3533 return extract_high_half (mode, tem);
3534 }
3535 }
3536
3537 /* Try widening multiplication of opposite signedness, and adjust. */
3538 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3539 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3540 && size - 1 < BITS_PER_WORD
3541 && (mul_widen_cost (speed, wider_mode)
3542 + 2 * shift_cost (speed, mode, size-1)
3543 + 4 * add_cost (speed, mode) < max_cost))
3544 {
3545 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3546 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3547 if (tem != 0)
3548 {
3549 tem = extract_high_half (mode, tem);
3550 /* We used the wrong signedness. Adjust the result. */
3551 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3552 target, unsignedp);
3553 }
3554 }
3555
3556 return 0;
3557 }
3558
3559 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3560 putting the high half of the result in TARGET if that is convenient,
3561 and return where the result is. If the operation cannot be performed,
3562 0 is returned.
3563
3564 MODE is the mode of operation and result.
3565
3566 UNSIGNEDP nonzero means unsigned multiply.
3567
3568 MAX_COST is the total allowed cost for the expanded RTL. */
3569
3570 static rtx
3571 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3572 rtx target, int unsignedp, int max_cost)
3573 {
3574 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3575 unsigned HOST_WIDE_INT cnst1;
3576 int extra_cost;
3577 bool sign_adjust = false;
3578 enum mult_variant variant;
3579 struct algorithm alg;
3580 rtx tem;
3581 bool speed = optimize_insn_for_speed_p ();
3582
3583 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3584 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3585 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3586
3587 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3588
3589 /* We can't optimize modes wider than BITS_PER_WORD.
3590 ??? We might be able to perform double-word arithmetic if
3591 mode == word_mode, however all the cost calculations in
3592 synth_mult etc. assume single-word operations. */
3593 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3594 return expmed_mult_highpart_optab (mode, op0, op1, target,
3595 unsignedp, max_cost);
3596
3597 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3598
3599 /* Check whether we try to multiply by a negative constant. */
3600 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3601 {
3602 sign_adjust = true;
3603 extra_cost += add_cost (speed, mode);
3604 }
3605
3606 /* See whether shift/add multiplication is cheap enough. */
3607 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3608 max_cost - extra_cost))
3609 {
3610 /* See whether the specialized multiplication optabs are
3611 cheaper than the shift/add version. */
3612 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3613 alg.cost.cost + extra_cost);
3614 if (tem)
3615 return tem;
3616
3617 tem = convert_to_mode (wider_mode, op0, unsignedp);
3618 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3619 tem = extract_high_half (mode, tem);
3620
3621 /* Adjust result for signedness. */
3622 if (sign_adjust)
3623 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3624
3625 return tem;
3626 }
3627 return expmed_mult_highpart_optab (mode, op0, op1, target,
3628 unsignedp, max_cost);
3629 }
3630
3631
3632 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3633
3634 static rtx
3635 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3636 {
3637 unsigned HOST_WIDE_INT masklow, maskhigh;
3638 rtx result, temp, shift, label;
3639 int logd;
3640
3641 logd = floor_log2 (d);
3642 result = gen_reg_rtx (mode);
3643
3644 /* Avoid conditional branches when they're expensive. */
3645 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3646 && optimize_insn_for_speed_p ())
3647 {
3648 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3649 mode, 0, -1);
3650 if (signmask)
3651 {
3652 signmask = force_reg (mode, signmask);
3653 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3654 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3655
3656 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3657 which instruction sequence to use. If logical right shifts
3658 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3659 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3660
3661 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3662 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3663 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3664 > COSTS_N_INSNS (2)))
3665 {
3666 temp = expand_binop (mode, xor_optab, op0, signmask,
3667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 temp = expand_binop (mode, sub_optab, temp, signmask,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 temp = expand_binop (mode, xor_optab, temp, signmask,
3673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, sub_optab, temp, signmask,
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 }
3677 else
3678 {
3679 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3680 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 signmask = force_reg (mode, signmask);
3682
3683 temp = expand_binop (mode, add_optab, op0, signmask,
3684 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3685 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3686 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3687 temp = expand_binop (mode, sub_optab, temp, signmask,
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 }
3690 return temp;
3691 }
3692 }
3693
3694 /* Mask contains the mode's signbit and the significant bits of the
3695 modulus. By including the signbit in the operation, many targets
3696 can avoid an explicit compare operation in the following comparison
3697 against zero. */
3698
3699 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3700 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3701 {
3702 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3703 maskhigh = -1;
3704 }
3705 else
3706 maskhigh = (HOST_WIDE_INT) -1
3707 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3708
3709 temp = expand_binop (mode, and_optab, op0,
3710 immed_double_const (masklow, maskhigh, mode),
3711 result, 1, OPTAB_LIB_WIDEN);
3712 if (temp != result)
3713 emit_move_insn (result, temp);
3714
3715 label = gen_label_rtx ();
3716 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3717
3718 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3719 0, OPTAB_LIB_WIDEN);
3720 masklow = (HOST_WIDE_INT) -1 << logd;
3721 maskhigh = -1;
3722 temp = expand_binop (mode, ior_optab, temp,
3723 immed_double_const (masklow, maskhigh, mode),
3724 result, 1, OPTAB_LIB_WIDEN);
3725 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3726 0, OPTAB_LIB_WIDEN);
3727 if (temp != result)
3728 emit_move_insn (result, temp);
3729 emit_label (label);
3730 return result;
3731 }
3732
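/* [Editorial sketch, not part of expmed.c.]  The two branch-free
   sequences chosen above, in plain C for a (presumed) 32-bit int and a
   power-of-two divisor 2^logd with 1 <= logd <= 31.  Both compute the
   truncating remainder the way the RTL does, for example
   sketch_smod_pow2_xor (-13, 3) == -5; unsigned intermediates stand in
   for the modular RTL arithmetic.  The sketch_* names are
   hypothetical.  */

static int
sketch_smod_pow2_xor (int x, int logd)		/* XOR/SUB variant */
{
  unsigned int mask = (1u << logd) - 1;
  unsigned int signmask = (unsigned int) -(x < 0);	/* 0 or all ones */
  unsigned int t = (((unsigned int) x ^ signmask) - signmask) & mask;
  return (int) ((t ^ signmask) - signmask);	/* undo the conditional negate */
}

static int
sketch_smod_pow2_shift (int x, int logd)	/* LSHIFTRT/ADD variant */
{
  unsigned int mask = (1u << logd) - 1;
  unsigned int signmask = (unsigned int) -(x < 0);
  unsigned int bias = signmask >> (32 - logd);	/* 2^logd - 1 if x < 0, else 0 */
  return (int) ((((unsigned int) x + bias) & mask) - bias);
}
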
3733 /* Expand signed division of OP0 by a power of two D in mode MODE.
3734 This routine is only called for positive values of D. */
3735
3736 static rtx
3737 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3738 {
3739 rtx temp, label;
3740 int logd;
3741
3742 logd = floor_log2 (d);
3743
3744 if (d == 2
3745 && BRANCH_COST (optimize_insn_for_speed_p (),
3746 false) >= 1)
3747 {
3748 temp = gen_reg_rtx (mode);
3749 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3750 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3751 0, OPTAB_LIB_WIDEN);
3752 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3753 }
3754
3755 #ifdef HAVE_conditional_move
3756 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3757 >= 2)
3758 {
3759 rtx temp2;
3760
3761 /* ??? emit_conditional_move forces a stack adjustment via
3762 compare_from_rtx so, if the sequence is discarded, it will
3763 be lost. Do it now instead. */
3764 do_pending_stack_adjust ();
3765
3766 start_sequence ();
3767 temp2 = copy_to_mode_reg (mode, op0);
3768 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3769 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3770 temp = force_reg (mode, temp);
3771
3772 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3773 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3774 mode, temp, temp2, mode, 0);
3775 if (temp2)
3776 {
3777 rtx seq = get_insns ();
3778 end_sequence ();
3779 emit_insn (seq);
3780 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3781 }
3782 end_sequence ();
3783 }
3784 #endif
3785
3786 if (BRANCH_COST (optimize_insn_for_speed_p (),
3787 false) >= 2)
3788 {
3789 int ushift = GET_MODE_BITSIZE (mode) - logd;
3790
3791 temp = gen_reg_rtx (mode);
3792 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3793 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3794 > COSTS_N_INSNS (1))
3795 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3796 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3797 else
3798 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3799 ushift, NULL_RTX, 1);
3800 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3801 0, OPTAB_LIB_WIDEN);
3802 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3803 }
3804
3805 label = gen_label_rtx ();
3806 temp = copy_to_mode_reg (mode, op0);
3807 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3808 expand_inc (temp, GEN_INT (d - 1));
3809 emit_label (label);
3810 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3811 }
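
/* [Editorial sketch, not part of expmed.c.]  The store-flag variant
   chosen above for BRANCH_COST >= 2, in plain C for a (presumed)
   32-bit int and 1 <= logd <= 30: bias a negative dividend by
   2^logd - 1 and then do one arithmetic right shift, which turns the
   shift's floor rounding into the truncating rounding of signed
   division.  For example sketch_sdiv_pow2 (-13, 2) == -3, matching
   -13 / 4.  The ">>" on a negative value is assumed to be an
   arithmetic shift, as in the RTL above.  The sketch_* name is
   hypothetical.  */

static int
sketch_sdiv_pow2 (int x, int logd)
{
  int bias = (x < 0) ? ((1 << logd) - 1) : 0;	/* emit_store_flag + AND above */
  return (x + bias) >> logd;			/* arithmetic shift */
}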
3812 \f
3813 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3814 if that is convenient, and returning where the result is.
3815 You may request either the quotient or the remainder as the result;
3816 specify REM_FLAG nonzero to get the remainder.
3817
3818 CODE is the expression code for which kind of division this is;
3819 it controls how rounding is done. MODE is the machine mode to use.
3820 UNSIGNEDP nonzero means do unsigned division. */
3821
3822 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3823 and then correct it by or'ing in missing high bits
3824 if result of ANDI is nonzero.
3825 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3826 This could optimize to a bfexts instruction.
3827 But C doesn't use these operations, so their optimizations are
3828 left for later. */
3829 /* ??? For modulo, we don't actually need the highpart of the first product,
3830 the low part will do nicely. And for small divisors, the second multiply
3831 can also be a low-part only multiply or even be completely left out.
3832 E.g. to calculate the remainder of a division by 3 with a 32 bit
3833 multiply, multiply with 0x55555556 and extract the upper two bits;
3834 the result is exact for inputs up to 0x1fffffff.
3835 The input range can be reduced by using cross-sum rules.
3836 For odd divisors >= 3, the following table gives right shift counts
3837 so that if a number is shifted by an integer multiple of the given
3838 amount, the remainder stays the same:
3839 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3840 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3841 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3842 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3843 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3844
3845 Cross-sum rules for even numbers can be derived by leaving as many bits
3846 to the right alone as the divisor has zeros to the right.
3847 E.g. if x is an unsigned 32 bit number:
3848 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3849 */
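
/* [Editorial sketch, not part of expmed.c.]  The remainder-by-3 trick
   mentioned in the comment above, assuming a 32-bit unsigned int:
   0x55555556 is an upward approximation of 2^32 / 3, so for small
   enough x the remainder x mod 3 appears in the top two bits of the
   low 32-bit product.  For example sketch_rem3 (7) == 1; the result is
   exact for x <= 0x1fffffff and first goes wrong at 0x20000000.  The
   sketch_* name is hypothetical.  */

static unsigned int
sketch_rem3 (unsigned int x)
{
  return (x * 0x55555556u) >> 30;	/* top two bits of the 32-bit product */
}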
3850
3851 rtx
3852 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3853 rtx op0, rtx op1, rtx target, int unsignedp)
3854 {
3855 enum machine_mode compute_mode;
3856 rtx tquotient;
3857 rtx quotient = 0, remainder = 0;
3858 rtx last;
3859 int size;
3860 rtx insn;
3861 optab optab1, optab2;
3862 int op1_is_constant, op1_is_pow2 = 0;
3863 int max_cost, extra_cost;
3864 static HOST_WIDE_INT last_div_const = 0;
3865 static HOST_WIDE_INT ext_op1;
3866 bool speed = optimize_insn_for_speed_p ();
3867
3868 op1_is_constant = CONST_INT_P (op1);
3869 if (op1_is_constant)
3870 {
3871 ext_op1 = INTVAL (op1);
3872 if (unsignedp)
3873 ext_op1 &= GET_MODE_MASK (mode);
3874 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3875 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3876 }
3877
3878 /*
3879 This is the structure of expand_divmod:
3880
3881 First comes code to fix up the operands so we can perform the operations
3882 correctly and efficiently.
3883
3884 Second comes a switch statement with code specific for each rounding mode.
3885 For some special operands this code emits all RTL for the desired
3886 operation, for other cases, it generates only a quotient and stores it in
3887 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3888 to indicate that it has not done anything.
3889
3890 Last comes code that finishes the operation. If QUOTIENT is set and
3891 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3892 QUOTIENT is not set, it is computed using trunc rounding.
3893
3894 We try to generate special code for division and remainder when OP1 is a
3895 constant. If |OP1| = 2**n we can use shifts and some other fast
3896 operations. For other values of OP1, we compute a carefully selected
3897 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3898 by m.
3899
3900 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3901 half of the product. Different strategies for generating the product are
3902 implemented in expmed_mult_highpart.
3903
3904 If what we actually want is the remainder, we generate that by another
3905 by-constant multiplication and a subtraction. */
3906
3907 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3908 code below will malfunction if we are, so check here and handle
3909 the special case if so. */
3910 if (op1 == const1_rtx)
3911 return rem_flag ? const0_rtx : op0;
3912
3913 /* When dividing by -1, we could get an overflow.
3914 negv_optab can handle overflows. */
3915 if (! unsignedp && op1 == constm1_rtx)
3916 {
3917 if (rem_flag)
3918 return const0_rtx;
3919 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3920 ? negv_optab : neg_optab, op0, target, 0);
3921 }
3922
3923 if (target
3924 /* Don't use the function value register as a target
3925 since we have to read it as well as write it,
3926 and function-inlining gets confused by this. */
3927 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3928 /* Don't clobber an operand while doing a multi-step calculation. */
3929 || ((rem_flag || op1_is_constant)
3930 && (reg_mentioned_p (target, op0)
3931 || (MEM_P (op0) && MEM_P (target))))
3932 || reg_mentioned_p (target, op1)
3933 || (MEM_P (op1) && MEM_P (target))))
3934 target = 0;
3935
3936 /* Get the mode in which to perform this computation. Normally it will
3937 be MODE, but sometimes we can't do the desired operation in MODE.
3938 If so, pick a wider mode in which we can do the operation. Convert
3939 to that mode at the start to avoid repeated conversions.
3940
3941 First see what operations we need. These depend on the expression
3942 we are evaluating. (We assume that divxx3 insns exist under the
3943 same conditions as modxx3 insns do and that these insns don't normally
3944 fail. If these assumptions are not correct, we may generate less
3945 efficient code in some cases.)
3946
3947 Then see if we find a mode in which we can open-code that operation
3948 (either a division, modulus, or shift). Finally, check for the smallest
3949 mode for which we can do the operation with a library call. */
3950
3951 /* We might want to refine this now that we have division-by-constant
3952 optimization. Since expmed_mult_highpart tries so many variants, it is
3953 not straightforward to generalize this. Maybe we should make an array
3954 of possible modes in init_expmed? Save this for GCC 2.7. */
3955
3956 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3957 ? (unsignedp ? lshr_optab : ashr_optab)
3958 : (unsignedp ? udiv_optab : sdiv_optab));
3959 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3960 ? optab1
3961 : (unsignedp ? udivmod_optab : sdivmod_optab));
3962
3963 for (compute_mode = mode; compute_mode != VOIDmode;
3964 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3965 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3966 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3967 break;
3968
3969 if (compute_mode == VOIDmode)
3970 for (compute_mode = mode; compute_mode != VOIDmode;
3971 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3972 if (optab_libfunc (optab1, compute_mode)
3973 || optab_libfunc (optab2, compute_mode))
3974 break;
3975
3976 /* If we still couldn't find a mode, use MODE, but expand_binop will
3977 probably die. */
3978 if (compute_mode == VOIDmode)
3979 compute_mode = mode;
3980
3981 if (target && GET_MODE (target) == compute_mode)
3982 tquotient = target;
3983 else
3984 tquotient = gen_reg_rtx (compute_mode);
3985
3986 size = GET_MODE_BITSIZE (compute_mode);
3987 #if 0
3988 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3989 (mode), and thereby get better code when OP1 is a constant. Do that
3990 later. It will require going over all usages of SIZE below. */
3991 size = GET_MODE_BITSIZE (mode);
3992 #endif
3993
3994 /* Only deduct something for a REM if the last divide done was
3995 for a different constant. Then set the constant of the last
3996 divide. */
3997 max_cost = (unsignedp
3998 ? udiv_cost (speed, compute_mode)
3999 : sdiv_cost (speed, compute_mode));
4000 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4001 && INTVAL (op1) == last_div_const))
4002 max_cost -= (mul_cost (speed, compute_mode)
4003 + add_cost (speed, compute_mode));
4004
4005 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4006
4007 /* Now convert to the best mode to use. */
4008 if (compute_mode != mode)
4009 {
4010 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4011 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4012
4013 /* convert_modes may have placed op1 into a register, so we
4014 must recompute the following. */
4015 op1_is_constant = CONST_INT_P (op1);
4016 op1_is_pow2 = (op1_is_constant
4017 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4018 || (! unsignedp
4019 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4020 }
4021
4022 /* If one of the operands is a volatile MEM, copy it into a register. */
4023
4024 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4025 op0 = force_reg (compute_mode, op0);
4026 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4027 op1 = force_reg (compute_mode, op1);
4028
4029 /* If we need the remainder or if OP1 is constant, we need to
4030 put OP0 in a register in case it has any queued subexpressions. */
4031 if (rem_flag || op1_is_constant)
4032 op0 = force_reg (compute_mode, op0);
4033
4034 last = get_last_insn ();
4035
4036 /* Promote floor rounding to trunc rounding for unsigned operations. */
4037 if (unsignedp)
4038 {
4039 if (code == FLOOR_DIV_EXPR)
4040 code = TRUNC_DIV_EXPR;
4041 if (code == FLOOR_MOD_EXPR)
4042 code = TRUNC_MOD_EXPR;
4043 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4044 code = TRUNC_DIV_EXPR;
4045 }
4046
4047 if (op1 != const0_rtx)
4048 switch (code)
4049 {
4050 case TRUNC_MOD_EXPR:
4051 case TRUNC_DIV_EXPR:
4052 if (op1_is_constant)
4053 {
4054 if (unsignedp)
4055 {
4056 unsigned HOST_WIDE_INT mh, ml;
4057 int pre_shift, post_shift;
4058 int dummy;
4059 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4060 & GET_MODE_MASK (compute_mode));
4061
4062 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063 {
4064 pre_shift = floor_log2 (d);
4065 if (rem_flag)
4066 {
4067 remainder
4068 = expand_binop (compute_mode, and_optab, op0,
4069 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4070 remainder, 1,
4071 OPTAB_LIB_WIDEN);
4072 if (remainder)
4073 return gen_lowpart (mode, remainder);
4074 }
4075 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4076 pre_shift, tquotient, 1);
4077 }
4078 else if (size <= HOST_BITS_PER_WIDE_INT)
4079 {
4080 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4081 {
4082 /* Most significant bit of divisor is set; emit an scc
4083 insn. */
4084 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4085 compute_mode, 1, 1);
4086 }
4087 else
4088 {
4089 /* Find a suitable multiplier and right shift count
4090 instead of multiplying with D. */
4091
4092 mh = choose_multiplier (d, size, size,
4093 &ml, &post_shift, &dummy);
4094
4095 /* If the suggested multiplier is more than SIZE bits,
4096 we can do better for even divisors, using an
4097 initial right shift. */
4098 if (mh != 0 && (d & 1) == 0)
4099 {
4100 pre_shift = floor_log2 (d & -d);
4101 mh = choose_multiplier (d >> pre_shift, size,
4102 size - pre_shift,
4103 &ml, &post_shift, &dummy);
4104 gcc_assert (!mh);
4105 }
4106 else
4107 pre_shift = 0;
4108
4109 if (mh != 0)
4110 {
4111 rtx t1, t2, t3, t4;
4112
4113 if (post_shift - 1 >= BITS_PER_WORD)
4114 goto fail1;
4115
4116 extra_cost
4117 = (shift_cost (speed, compute_mode, post_shift - 1)
4118 + shift_cost (speed, compute_mode, 1)
4119 + 2 * add_cost (speed, compute_mode));
4120 t1 = expmed_mult_highpart (compute_mode, op0,
4121 GEN_INT (ml),
4122 NULL_RTX, 1,
4123 max_cost - extra_cost);
4124 if (t1 == 0)
4125 goto fail1;
4126 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4127 op0, t1),
4128 NULL_RTX);
4129 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4130 t2, 1, NULL_RTX, 1);
4131 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4132 t1, t3),
4133 NULL_RTX);
4134 quotient = expand_shift
4135 (RSHIFT_EXPR, compute_mode, t4,
4136 post_shift - 1, tquotient, 1);
4137 }
4138 else
4139 {
4140 rtx t1, t2;
4141
4142 if (pre_shift >= BITS_PER_WORD
4143 || post_shift >= BITS_PER_WORD)
4144 goto fail1;
4145
4146 t1 = expand_shift
4147 (RSHIFT_EXPR, compute_mode, op0,
4148 pre_shift, NULL_RTX, 1);
4149 extra_cost
4150 = (shift_cost (speed, compute_mode, pre_shift)
4151 + shift_cost (speed, compute_mode, post_shift));
4152 t2 = expmed_mult_highpart (compute_mode, t1,
4153 GEN_INT (ml),
4154 NULL_RTX, 1,
4155 max_cost - extra_cost);
4156 if (t2 == 0)
4157 goto fail1;
4158 quotient = expand_shift
4159 (RSHIFT_EXPR, compute_mode, t2,
4160 post_shift, tquotient, 1);
4161 }
4162 }
4163 }
4164 else /* Mode too wide to use the tricky code */
4165 break;
4166
4167 insn = get_last_insn ();
4168 if (insn != last)
4169 set_dst_reg_note (insn, REG_EQUAL,
4170 gen_rtx_UDIV (compute_mode, op0, op1),
4171 quotient);
4172 }
4173 else /* TRUNC_DIV, signed */
4174 {
4175 unsigned HOST_WIDE_INT ml;
4176 int lgup, post_shift;
4177 rtx mlr;
4178 HOST_WIDE_INT d = INTVAL (op1);
4179 unsigned HOST_WIDE_INT abs_d;
4180
4181 /* Since d might be INT_MIN, we have to cast to
4182 unsigned HOST_WIDE_INT before negating to avoid
4183 undefined signed overflow. */
4184 abs_d = (d >= 0
4185 ? (unsigned HOST_WIDE_INT) d
4186 : - (unsigned HOST_WIDE_INT) d);
4187
4188 /* n rem d = n rem -d */
4189 if (rem_flag && d < 0)
4190 {
4191 d = abs_d;
4192 op1 = gen_int_mode (abs_d, compute_mode);
4193 }
4194
4195 if (d == 1)
4196 quotient = op0;
4197 else if (d == -1)
4198 quotient = expand_unop (compute_mode, neg_optab, op0,
4199 tquotient, 0);
4200 else if (HOST_BITS_PER_WIDE_INT >= size
4201 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4202 {
4203 /* This case is not handled correctly below. */
4204 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4205 compute_mode, 1, 1);
4206 if (quotient == 0)
4207 goto fail1;
4208 }
4209 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4210 && (rem_flag
4211 ? smod_pow2_cheap (speed, compute_mode)
4212 : sdiv_pow2_cheap (speed, compute_mode))
4213 /* We assume that the cheap metric is true if the
4214 optab has an expander for this mode. */
4215 && ((optab_handler ((rem_flag ? smod_optab
4216 : sdiv_optab),
4217 compute_mode)
4218 != CODE_FOR_nothing)
4219 || (optab_handler (sdivmod_optab,
4220 compute_mode)
4221 != CODE_FOR_nothing)))
4222 ;
4223 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4224 {
4225 if (rem_flag)
4226 {
4227 remainder = expand_smod_pow2 (compute_mode, op0, d);
4228 if (remainder)
4229 return gen_lowpart (mode, remainder);
4230 }
4231
4232 if (sdiv_pow2_cheap (speed, compute_mode)
4233 && ((optab_handler (sdiv_optab, compute_mode)
4234 != CODE_FOR_nothing)
4235 || (optab_handler (sdivmod_optab, compute_mode)
4236 != CODE_FOR_nothing)))
4237 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4238 compute_mode, op0,
4239 gen_int_mode (abs_d,
4240 compute_mode),
4241 NULL_RTX, 0);
4242 else
4243 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4244
4245 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4246 negate the quotient. */
4247 if (d < 0)
4248 {
4249 insn = get_last_insn ();
4250 if (insn != last
4251 && abs_d < ((unsigned HOST_WIDE_INT) 1
4252 << (HOST_BITS_PER_WIDE_INT - 1)))
4253 set_dst_reg_note (insn, REG_EQUAL,
4254 gen_rtx_DIV (compute_mode, op0,
4255 gen_int_mode
4256 (abs_d,
4257 compute_mode)),
4258 quotient);
4259
4260 quotient = expand_unop (compute_mode, neg_optab,
4261 quotient, quotient, 0);
4262 }
4263 }
4264 else if (size <= HOST_BITS_PER_WIDE_INT)
4265 {
4266 choose_multiplier (abs_d, size, size - 1,
4267 &ml, &post_shift, &lgup);
4268 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4269 {
4270 rtx t1, t2, t3;
4271
4272 if (post_shift >= BITS_PER_WORD
4273 || size - 1 >= BITS_PER_WORD)
4274 goto fail1;
4275
4276 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4277 + shift_cost (speed, compute_mode, size - 1)
4278 + add_cost (speed, compute_mode));
4279 t1 = expmed_mult_highpart (compute_mode, op0,
4280 GEN_INT (ml), NULL_RTX, 0,
4281 max_cost - extra_cost);
4282 if (t1 == 0)
4283 goto fail1;
4284 t2 = expand_shift
4285 (RSHIFT_EXPR, compute_mode, t1,
4286 post_shift, NULL_RTX, 0);
4287 t3 = expand_shift
4288 (RSHIFT_EXPR, compute_mode, op0,
4289 size - 1, NULL_RTX, 0);
4290 if (d < 0)
4291 quotient
4292 = force_operand (gen_rtx_MINUS (compute_mode,
4293 t3, t2),
4294 tquotient);
4295 else
4296 quotient
4297 = force_operand (gen_rtx_MINUS (compute_mode,
4298 t2, t3),
4299 tquotient);
4300 }
4301 else
4302 {
4303 rtx t1, t2, t3, t4;
4304
4305 if (post_shift >= BITS_PER_WORD
4306 || size - 1 >= BITS_PER_WORD)
4307 goto fail1;
4308
4309 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4310 mlr = gen_int_mode (ml, compute_mode);
4311 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4312 + shift_cost (speed, compute_mode, size - 1)
4313 + 2 * add_cost (speed, compute_mode));
4314 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4315 NULL_RTX, 0,
4316 max_cost - extra_cost);
4317 if (t1 == 0)
4318 goto fail1;
4319 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4320 t1, op0),
4321 NULL_RTX);
4322 t3 = expand_shift
4323 (RSHIFT_EXPR, compute_mode, t2,
4324 post_shift, NULL_RTX, 0);
4325 t4 = expand_shift
4326 (RSHIFT_EXPR, compute_mode, op0,
4327 size - 1, NULL_RTX, 0);
4328 if (d < 0)
4329 quotient
4330 = force_operand (gen_rtx_MINUS (compute_mode,
4331 t4, t3),
4332 tquotient);
4333 else
4334 quotient
4335 = force_operand (gen_rtx_MINUS (compute_mode,
4336 t3, t4),
4337 tquotient);
4338 }
4339 }
4340 else /* Mode too wide to use the tricky code */
4341 break;
4342
4343 insn = get_last_insn ();
4344 if (insn != last)
4345 set_dst_reg_note (insn, REG_EQUAL,
4346 gen_rtx_DIV (compute_mode, op0, op1),
4347 quotient);
4348 }
4349 break;
4350 }
4351 fail1:
4352 delete_insns_since (last);
4353 break;
4354
4355 case FLOOR_DIV_EXPR:
4356 case FLOOR_MOD_EXPR:
4357 /* We will come here only for signed operations. */
4358 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4359 {
4360 unsigned HOST_WIDE_INT mh, ml;
4361 int pre_shift, lgup, post_shift;
4362 HOST_WIDE_INT d = INTVAL (op1);
4363
4364 if (d > 0)
4365 {
4366 /* We could just as easily deal with negative constants here,
4367 but it does not seem worth the trouble for GCC 2.6. */
4368 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4369 {
4370 pre_shift = floor_log2 (d);
4371 if (rem_flag)
4372 {
4373 remainder = expand_binop (compute_mode, and_optab, op0,
4374 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4375 remainder, 0, OPTAB_LIB_WIDEN);
4376 if (remainder)
4377 return gen_lowpart (mode, remainder);
4378 }
4379 quotient = expand_shift
4380 (RSHIFT_EXPR, compute_mode, op0,
4381 pre_shift, tquotient, 0);
4382 }
4383 else
4384 {
4385 rtx t1, t2, t3, t4;
4386
4387 mh = choose_multiplier (d, size, size - 1,
4388 &ml, &post_shift, &lgup);
4389 gcc_assert (!mh);
4390
4391 if (post_shift < BITS_PER_WORD
4392 && size - 1 < BITS_PER_WORD)
4393 {
4394 t1 = expand_shift
4395 (RSHIFT_EXPR, compute_mode, op0,
4396 size - 1, NULL_RTX, 0);
4397 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4398 NULL_RTX, 0, OPTAB_WIDEN);
4399 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4400 + shift_cost (speed, compute_mode, size - 1)
4401 + 2 * add_cost (speed, compute_mode));
4402 t3 = expmed_mult_highpart (compute_mode, t2,
4403 GEN_INT (ml), NULL_RTX, 1,
4404 max_cost - extra_cost);
4405 if (t3 != 0)
4406 {
4407 t4 = expand_shift
4408 (RSHIFT_EXPR, compute_mode, t3,
4409 post_shift, NULL_RTX, 1);
4410 quotient = expand_binop (compute_mode, xor_optab,
4411 t4, t1, tquotient, 0,
4412 OPTAB_WIDEN);
4413 }
4414 }
4415 }
4416 }
4417 else
4418 {
4419 rtx nsign, t1, t2, t3, t4;
4420 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4421 op0, constm1_rtx), NULL_RTX);
4422 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4423 0, OPTAB_WIDEN);
4424 nsign = expand_shift
4425 (RSHIFT_EXPR, compute_mode, t2,
4426 size - 1, NULL_RTX, 0);
4427 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4428 NULL_RTX);
4429 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4430 NULL_RTX, 0);
4431 if (t4)
4432 {
4433 rtx t5;
4434 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4435 NULL_RTX, 0);
4436 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4437 t4, t5),
4438 tquotient);
4439 }
4440 }
4441 }
4442
4443 if (quotient != 0)
4444 break;
4445 delete_insns_since (last);
4446
4447 /* Try using an instruction that produces both the quotient and
4448 remainder, using truncation. We can easily compensate the quotient
4449 or remainder to get floor rounding, once we have the remainder.
4450 Notice that we also compute the final remainder value here,
4451 and return the result right away. */
4452 if (target == 0 || GET_MODE (target) != compute_mode)
4453 target = gen_reg_rtx (compute_mode);
4454
4455 if (rem_flag)
4456 {
4457 remainder
4458 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4459 quotient = gen_reg_rtx (compute_mode);
4460 }
4461 else
4462 {
4463 quotient
4464 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4465 remainder = gen_reg_rtx (compute_mode);
4466 }
4467
4468 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4469 quotient, remainder, 0))
4470 {
4471 /* This could be computed with a branch-less sequence.
4472 Save that for later. */
4473 rtx tem;
4474 rtx label = gen_label_rtx ();
4475 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4476 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4477 NULL_RTX, 0, OPTAB_WIDEN);
4478 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4479 expand_dec (quotient, const1_rtx);
4480 expand_inc (remainder, op1);
4481 emit_label (label);
4482 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4483 }
4484
4485 /* No luck with division elimination or divmod. Have to do it
4486 by conditionally adjusting op0 *and* the result. */
4487 {
4488 rtx label1, label2, label3, label4, label5;
4489 rtx adjusted_op0;
4490 rtx tem;
4491
4492 quotient = gen_reg_rtx (compute_mode);
4493 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4494 label1 = gen_label_rtx ();
4495 label2 = gen_label_rtx ();
4496 label3 = gen_label_rtx ();
4497 label4 = gen_label_rtx ();
4498 label5 = gen_label_rtx ();
4499 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4500 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4501 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4502 quotient, 0, OPTAB_LIB_WIDEN);
4503 if (tem != quotient)
4504 emit_move_insn (quotient, tem);
4505 emit_jump_insn (gen_jump (label5));
4506 emit_barrier ();
4507 emit_label (label1);
4508 expand_inc (adjusted_op0, const1_rtx);
4509 emit_jump_insn (gen_jump (label4));
4510 emit_barrier ();
4511 emit_label (label2);
4512 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4513 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4514 quotient, 0, OPTAB_LIB_WIDEN);
4515 if (tem != quotient)
4516 emit_move_insn (quotient, tem);
4517 emit_jump_insn (gen_jump (label5));
4518 emit_barrier ();
4519 emit_label (label3);
4520 expand_dec (adjusted_op0, const1_rtx);
4521 emit_label (label4);
4522 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523 quotient, 0, OPTAB_LIB_WIDEN);
4524 if (tem != quotient)
4525 emit_move_insn (quotient, tem);
4526 expand_dec (quotient, const1_rtx);
4527 emit_label (label5);
4528 }
4529 break;
4530
4531 case CEIL_DIV_EXPR:
4532 case CEIL_MOD_EXPR:
4533 if (unsignedp)
4534 {
4535 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4536 {
4537 rtx t1, t2, t3;
4538 unsigned HOST_WIDE_INT d = INTVAL (op1);
4539 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4540 floor_log2 (d), tquotient, 1);
4541 t2 = expand_binop (compute_mode, and_optab, op0,
4542 GEN_INT (d - 1),
4543 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4544 t3 = gen_reg_rtx (compute_mode);
4545 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4546 compute_mode, 1, 1);
4547 if (t3 == 0)
4548 {
4549 rtx lab;
4550 lab = gen_label_rtx ();
4551 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4552 expand_inc (t1, const1_rtx);
4553 emit_label (lab);
4554 quotient = t1;
4555 }
4556 else
4557 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4558 t1, t3),
4559 tquotient);
4560 break;
4561 }
4562
4563 /* Try using an instruction that produces both the quotient and
4564 remainder, using truncation. We can easily compensate the
4565 quotient or remainder to get ceiling rounding, once we have the
4566 remainder. Notice that we also compute the final remainder
4567 value here, and return the result right away. */
4568 if (target == 0 || GET_MODE (target) != compute_mode)
4569 target = gen_reg_rtx (compute_mode);
4570
4571 if (rem_flag)
4572 {
4573 remainder = (REG_P (target)
4574 ? target : gen_reg_rtx (compute_mode));
4575 quotient = gen_reg_rtx (compute_mode);
4576 }
4577 else
4578 {
4579 quotient = (REG_P (target)
4580 ? target : gen_reg_rtx (compute_mode));
4581 remainder = gen_reg_rtx (compute_mode);
4582 }
4583
4584 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4585 remainder, 1))
4586 {
4587 /* This could be computed with a branch-less sequence.
4588 Save that for later. */
4589 rtx label = gen_label_rtx ();
4590 do_cmp_and_jump (remainder, const0_rtx, EQ,
4591 compute_mode, label);
4592 expand_inc (quotient, const1_rtx);
4593 expand_dec (remainder, op1);
4594 emit_label (label);
4595 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4596 }
4597
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
4600 {
4601 rtx label1, label2;
4602 rtx adjusted_op0, tem;
4603
4604 quotient = gen_reg_rtx (compute_mode);
4605 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4606 label1 = gen_label_rtx ();
4607 label2 = gen_label_rtx ();
4608 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4609 compute_mode, label1);
4610 emit_move_insn (quotient, const0_rtx);
4611 emit_jump_insn (gen_jump (label2));
4612 emit_barrier ();
4613 emit_label (label1);
4614 expand_dec (adjusted_op0, const1_rtx);
4615 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4616 quotient, 1, OPTAB_LIB_WIDEN);
4617 if (tem != quotient)
4618 emit_move_insn (quotient, tem);
4619 expand_inc (quotient, const1_rtx);
4620 emit_label (label2);
4621 }
4622 }
4623 else /* signed */
4624 {
4625 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4626 && INTVAL (op1) >= 0)
4627 {
4628 /* This is extremely similar to the code for the unsigned case
4629 above. For 2.7 we should merge these variants, but for
4630 2.6.1 I don't want to touch the code for unsigned since that
4631 gets used in C. The signed case will only be used by other
4632 languages (Ada). */
4633
4634 rtx t1, t2, t3;
4635 unsigned HOST_WIDE_INT d = INTVAL (op1);
4636 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4637 floor_log2 (d), tquotient, 0);
4638 t2 = expand_binop (compute_mode, and_optab, op0,
4639 GEN_INT (d - 1),
4640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4641 t3 = gen_reg_rtx (compute_mode);
4642 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4643 compute_mode, 1, 1);
4644 if (t3 == 0)
4645 {
4646 rtx lab;
4647 lab = gen_label_rtx ();
4648 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4649 expand_inc (t1, const1_rtx);
4650 emit_label (lab);
4651 quotient = t1;
4652 }
4653 else
4654 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4655 t1, t3),
4656 tquotient);
4657 break;
4658 }
4659
4660 /* Try using an instruction that produces both the quotient and
4661 remainder, using truncation. We can easily compensate the
4662 quotient or remainder to get ceiling rounding, once we have the
4663 remainder. Notice that we also compute the final remainder
4664 value here, and return the result right away. */
4665 if (target == 0 || GET_MODE (target) != compute_mode)
4666 target = gen_reg_rtx (compute_mode);
4667 if (rem_flag)
4668 {
4669 remainder = (REG_P (target)
4670 ? target : gen_reg_rtx (compute_mode));
4671 quotient = gen_reg_rtx (compute_mode);
4672 }
4673 else
4674 {
4675 quotient = (REG_P (target)
4676 ? target : gen_reg_rtx (compute_mode));
4677 remainder = gen_reg_rtx (compute_mode);
4678 }
4679
4680 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4681 remainder, 0))
4682 {
4683 /* This could be computed with a branch-less sequence.
4684 Save that for later. */
4685 rtx tem;
4686 rtx label = gen_label_rtx ();
4687 do_cmp_and_jump (remainder, const0_rtx, EQ,
4688 compute_mode, label);
4689 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4690 NULL_RTX, 0, OPTAB_WIDEN);
4691 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4692 expand_inc (quotient, const1_rtx);
4693 expand_dec (remainder, op1);
4694 emit_label (label);
4695 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4696 }
4697
4698 /* No luck with division elimination or divmod. Have to do it
4699 by conditionally adjusting op0 *and* the result. */
4700 {
4701 rtx label1, label2, label3, label4, label5;
4702 rtx adjusted_op0;
4703 rtx tem;
4704
4705 quotient = gen_reg_rtx (compute_mode);
4706 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4707 label1 = gen_label_rtx ();
4708 label2 = gen_label_rtx ();
4709 label3 = gen_label_rtx ();
4710 label4 = gen_label_rtx ();
4711 label5 = gen_label_rtx ();
4712 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4713 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4714 compute_mode, label1);
4715 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716 quotient, 0, OPTAB_LIB_WIDEN);
4717 if (tem != quotient)
4718 emit_move_insn (quotient, tem);
4719 emit_jump_insn (gen_jump (label5));
4720 emit_barrier ();
4721 emit_label (label1);
4722 expand_dec (adjusted_op0, const1_rtx);
4723 emit_jump_insn (gen_jump (label4));
4724 emit_barrier ();
4725 emit_label (label2);
4726 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4727 compute_mode, label3);
4728 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4729 quotient, 0, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 emit_jump_insn (gen_jump (label5));
4733 emit_barrier ();
4734 emit_label (label3);
4735 expand_inc (adjusted_op0, const1_rtx);
4736 emit_label (label4);
4737 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4738 quotient, 0, OPTAB_LIB_WIDEN);
4739 if (tem != quotient)
4740 emit_move_insn (quotient, tem);
4741 expand_inc (quotient, const1_rtx);
4742 emit_label (label5);
4743 }
4744 }
4745 break;
4746
4747 case EXACT_DIV_EXPR:
4748 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4749 {
4750 HOST_WIDE_INT d = INTVAL (op1);
4751 unsigned HOST_WIDE_INT ml;
4752 int pre_shift;
4753 rtx t1;
4754
4755 pre_shift = floor_log2 (d & -d);
4756 ml = invert_mod2n (d >> pre_shift, size);
4757 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4758 pre_shift, NULL_RTX, unsignedp);
4759 quotient = expand_mult (compute_mode, t1,
4760 gen_int_mode (ml, compute_mode),
4761 NULL_RTX, 1);
4762
4763 insn = get_last_insn ();
4764 set_dst_reg_note (insn, REG_EQUAL,
4765 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4766 compute_mode, op0, op1),
4767 quotient);
4768 }
4769 break;
4770
4771 case ROUND_DIV_EXPR:
4772 case ROUND_MOD_EXPR:
4773 if (unsignedp)
4774 {
4775 rtx tem;
4776 rtx label;
4777 label = gen_label_rtx ();
4778 quotient = gen_reg_rtx (compute_mode);
4779 remainder = gen_reg_rtx (compute_mode);
4780 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4781 {
4782 rtx tem;
4783 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4784 quotient, 1, OPTAB_LIB_WIDEN);
4785 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4786 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4787 remainder, 1, OPTAB_LIB_WIDEN);
4788 }
4789 tem = plus_constant (compute_mode, op1, -1);
4790 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4791 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4792 expand_inc (quotient, const1_rtx);
4793 expand_dec (remainder, op1);
4794 emit_label (label);
4795 }
4796 else
4797 {
4798 rtx abs_rem, abs_op1, tem, mask;
4799 rtx label;
4800 label = gen_label_rtx ();
4801 quotient = gen_reg_rtx (compute_mode);
4802 remainder = gen_reg_rtx (compute_mode);
4803 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4804 {
4805 rtx tem;
4806 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4807 quotient, 0, OPTAB_LIB_WIDEN);
4808 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4809 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4810 remainder, 0, OPTAB_LIB_WIDEN);
4811 }
4812 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4813 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4814 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4815 1, NULL_RTX, 1);
4816 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4817 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4818 NULL_RTX, 0, OPTAB_WIDEN);
4819 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4820 size - 1, NULL_RTX, 0);
4821 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4822 NULL_RTX, 0, OPTAB_WIDEN);
4823 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 expand_inc (quotient, tem);
4826 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4827 NULL_RTX, 0, OPTAB_WIDEN);
4828 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4829 NULL_RTX, 0, OPTAB_WIDEN);
4830 expand_dec (remainder, tem);
4831 emit_label (label);
4832 }
4833 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4834
4835 default:
4836 gcc_unreachable ();
4837 }
4838
4839 if (quotient == 0)
4840 {
4841 if (target && GET_MODE (target) != compute_mode)
4842 target = 0;
4843
4844 if (rem_flag)
4845 {
4846 /* Try to produce the remainder without producing the quotient.
4847 If we seem to have a divmod pattern that does not require widening,
4848 don't try widening here. We should really have a WIDEN argument
4849 to expand_twoval_binop, since what we'd really like to do here is
4850 1) try a mod insn in compute_mode
4851 2) try a divmod insn in compute_mode
4852 3) try a div insn in compute_mode and multiply-subtract to get
4853 remainder
4854 4) try the same things with widening allowed. */
4855 remainder
4856 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4857 op0, op1, target,
4858 unsignedp,
4859 ((optab_handler (optab2, compute_mode)
4860 != CODE_FOR_nothing)
4861 ? OPTAB_DIRECT : OPTAB_WIDEN));
4862 if (remainder == 0)
4863 {
4864 /* No luck there. Can we do remainder and divide at once
4865 without a library call? */
4866 remainder = gen_reg_rtx (compute_mode);
4867 if (! expand_twoval_binop ((unsignedp
4868 ? udivmod_optab
4869 : sdivmod_optab),
4870 op0, op1,
4871 NULL_RTX, remainder, unsignedp))
4872 remainder = 0;
4873 }
4874
4875 if (remainder)
4876 return gen_lowpart (mode, remainder);
4877 }
4878
4879 /* Produce the quotient. Try a quotient insn, but not a library call.
4880 If we have a divmod in this mode, use it in preference to widening
4881 the div (for this test we assume it will not fail). Note that optab2
4882 is set to whichever of the two optabs the call below will use. */
4883 quotient
4884 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4885 op0, op1, rem_flag ? NULL_RTX : target,
4886 unsignedp,
4887 ((optab_handler (optab2, compute_mode)
4888 != CODE_FOR_nothing)
4889 ? OPTAB_DIRECT : OPTAB_WIDEN));
4890
4891 if (quotient == 0)
4892 {
4893 /* No luck there. Try a quotient-and-remainder insn,
4894 keeping the quotient alone. */
4895 quotient = gen_reg_rtx (compute_mode);
4896 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4897 op0, op1,
4898 quotient, NULL_RTX, unsignedp))
4899 {
4900 quotient = 0;
4901 if (! rem_flag)
4902 /* Still no luck. If we are not computing the remainder,
4903 use a library call for the quotient. */
4904 quotient = sign_expand_binop (compute_mode,
4905 udiv_optab, sdiv_optab,
4906 op0, op1, target,
4907 unsignedp, OPTAB_LIB_WIDEN);
4908 }
4909 }
4910 }
4911
4912 if (rem_flag)
4913 {
4914 if (target && GET_MODE (target) != compute_mode)
4915 target = 0;
4916
4917 if (quotient == 0)
4918 {
4919 /* No divide instruction either. Use library for remainder. */
4920 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4921 op0, op1, target,
4922 unsignedp, OPTAB_LIB_WIDEN);
4923 /* No remainder function. Try a quotient-and-remainder
4924 function, keeping the remainder. */
4925 if (!remainder)
4926 {
4927 remainder = gen_reg_rtx (compute_mode);
4928 if (!expand_twoval_binop_libfunc
4929 (unsignedp ? udivmod_optab : sdivmod_optab,
4930 op0, op1,
4931 NULL_RTX, remainder,
4932 unsignedp ? UMOD : MOD))
4933 remainder = NULL_RTX;
4934 }
4935 }
4936 else
4937 {
4938 /* We divided. Now finish doing X - Y * (X / Y). */
4939 remainder = expand_mult (compute_mode, quotient, op1,
4940 NULL_RTX, unsignedp);
4941 remainder = expand_binop (compute_mode, sub_optab, op0,
4942 remainder, target, unsignedp,
4943 OPTAB_LIB_WIDEN);
4944 }
4945 }
4946
4947 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4948 }
4949 \f
4950 /* Return a tree node with data type TYPE, describing the value of X.
4951 Usually this is a VAR_DECL, if there is no obvious better choice.
4952 X may be an expression, however we only support those expressions
4953 generated by loop.c. */
4954
4955 tree
4956 make_tree (tree type, rtx x)
4957 {
4958 tree t;
4959
4960 switch (GET_CODE (x))
4961 {
4962 case CONST_INT:
4963 {
4964 HOST_WIDE_INT hi = 0;
4965
4966 if (INTVAL (x) < 0
4967 && !(TYPE_UNSIGNED (type)
4968 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4969 < HOST_BITS_PER_WIDE_INT)))
4970 hi = -1;
4971
4972 t = build_int_cst_wide (type, INTVAL (x), hi);
4973
4974 return t;
4975 }
4976
4977 case CONST_DOUBLE:
4978 if (GET_MODE (x) == VOIDmode)
4979 t = build_int_cst_wide (type,
4980 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4981 else
4982 {
4983 REAL_VALUE_TYPE d;
4984
4985 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4986 t = build_real (type, d);
4987 }
4988
4989 return t;
4990
4991 case CONST_VECTOR:
4992 {
4993 int units = CONST_VECTOR_NUNITS (x);
4994 tree itype = TREE_TYPE (type);
4995 tree *elts;
4996 int i;
4997
4998 /* Build a tree with vector elements. */
4999 elts = XALLOCAVEC (tree, units);
5000 for (i = units - 1; i >= 0; --i)
5001 {
5002 rtx elt = CONST_VECTOR_ELT (x, i);
5003 elts[i] = make_tree (itype, elt);
5004 }
5005
5006 return build_vector (type, elts);
5007 }
5008
5009 case PLUS:
5010 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1)));
5012
5013 case MINUS:
5014 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5015 make_tree (type, XEXP (x, 1)));
5016
5017 case NEG:
5018 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5019
5020 case MULT:
5021 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case ASHIFT:
5025 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5026 make_tree (type, XEXP (x, 1)));
5027
5028 case LSHIFTRT:
5029 t = unsigned_type_for (type);
5030 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5031 make_tree (t, XEXP (x, 0)),
5032 make_tree (type, XEXP (x, 1))));
5033
5034 case ASHIFTRT:
5035 t = signed_type_for (type);
5036 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5037 make_tree (t, XEXP (x, 0)),
5038 make_tree (type, XEXP (x, 1))));
5039
5040 case DIV:
5041 if (TREE_CODE (type) != REAL_TYPE)
5042 t = signed_type_for (type);
5043 else
5044 t = type;
5045
5046 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5047 make_tree (t, XEXP (x, 0)),
5048 make_tree (t, XEXP (x, 1))));
5049 case UDIV:
5050 t = unsigned_type_for (type);
5051 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5052 make_tree (t, XEXP (x, 0)),
5053 make_tree (t, XEXP (x, 1))));
5054
5055 case SIGN_EXTEND:
5056 case ZERO_EXTEND:
5057 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5058 GET_CODE (x) == ZERO_EXTEND);
5059 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5060
5061 case CONST:
5062 return make_tree (type, XEXP (x, 0));
5063
5064 case SYMBOL_REF:
5065 t = SYMBOL_REF_DECL (x);
5066 if (t)
5067 return fold_convert (type, build_fold_addr_expr (t));
5068 /* else fall through. */
5069
5070 default:
5071 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5072
5073 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5074 address mode to pointer mode. */
5075 if (POINTER_TYPE_P (type))
5076 x = convert_memory_address_addr_space
5077 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5078
5079 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5080 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5081 t->decl_with_rtl.rtl = x;
5082
5083 return t;
5084 }
5085 }
5086 \f
5087 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5088 and returning TARGET.
5089
5090 If TARGET is 0, a pseudo-register or constant is returned. */
5091
5092 rtx
5093 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5094 {
5095 rtx tem = 0;
5096
5097 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5098 tem = simplify_binary_operation (AND, mode, op0, op1);
5099 if (tem == 0)
5100 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5101
5102 if (target == 0)
5103 target = tem;
5104 else if (tem != target)
5105 emit_move_insn (target, tem);
5106 return target;
5107 }
5108
5109 /* Helper function for emit_store_flag. */
5110 static rtx
5111 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5112 enum machine_mode mode, enum machine_mode compare_mode,
5113 int unsignedp, rtx x, rtx y, int normalizep,
5114 enum machine_mode target_mode)
5115 {
5116 struct expand_operand ops[4];
5117 rtx op0, last, comparison, subtarget;
5118 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5119
5120 last = get_last_insn ();
5121 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5122 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5123 if (!x || !y)
5124 {
5125 delete_insns_since (last);
5126 return NULL_RTX;
5127 }
5128
5129 if (target_mode == VOIDmode)
5130 target_mode = result_mode;
5131 if (!target)
5132 target = gen_reg_rtx (target_mode);
5133
5134 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5135
5136 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5137 create_fixed_operand (&ops[1], comparison);
5138 create_fixed_operand (&ops[2], x);
5139 create_fixed_operand (&ops[3], y);
5140 if (!maybe_expand_insn (icode, 4, ops))
5141 {
5142 delete_insns_since (last);
5143 return NULL_RTX;
5144 }
5145 subtarget = ops[0].value;
5146
5147 /* If we are converting to a wider mode, first convert to
5148 TARGET_MODE, then normalize. This produces better combining
5149 opportunities on machines that have a SIGN_EXTRACT when we are
5150 testing a single bit. This mostly benefits the 68k.
5151
5152 If STORE_FLAG_VALUE does not have the sign bit set when
5153 interpreted in MODE, we can do this conversion as unsigned, which
5154 is usually more efficient. */
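/* For instance, a STORE_FLAG_VALUE of 1 is unchanged by zero extension,
   whereas a value with the sign bit set (such as -1) must be sign
   extended to keep its meaning in the wider mode; that is what the
   val_signbit_known_clear_p test below selects. */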
5155 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5156 {
5157 convert_move (target, subtarget,
5158 val_signbit_known_clear_p (result_mode,
5159 STORE_FLAG_VALUE));
5160 op0 = target;
5161 result_mode = target_mode;
5162 }
5163 else
5164 op0 = subtarget;
5165
5166 /* If we want to keep subexpressions around, don't reuse our last
5167 target. */
5168 if (optimize)
5169 subtarget = 0;
5170
5171 /* Now normalize to the proper value in MODE. Sometimes we don't
5172 have to do anything. */
5173 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5174 ;
5175 /* STORE_FLAG_VALUE might be the most negative number, so write
5176 the comparison this way to avoid a compile-time warning. */
5177 else if (- normalizep == STORE_FLAG_VALUE)
5178 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5179
5180 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5181 it hard to use a value of just the sign bit due to ANSI integer
5182 constant typing rules. */
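/* When STORE_FLAG_VALUE has its sign bit set, the raw result is either
   zero or a value whose sign bit is set, so shifting it right by one
   less than the mode's width yields 0/1 (logical shift, for
   NORMALIZEP == 1) or 0/-1 (arithmetic shift). */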
5183 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5184 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5185 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5186 normalizep == 1);
5187 else
5188 {
5189 gcc_assert (STORE_FLAG_VALUE & 1);
5190
5191 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5192 if (normalizep == -1)
5193 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5194 }
5195
5196 /* If we were converting to a smaller mode, do the conversion now. */
5197 if (target_mode != result_mode)
5198 {
5199 convert_move (target, op0, 0);
5200 return target;
5201 }
5202 else
5203 return op0;
5204 }
5205
5206
5207 /* A subroutine of emit_store_flag only including "tricks" that do not
5208 need a recursive call. These are kept separate to avoid infinite
5209 loops. */
5210
5211 static rtx
5212 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5213 enum machine_mode mode, int unsignedp, int normalizep,
5214 enum machine_mode target_mode)
5215 {
5216 rtx subtarget;
5217 enum insn_code icode;
5218 enum machine_mode compare_mode;
5219 enum mode_class mclass;
5220 enum rtx_code scode;
5221 rtx tem;
5222
5223 if (unsignedp)
5224 code = unsigned_condition (code);
5225 scode = swap_condition (code);
5226
5227 /* If one operand is constant, make it the second one. Only do this
5228 if the other operand is not constant as well. */
5229
5230 if (swap_commutative_operands_p (op0, op1))
5231 {
5232 tem = op0;
5233 op0 = op1;
5234 op1 = tem;
5235 code = swap_condition (code);
5236 }
5237
5238 if (mode == VOIDmode)
5239 mode = GET_MODE (op0);
5240
5241 /* For some comparisons with 1 and -1, we can convert this to
5242 comparisons with zero. This will often produce more opportunities for
5243 store-flag insns. */
5244
5245 switch (code)
5246 {
5247 case LT:
5248 if (op1 == const1_rtx)
5249 op1 = const0_rtx, code = LE;
5250 break;
5251 case LE:
5252 if (op1 == constm1_rtx)
5253 op1 = const0_rtx, code = LT;
5254 break;
5255 case GE:
5256 if (op1 == const1_rtx)
5257 op1 = const0_rtx, code = GT;
5258 break;
5259 case GT:
5260 if (op1 == constm1_rtx)
5261 op1 = const0_rtx, code = GE;
5262 break;
5263 case GEU:
5264 if (op1 == const1_rtx)
5265 op1 = const0_rtx, code = NE;
5266 break;
5267 case LTU:
5268 if (op1 == const1_rtx)
5269 op1 = const0_rtx, code = EQ;
5270 break;
5271 default:
5272 break;
5273 }
5274
5275 /* If we are comparing a double-word integer with zero or -1, we can
5276 convert the comparison into one involving a single word. */
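/* For example, on a 32-bit target a DImode value equals zero exactly
   when the IOR of its two words is zero, equals -1 exactly when the AND
   of its two words is -1, and its sign is determined by the high word
   alone. */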
5277 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5278 && GET_MODE_CLASS (mode) == MODE_INT
5279 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5280 {
5281 if ((code == EQ || code == NE)
5282 && (op1 == const0_rtx || op1 == constm1_rtx))
5283 {
5284 rtx op00, op01;
5285
5286 /* Do a logical OR or AND of the two words and compare the
5287 result. */
5288 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5289 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5290 tem = expand_binop (word_mode,
5291 op1 == const0_rtx ? ior_optab : and_optab,
5292 op00, op01, NULL_RTX, unsignedp,
5293 OPTAB_DIRECT);
5294
5295 if (tem != 0)
5296 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5297 unsignedp, normalizep);
5298 }
5299 else if ((code == LT || code == GE) && op1 == const0_rtx)
5300 {
5301 rtx op0h;
5302
5303 /* If testing the sign bit, can just test on high word. */
5304 op0h = simplify_gen_subreg (word_mode, op0, mode,
5305 subreg_highpart_offset (word_mode,
5306 mode));
5307 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5308 unsignedp, normalizep);
5309 }
5310 else
5311 tem = NULL_RTX;
5312
5313 if (tem)
5314 {
5315 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5316 return tem;
5317 if (!target)
5318 target = gen_reg_rtx (target_mode);
5319
5320 convert_move (target, tem,
5321 !val_signbit_known_set_p (word_mode,
5322 (normalizep ? normalizep
5323 : STORE_FLAG_VALUE)));
5324 return target;
5325 }
5326 }
5327
5328 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5329 complement of A (for GE) and shifting the sign bit to the low bit. */
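/* For example, for a 1/0 result the sign bit of A (or of ~A for GE) is
   moved to bit 0 by a logical right shift of one less than the mode's
   width; an arithmetic shift instead smears it into the -1/0 form. */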
5330 if (op1 == const0_rtx && (code == LT || code == GE)
5331 && GET_MODE_CLASS (mode) == MODE_INT
5332 && (normalizep || STORE_FLAG_VALUE == 1
5333 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5334 {
5335 subtarget = target;
5336
5337 if (!target)
5338 target_mode = mode;
5339
5340 /* If the result is to be wider than OP0, it is best to convert it
5341 first. If it is to be narrower, it is *incorrect* to convert it
5342 first. */
5343 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5344 {
5345 op0 = convert_modes (target_mode, mode, op0, 0);
5346 mode = target_mode;
5347 }
5348
5349 if (target_mode != mode)
5350 subtarget = 0;
5351
5352 if (code == GE)
5353 op0 = expand_unop (mode, one_cmpl_optab, op0,
5354 ((STORE_FLAG_VALUE == 1 || normalizep)
5355 ? 0 : subtarget), 0);
5356
5357 if (STORE_FLAG_VALUE == 1 || normalizep)
5358 /* If we are supposed to produce a 0/1 value, we want to do
5359 a logical shift from the sign bit to the low-order bit; for
5360 a -1/0 value, we do an arithmetic shift. */
5361 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5362 GET_MODE_BITSIZE (mode) - 1,
5363 subtarget, normalizep != -1);
5364
5365 if (mode != target_mode)
5366 op0 = convert_modes (target_mode, mode, op0, 0);
5367
5368 return op0;
5369 }
5370
5371 mclass = GET_MODE_CLASS (mode);
5372 for (compare_mode = mode; compare_mode != VOIDmode;
5373 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5374 {
5375 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5376 icode = optab_handler (cstore_optab, optab_mode);
5377 if (icode != CODE_FOR_nothing)
5378 {
5379 do_pending_stack_adjust ();
5380 tem = emit_cstore (target, icode, code, mode, compare_mode,
5381 unsignedp, op0, op1, normalizep, target_mode);
5382 if (tem)
5383 return tem;
5384
5385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5386 {
5387 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5388 unsignedp, op1, op0, normalizep, target_mode);
5389 if (tem)
5390 return tem;
5391 }
5392 break;
5393 }
5394 }
5395
5396 return 0;
5397 }
5398
5399 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5400 and storing in TARGET. Normally return TARGET.
5401 Return 0 if that cannot be done.
5402
5403 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5404 it is VOIDmode, they cannot both be CONST_INT.
5405
5406 UNSIGNEDP is for the case where we have to widen the operands
5407 to perform the operation. It says to use zero-extension.
5408
5409 NORMALIZEP is 1 if we should convert the result to be either zero
5410 or one. NORMALIZEP is -1 if we should convert the result to be
5411 either zero or -1. If NORMALIZEP is zero, the result will be left
5412 "raw" out of the scc insn. */
5413
5414 rtx
5415 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5416 enum machine_mode mode, int unsignedp, int normalizep)
5417 {
5418 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5419 enum rtx_code rcode;
5420 rtx subtarget;
5421 rtx tem, last, trueval;
5422
5423 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5424 target_mode);
5425 if (tem)
5426 return tem;
5427
5428 /* If we reached here, we can't do this with a scc insn; however, there
5429 are some comparisons that can be done in other ways. Don't do any
5430 of these cases if branches are very cheap. */
5431 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5432 return 0;
5433
5434 /* See what we need to return. We can only return a 1, -1, or the
5435 sign bit. */
5436
5437 if (normalizep == 0)
5438 {
5439 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5440 normalizep = STORE_FLAG_VALUE;
5441
5442 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5443 ;
5444 else
5445 return 0;
5446 }
5447
5448 last = get_last_insn ();
5449
5450 /* If optimizing, use different pseudo registers for each insn, instead
5451 of reusing the same pseudo. This leads to better CSE, but slows
5452 down the compiler, since there are more pseudos. */
5453 subtarget = (!optimize
5454 && (target_mode == mode)) ? target : NULL_RTX;
5455 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5456
5457 /* For floating-point comparisons, try the reverse comparison or try
5458 changing the "orderedness" of the comparison. */
5459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5460 {
5461 enum rtx_code first_code;
5462 bool and_them;
5463
5464 rcode = reverse_condition_maybe_unordered (code);
5465 if (can_compare_p (rcode, mode, ccp_store_flag)
5466 && (code == ORDERED || code == UNORDERED
5467 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5468 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5469 {
5470 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5471 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5472
5473 /* For the reverse comparison, use either an addition or an XOR. */
5474 if (want_add
5475 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5476 optimize_insn_for_speed_p ()) == 0)
5477 {
5478 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5479 STORE_FLAG_VALUE, target_mode);
5480 if (tem)
5481 return expand_binop (target_mode, add_optab, tem,
5482 GEN_INT (normalizep),
5483 target, 0, OPTAB_WIDEN);
5484 }
5485 else if (!want_add
5486 && rtx_cost (trueval, XOR, 1,
5487 optimize_insn_for_speed_p ()) == 0)
5488 {
5489 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5490 normalizep, target_mode);
5491 if (tem)
5492 return expand_binop (target_mode, xor_optab, tem, trueval,
5493 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5494 }
5495 }
5496
5497 delete_insns_since (last);
5498
5499 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5500 if (code == ORDERED || code == UNORDERED)
5501 return 0;
5502
5503 and_them = split_comparison (code, mode, &first_code, &code);
5504
5505 /* If there are no NaNs, the first comparison should always fall through.
5506 Effectively change the comparison to the other one. */
5507 if (!HONOR_NANS (mode))
5508 {
5509 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5510 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5511 target_mode);
5512 }
5513
5514 #ifdef HAVE_conditional_move
5515 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5516 conditional move. */
5517 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5518 normalizep, target_mode);
5519 if (tem == 0)
5520 return 0;
5521
5522 if (and_them)
5523 tem = emit_conditional_move (target, code, op0, op1, mode,
5524 tem, const0_rtx, GET_MODE (tem), 0);
5525 else
5526 tem = emit_conditional_move (target, code, op0, op1, mode,
5527 trueval, tem, GET_MODE (tem), 0);
5528
5529 if (tem == 0)
5530 delete_insns_since (last);
5531 return tem;
5532 #else
5533 return 0;
5534 #endif
5535 }
5536
5537 /* The remaining tricks only apply to integer comparisons. */
5538
5539 if (GET_MODE_CLASS (mode) != MODE_INT)
5540 return 0;
5541
5542 /* If this is an equality comparison of integers, we can try to exclusive-or
5543 (or subtract) the two operands and use a recursive call to try the
5544 comparison with zero. Don't do any of these cases if branches are
5545 very cheap. */
5546
5547 if ((code == EQ || code == NE) && op1 != const0_rtx)
5548 {
5549 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5550 OPTAB_WIDEN);
5551
5552 if (tem == 0)
5553 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5554 OPTAB_WIDEN);
5555 if (tem != 0)
5556 tem = emit_store_flag (target, code, tem, const0_rtx,
5557 mode, unsignedp, normalizep);
5558 if (tem != 0)
5559 return tem;
5560
5561 delete_insns_since (last);
5562 }
5563
5564 /* For integer comparisons, try the reverse comparison. However, for
5565 small X, and if we'd have to extend anyway, implementing "X != 0"
5566 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5567 rcode = reverse_condition (code);
5568 if (can_compare_p (rcode, mode, ccp_store_flag)
5569 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5570 && code == NE
5571 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5572 && op1 == const0_rtx))
5573 {
5574 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5575 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5576
5577 /* Again, for the reverse comparison, use either an addition or an XOR. */
5578 if (want_add
5579 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5580 optimize_insn_for_speed_p ()) == 0)
5581 {
5582 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5583 STORE_FLAG_VALUE, target_mode);
5584 if (tem != 0)
5585 tem = expand_binop (target_mode, add_optab, tem,
5586 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5587 }
5588 else if (!want_add
5589 && rtx_cost (trueval, XOR, 1,
5590 optimize_insn_for_speed_p ()) == 0)
5591 {
5592 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593 normalizep, target_mode);
5594 if (tem != 0)
5595 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5596 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5597 }
5598
5599 if (tem != 0)
5600 return tem;
5601 delete_insns_since (last);
5602 }
5603
5604 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5605 the constant zero. Reject all other comparisons at this point. Only
5606 do LE and GT if branches are expensive since they are expensive on
5607 2-operand machines. */
5608
5609 if (op1 != const0_rtx
5610 || (code != EQ && code != NE
5611 && (BRANCH_COST (optimize_insn_for_speed_p (),
5612 false) <= 1 || (code != LE && code != GT))))
5613 return 0;
5614
5615 /* Try to put the result of the comparison in the sign bit. Assume we can't
5616 do the necessary operation below. */
5617
5618 tem = 0;
5619
5620 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5621 the sign bit set. */
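/* For example, if A is 3 then A | (A - 1) is 3 | 2 = 3 (sign bit clear);
   if A is 0 it is 0 | -1 = -1 (sign bit set); and if A is negative its
   own sign bit already makes the IOR negative. */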
5622
5623 if (code == LE)
5624 {
5625 /* This is destructive, so SUBTARGET can't be OP0. */
5626 if (rtx_equal_p (subtarget, op0))
5627 subtarget = 0;
5628
5629 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5630 OPTAB_WIDEN);
5631 if (tem)
5632 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5633 OPTAB_WIDEN);
5634 }
5635
5636 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5637 number of bits in the mode of OP0, minus one. */
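/* For example, the arithmetic shift gives 0 for A >= 0 and -1 for A < 0;
   subtracting A then yields -A (negative) for A > 0, 0 for A == 0, and
   -1 - A (nonnegative) for A < 0, so the sign bit is set exactly when
   A > 0. */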
5638
5639 if (code == GT)
5640 {
5641 if (rtx_equal_p (subtarget, op0))
5642 subtarget = 0;
5643
5644 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5645 GET_MODE_BITSIZE (mode) - 1,
5646 subtarget, 0);
5647 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5648 OPTAB_WIDEN);
5649 }
5650
5651 if (code == EQ || code == NE)
5652 {
5653 /* For EQ or NE, one way to do the comparison is to apply an operation
5654 that converts the operand into a positive number if it is nonzero
5655 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5656 for NE we negate. This puts the result in the sign bit. Then we
5657 normalize with a shift, if needed.
5658
5659 Two operations that can do the above actions are ABS and FFS, so try
5660 them. If that doesn't work, and MODE is smaller than a full word,
5661 we can use zero-extension to the wider mode (an unsigned conversion)
5662 as the operation. */
5663
5664 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5665 that is compensated by the subsequent overflow when subtracting
5666 one / negating. */
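/* For example, with ABS: if A is nonzero the result is at least 1 (the
   INT_MIN case is covered above), so subtracting 1 (for EQ) leaves the
   sign bit clear and negating (for NE) sets it; if A is zero the
   subtraction gives -1 and the negation gives 0, so the sign bit holds
   the desired answer either way. */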
5667
5668 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5669 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5670 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5671 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5672 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5673 {
5674 tem = convert_modes (word_mode, mode, op0, 1);
5675 mode = word_mode;
5676 }
5677
5678 if (tem != 0)
5679 {
5680 if (code == EQ)
5681 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5682 0, OPTAB_WIDEN);
5683 else
5684 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5685 }
5686
5687 /* If we couldn't do it that way, for NE we can "or" the two's complement
5688 of the value with itself. For EQ, we take the one's complement of
5689 that "or", which is an extra insn, so we only handle EQ if branches
5690 are expensive. */
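/* -A | A is 0 when A is 0 and otherwise has its sign bit set, since at
   least one of A and -A is negative; the one's complement then flips
   the sign bit for the EQ case. */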
5691
5692 if (tem == 0
5693 && (code == NE
5694 || BRANCH_COST (optimize_insn_for_speed_p (),
5695 false) > 1))
5696 {
5697 if (rtx_equal_p (subtarget, op0))
5698 subtarget = 0;
5699
5700 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5701 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5702 OPTAB_WIDEN);
5703
5704 if (tem && code == EQ)
5705 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5706 }
5707 }
5708
5709 if (tem && normalizep)
5710 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5711 GET_MODE_BITSIZE (mode) - 1,
5712 subtarget, normalizep == 1);
5713
5714 if (tem)
5715 {
5716 if (!target)
5717 ;
5718 else if (GET_MODE (tem) != target_mode)
5719 {
5720 convert_move (target, tem, 0);
5721 tem = target;
5722 }
5723 else if (!subtarget)
5724 {
5725 emit_move_insn (target, tem);
5726 tem = target;
5727 }
5728 }
5729 else
5730 delete_insns_since (last);
5731
5732 return tem;
5733 }
5734
5735 /* Like emit_store_flag, but always succeeds. */
5736
5737 rtx
5738 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5739 enum machine_mode mode, int unsignedp, int normalizep)
5740 {
5741 rtx tem, label;
5742 rtx trueval, falseval;
5743
5744 /* First see if emit_store_flag can do the job. */
5745 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5746 if (tem != 0)
5747 return tem;
5748
5749 if (!target)
5750 target = gen_reg_rtx (word_mode);
5751
5752 /* If this failed, we have to do this with set/compare/jump/set code.
5753 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5754 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5755 if (code == NE
5756 && GET_MODE_CLASS (mode) == MODE_INT
5757 && REG_P (target)
5758 && op0 == target
5759 && op1 == const0_rtx)
5760 {
5761 label = gen_label_rtx ();
5762 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5763 mode, NULL_RTX, NULL_RTX, label, -1);
5764 emit_move_insn (target, trueval);
5765 emit_label (label);
5766 return target;
5767 }
5768
5769 if (!REG_P (target)
5770 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5771 target = gen_reg_rtx (GET_MODE (target));
5772
5773 /* Jump in the right direction if the target cannot implement CODE
5774 but can jump on its reverse condition. */
5775 falseval = const0_rtx;
5776 if (! can_compare_p (code, mode, ccp_jump)
5777 && (! FLOAT_MODE_P (mode)
5778 || code == ORDERED || code == UNORDERED
5779 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5780 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5781 {
5782 enum rtx_code rcode;
5783 if (FLOAT_MODE_P (mode))
5784 rcode = reverse_condition_maybe_unordered (code);
5785 else
5786 rcode = reverse_condition (code);
5787
5788 /* Canonicalize to UNORDERED for the libcall. */
5789 if (can_compare_p (rcode, mode, ccp_jump)
5790 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5791 {
5792 falseval = trueval;
5793 trueval = const0_rtx;
5794 code = rcode;
5795 }
5796 }
5797
5798 emit_move_insn (target, trueval);
5799 label = gen_label_rtx ();
5800 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5801 NULL_RTX, label, -1);
5802
5803 emit_move_insn (target, falseval);
5804 emit_label (label);
5805
5806 return target;
5807 }
5808 \f
5809 /* Perform possibly multi-word comparison and conditional jump to LABEL
5810 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5811 now a thin wrapper around do_compare_rtx_and_jump. */
5812
5813 static void
5814 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5815 rtx label)
5816 {
5817 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5818 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5819 NULL_RTX, NULL_RTX, label, -1);
5820 }