expmed.c (store_bit_field_1): Move generation of MEM insvs to the MEM_P block.
[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
69 /* Test whether a value is zero of a power of two. */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
393
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
398 {
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
405 }
406 \f
407 /* Try to use an insv pattern to store VALUE into a field of OP0.
408 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
409 as for store_bit_field. */
410
411 static bool
412 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
413 unsigned HOST_WIDE_INT bitnum, rtx value,
414 enum machine_mode op_mode)
415 {
416 struct expand_operand ops[4];
417 rtx value1;
418 rtx xop0 = op0;
419 rtx last = get_last_insn ();
420 bool copy_back = false;
421
422 unsigned int unit = GET_MODE_BITSIZE (op_mode);
423 if (bitsize == 0 || bitsize > unit)
424 return false;
425
426 if (MEM_P (xop0))
427 {
428 /* Get a reference to the first byte of the field. */
429 xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT);
430 bitnum %= BITS_PER_UNIT;
431 }
432 else
433 {
434 /* Convert from counting within OP0 to counting in OP_MODE. */
435 if (BYTES_BIG_ENDIAN)
436 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
437
438 /* If xop0 is a register, we need it in OP_MODE
439 to make it acceptable to the format of insv. */
440 if (GET_CODE (xop0) == SUBREG)
441 /* We can't just change the mode, because this might clobber op0,
442 and we will need the original value of op0 if insv fails. */
443 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
444 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
445 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
446 }
447
448 /* If the destination is a paradoxical subreg such that we need a
449 truncate to the inner mode, perform the insertion on a temporary and
450 truncate the result to the original destination. Note that we can't
451 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
452 X) 0)) is (reg:N X). */
453 if (GET_CODE (xop0) == SUBREG
454 && REG_P (SUBREG_REG (xop0))
455 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
456 op_mode))
457 {
458 rtx tem = gen_reg_rtx (op_mode);
459 emit_move_insn (tem, xop0);
460 xop0 = tem;
461 copy_back = true;
462 }
463
464 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
465 "backwards" from the size of the unit we are inserting into.
466 Otherwise, we count bits from the most significant on a
467 BYTES/BITS_BIG_ENDIAN machine. */
468
469 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
470 bitnum = unit - bitsize - bitnum;
471
472 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
473 value1 = value;
474 if (GET_MODE (value) != op_mode)
475 {
476 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
477 {
478 /* Optimization: Don't bother really extending VALUE
479 if it has all the bits we will actually use. However,
480 if we must narrow it, be sure we do it correctly. */
481
482 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
483 {
484 rtx tmp;
485
486 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
487 if (! tmp)
488 tmp = simplify_gen_subreg (op_mode,
489 force_reg (GET_MODE (value),
490 value1),
491 GET_MODE (value), 0);
492 value1 = tmp;
493 }
494 else
495 value1 = gen_lowpart (op_mode, value1);
496 }
497 else if (CONST_INT_P (value))
498 value1 = gen_int_mode (INTVAL (value), op_mode);
499 else
500 /* Parse phase is supposed to make VALUE's data type
501 match that of the component reference, which is a type
502 at least as wide as the field; so VALUE should have
503 a mode that corresponds to that type. */
504 gcc_assert (CONSTANT_P (value));
505 }
506
507 create_fixed_operand (&ops[0], xop0);
508 create_integer_operand (&ops[1], bitsize);
509 create_integer_operand (&ops[2], bitnum);
510 create_input_operand (&ops[3], value1, op_mode);
511 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
512 {
513 if (copy_back)
514 convert_move (op0, xop0, true);
515 return true;
516 }
517 delete_insns_since (last);
518 return false;
519 }
520
521 /* A subroutine of store_bit_field, with the same arguments. Return true
522 if the operation could be implemented.
523
524 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
525 no other way of implementing the operation. If FALLBACK_P is false,
526 return false instead. */
527
528 static bool
529 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum,
531 unsigned HOST_WIDE_INT bitregion_start,
532 unsigned HOST_WIDE_INT bitregion_end,
533 enum machine_mode fieldmode,
534 rtx value, bool fallback_p)
535 {
536 rtx op0 = str_rtx;
537 rtx orig_value;
538
539 while (GET_CODE (op0) == SUBREG)
540 {
541 /* The following line once was done only if WORDS_BIG_ENDIAN,
542 but I think that is a mistake. WORDS_BIG_ENDIAN is
543 meaningful at a much higher level; when structures are copied
544 between memory and regs, the higher-numbered regs
545 always get higher addresses. */
546 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
547 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
548 int byte_offset = 0;
549
550 /* Paradoxical subregs need special handling on big endian machines. */
551 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
552 {
553 int difference = inner_mode_size - outer_mode_size;
554
555 if (WORDS_BIG_ENDIAN)
556 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
557 if (BYTES_BIG_ENDIAN)
558 byte_offset += difference % UNITS_PER_WORD;
559 }
560 else
561 byte_offset = SUBREG_BYTE (op0);
562
563 bitnum += byte_offset * BITS_PER_UNIT;
564 op0 = SUBREG_REG (op0);
565 }
566
567 /* No action is needed if the target is a register and if the field
568 lies completely outside that register. This can occur if the source
569 code contains an out-of-bounds access to a small array. */
570 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
571 return true;
572
573 /* Use vec_set patterns for inserting parts of vectors whenever
574 available. */
575 if (VECTOR_MODE_P (GET_MODE (op0))
576 && !MEM_P (op0)
577 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
578 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
579 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
580 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
581 {
582 struct expand_operand ops[3];
583 enum machine_mode outermode = GET_MODE (op0);
584 enum machine_mode innermode = GET_MODE_INNER (outermode);
585 enum insn_code icode = optab_handler (vec_set_optab, outermode);
586 int pos = bitnum / GET_MODE_BITSIZE (innermode);
587
588 create_fixed_operand (&ops[0], op0);
589 create_input_operand (&ops[1], value, innermode);
590 create_integer_operand (&ops[2], pos);
591 if (maybe_expand_insn (icode, 3, ops))
592 return true;
593 }
594
595 /* If the target is a register, overwriting the entire object, or storing
596 a full-word or multi-word field can be done with just a SUBREG. */
597 if (!MEM_P (op0)
598 && bitsize == GET_MODE_BITSIZE (fieldmode)
599 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
600 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
601 {
602 /* Use the subreg machinery either to narrow OP0 to the required
603 words or to cope with mode punning between equal-sized modes. */
604 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
605 bitnum / BITS_PER_UNIT);
606 if (sub)
607 {
608 emit_move_insn (sub, value);
609 return true;
610 }
611 }
612
613 /* If the target is memory, storing any naturally aligned field can be
614 done with a simple store. For targets that support fast unaligned
615 memory, any naturally sized, unit aligned field can be done directly. */
616 if (MEM_P (op0)
617 && bitnum % BITS_PER_UNIT == 0
618 && bitsize == GET_MODE_BITSIZE (fieldmode)
619 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
620 || (bitnum % bitsize == 0
621 && MEM_ALIGN (op0) % bitsize == 0)))
622 {
623 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
624 emit_move_insn (op0, value);
625 return true;
626 }
627
628 /* Make sure we are playing with integral modes. Pun with subregs
629 if we aren't. This must come after the entire register case above,
630 since that case is valid for any mode. The following cases are only
631 valid for integral modes. */
632 {
633 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
634 if (imode != GET_MODE (op0))
635 {
636 if (MEM_P (op0))
637 op0 = adjust_bitfield_address (op0, imode, 0);
638 else
639 {
640 gcc_assert (imode != BLKmode);
641 op0 = gen_lowpart (imode, op0);
642 }
643 }
644 }
645
646 /* Storing an lsb-aligned field in a register
647 can be done with a movstrict instruction. */
648
649 if (!MEM_P (op0)
650 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
651 && bitsize == GET_MODE_BITSIZE (fieldmode)
652 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
653 {
654 struct expand_operand ops[2];
655 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
656 rtx arg0 = op0;
657 unsigned HOST_WIDE_INT subreg_off;
658
659 if (GET_CODE (arg0) == SUBREG)
660 {
661 /* Else we've got some float mode source being extracted into
662 a different float mode destination -- this combination of
663 subregs results in Severe Tire Damage. */
664 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
665 || GET_MODE_CLASS (fieldmode) == MODE_INT
666 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
667 arg0 = SUBREG_REG (arg0);
668 }
669
670 subreg_off = bitnum / BITS_PER_UNIT;
671 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
672 {
673 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
674
675 create_fixed_operand (&ops[0], arg0);
676 /* Shrink the source operand to FIELDMODE. */
677 create_convert_operand_to (&ops[1], value, fieldmode, false);
678 if (maybe_expand_insn (icode, 2, ops))
679 return true;
680 }
681 }
682
683 /* Handle fields bigger than a word. */
684
685 if (bitsize > BITS_PER_WORD)
686 {
687 /* Here we transfer the words of the field
688 in the order least significant first.
689 This is because the most significant word is the one which may
690 be less than full.
691 However, only do that if the value is not BLKmode. */
692
693 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
694 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
695 unsigned int i;
696 rtx last;
697
698 /* This is the mode we must force value to, so that there will be enough
699 subwords to extract. Note that fieldmode will often (always?) be
700 VOIDmode, because that is what store_field uses to indicate that this
701 is a bit field, but passing VOIDmode to operand_subword_force
702 is not allowed. */
703 fieldmode = GET_MODE (value);
704 if (fieldmode == VOIDmode)
705 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
706
707 last = get_last_insn ();
708 for (i = 0; i < nwords; i++)
709 {
710 /* If I is 0, use the low-order word in both field and target;
711 if I is 1, use the next to lowest word; and so on. */
712 unsigned int wordnum = (backwards
713 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
714 - i - 1
715 : i);
716 unsigned int bit_offset = (backwards
717 ? MAX ((int) bitsize - ((int) i + 1)
718 * BITS_PER_WORD,
719 0)
720 : (int) i * BITS_PER_WORD);
721 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
722 unsigned HOST_WIDE_INT new_bitsize =
723 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
724
725 /* If the remaining chunk doesn't have full wordsize we have
726 to make sure that for big endian machines the higher order
727 bits are used. */
728 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
729 value_word = simplify_expand_binop (word_mode, lshr_optab,
730 value_word,
731 GEN_INT (BITS_PER_WORD
732 - new_bitsize),
733 NULL_RTX, true,
734 OPTAB_LIB_WIDEN);
735
736 if (!store_bit_field_1 (op0, new_bitsize,
737 bitnum + bit_offset,
738 bitregion_start, bitregion_end,
739 word_mode,
740 value_word, fallback_p))
741 {
742 delete_insns_since (last);
743 return false;
744 }
745 }
746 return true;
747 }
748
749 /* If VALUE has a floating-point or complex mode, access it as an
750 integer of the corresponding size. This can occur on a machine
751 with 64 bit registers that uses SFmode for float. It can also
752 occur for unaligned float or complex fields. */
753 orig_value = value;
754 if (GET_MODE (value) != VOIDmode
755 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
756 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
757 {
758 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
759 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
760 }
761
762 /* If OP0 is a multi-word register, narrow it to the affected word.
763 If the region spans two words, defer to store_split_bit_field. */
764 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
765 {
766 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
767 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
768 gcc_assert (op0);
769 bitnum %= BITS_PER_WORD;
770 if (bitnum + bitsize > BITS_PER_WORD)
771 {
772 if (!fallback_p)
773 return false;
774
775 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
776 bitregion_end, value);
777 return true;
778 }
779 }
780
781 /* From here on we can assume that the field to be stored in fits
782 within a word. If the destination is a register, it too fits
783 in a word. */
784
785 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
786 if (op_mode != MAX_MACHINE_MODE
787 && !MEM_P (op0)
788 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
789 return true;
790
791 /* If OP0 is a memory, try copying it to a register and seeing if a
792 cheap register alternative is available. */
793 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
794 {
795 enum machine_mode bestmode;
796 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
797
798 /* Do not use insv for volatile bitfields when
799 -fstrict-volatile-bitfields is in effect. */
800 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
801 /* Do not use insv if the bit region is restricted and
802 an op_mode integer doesn't fit into the restricted region. */
803 && !(bitregion_end
804 && (bitnum - (bitnum % BITS_PER_UNIT)
805 + GET_MODE_BITSIZE (op_mode)
806 > bitregion_end + 1))
807 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
808 return true;
809
810 if (bitregion_end)
811 maxbits = bitregion_end - bitregion_start + 1;
812
813 /* Get the mode to use for inserting into this field. If OP0 is
814 BLKmode, get the smallest mode consistent with the alignment. If
815 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
816 mode. Otherwise, use the smallest mode containing the field. */
817
818 if (GET_MODE (op0) == BLKmode
819 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
820 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
821 bestmode = get_best_mode (bitsize, bitnum,
822 bitregion_start, bitregion_end,
823 MEM_ALIGN (op0), op_mode,
824 MEM_VOLATILE_P (op0));
825 else
826 bestmode = GET_MODE (op0);
827
828 if (bestmode != VOIDmode
829 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
830 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
831 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
832 {
833 rtx last, tempreg, xop0;
834 unsigned int unit;
835 unsigned HOST_WIDE_INT offset, bitpos;
836
837 last = get_last_insn ();
838
839 /* Adjust address to point to the containing unit of
840 that mode. Compute the offset as a multiple of this unit,
841 counting in bytes. */
842 unit = GET_MODE_BITSIZE (bestmode);
843 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
844 bitpos = bitnum % unit;
845 xop0 = adjust_bitfield_address (op0, bestmode, offset);
846
847 /* Fetch that unit, store the bitfield in it, then store
848 the unit. */
849 tempreg = copy_to_reg (xop0);
850 if (store_bit_field_1 (tempreg, bitsize, bitpos,
851 bitregion_start, bitregion_end,
852 fieldmode, orig_value, false))
853 {
854 emit_move_insn (xop0, tempreg);
855 return true;
856 }
857 delete_insns_since (last);
858 }
859 }
860
861 if (!fallback_p)
862 return false;
863
864 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
865 bitregion_end, value);
866 return true;
867 }
868
869 /* Generate code to store value from rtx VALUE
870 into a bit-field within structure STR_RTX
871 containing BITSIZE bits starting at bit BITNUM.
872
873 BITREGION_START is bitpos of the first bitfield in this region.
874 BITREGION_END is the bitpos of the ending bitfield in this region.
875 These two fields are 0, if the C++ memory model does not apply,
876 or we are not interested in keeping track of bitfield regions.
877
878 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
879
880 void
881 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitnum,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 enum machine_mode fieldmode,
886 rtx value)
887 {
888 /* Under the C++0x memory model, we must not touch bits outside the
889 bit region. Adjust the address to start at the beginning of the
890 bit region. */
891 if (MEM_P (str_rtx) && bitregion_start > 0)
892 {
893 enum machine_mode bestmode;
894 enum machine_mode op_mode;
895 unsigned HOST_WIDE_INT offset;
896
897 op_mode = mode_for_extraction (EP_insv, 3);
898 if (op_mode == MAX_MACHINE_MODE)
899 op_mode = VOIDmode;
900
901 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
902
903 offset = bitregion_start / BITS_PER_UNIT;
904 bitnum -= bitregion_start;
905 bitregion_end -= bitregion_start;
906 bitregion_start = 0;
907 bestmode = get_best_mode (bitsize, bitnum,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (str_rtx),
910 op_mode,
911 MEM_VOLATILE_P (str_rtx));
912 str_rtx = adjust_address (str_rtx, bestmode, offset);
913 }
914
915 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
916 bitregion_start, bitregion_end,
917 fieldmode, value, true))
918 gcc_unreachable ();
919 }
920 \f
921 /* Use shifts and boolean operations to store VALUE into a bit field of
922 width BITSIZE in OP0, starting at bit BITNUM. */
923
924 static void
925 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
926 unsigned HOST_WIDE_INT bitnum,
927 unsigned HOST_WIDE_INT bitregion_start,
928 unsigned HOST_WIDE_INT bitregion_end,
929 rtx value)
930 {
931 enum machine_mode mode;
932 rtx temp;
933 int all_zero = 0;
934 int all_one = 0;
935
936 /* There is a case not handled here:
937 a structure with a known alignment of just a halfword
938 and a field split across two aligned halfwords within the structure.
939 Or likewise a structure with a known alignment of just a byte
940 and a field split across two bytes.
941 Such cases are not supposed to be able to occur. */
942
943 if (MEM_P (op0))
944 {
945 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
946
947 if (bitregion_end)
948 maxbits = bitregion_end - bitregion_start + 1;
949
950 /* Get the proper mode to use for this field. We want a mode that
951 includes the entire field. If such a mode would be larger than
952 a word, we won't be doing the extraction the normal way.
953 We don't want a mode bigger than the destination. */
954
955 mode = GET_MODE (op0);
956 if (GET_MODE_BITSIZE (mode) == 0
957 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
958 mode = word_mode;
959
960 if (MEM_VOLATILE_P (op0)
961 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
962 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
963 && flag_strict_volatile_bitfields > 0)
964 mode = GET_MODE (op0);
965 else
966 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
967 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
968
969 if (mode == VOIDmode)
970 {
971 /* The only way this should occur is if the field spans word
972 boundaries. */
973 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
974 bitregion_end, value);
975 return;
976 }
977
978 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
979 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
980 bitnum -= bit_offset;
981 }
982
983 mode = GET_MODE (op0);
984 gcc_assert (SCALAR_INT_MODE_P (mode));
985
986 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
987 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
988
989 if (BYTES_BIG_ENDIAN)
990 /* BITNUM is the distance between our msb
991 and that of the containing datum.
992 Convert it to the distance from the lsb. */
993 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
994
995 /* Now BITNUM is always the distance between our lsb
996 and that of OP0. */
997
998 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
999 we must first convert its mode to MODE. */
1000
1001 if (CONST_INT_P (value))
1002 {
1003 HOST_WIDE_INT v = INTVAL (value);
1004
1005 if (bitsize < HOST_BITS_PER_WIDE_INT)
1006 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1007
1008 if (v == 0)
1009 all_zero = 1;
1010 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1011 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1012 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1013 all_one = 1;
1014
1015 value = lshift_value (mode, value, bitnum, bitsize);
1016 }
1017 else
1018 {
1019 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1020 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1021
1022 if (GET_MODE (value) != mode)
1023 value = convert_to_mode (mode, value, 1);
1024
1025 if (must_and)
1026 value = expand_binop (mode, and_optab, value,
1027 mask_rtx (mode, 0, bitsize, 0),
1028 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1029 if (bitnum > 0)
1030 value = expand_shift (LSHIFT_EXPR, mode, value,
1031 bitnum, NULL_RTX, 1);
1032 }
1033
1034 /* Now clear the chosen bits in OP0,
1035 except that if VALUE is -1 we need not bother. */
1036 /* We keep the intermediates in registers to allow CSE to combine
1037 consecutive bitfield assignments. */
1038
1039 temp = force_reg (mode, op0);
1040
1041 if (! all_one)
1042 {
1043 temp = expand_binop (mode, and_optab, temp,
1044 mask_rtx (mode, bitnum, bitsize, 1),
1045 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1046 temp = force_reg (mode, temp);
1047 }
1048
1049 /* Now logical-or VALUE into OP0, unless it is zero. */
1050
1051 if (! all_zero)
1052 {
1053 temp = expand_binop (mode, ior_optab, temp, value,
1054 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1055 temp = force_reg (mode, temp);
1056 }
1057
1058 if (op0 != temp)
1059 {
1060 op0 = copy_rtx (op0);
1061 emit_move_insn (op0, temp);
1062 }
1063 }
1064 \f
1065 /* Store a bit field that is split across multiple accessible memory objects.
1066
1067 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1068 BITSIZE is the field width; BITPOS the position of its first bit
1069 (within the word).
1070 VALUE is the value to store.
1071
1072 This does not yet handle fields wider than BITS_PER_WORD. */
1073
1074 static void
1075 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1076 unsigned HOST_WIDE_INT bitpos,
1077 unsigned HOST_WIDE_INT bitregion_start,
1078 unsigned HOST_WIDE_INT bitregion_end,
1079 rtx value)
1080 {
1081 unsigned int unit;
1082 unsigned int bitsdone = 0;
1083
1084 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1085 much at a time. */
1086 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1087 unit = BITS_PER_WORD;
1088 else
1089 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1090
1091 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1092 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1093 that VALUE might be a floating-point constant. */
1094 if (CONSTANT_P (value) && !CONST_INT_P (value))
1095 {
1096 rtx word = gen_lowpart_common (word_mode, value);
1097
1098 if (word && (value != word))
1099 value = word;
1100 else
1101 value = gen_lowpart_common (word_mode,
1102 force_reg (GET_MODE (value) != VOIDmode
1103 ? GET_MODE (value)
1104 : word_mode, value));
1105 }
1106
1107 while (bitsdone < bitsize)
1108 {
1109 unsigned HOST_WIDE_INT thissize;
1110 rtx part, word;
1111 unsigned HOST_WIDE_INT thispos;
1112 unsigned HOST_WIDE_INT offset;
1113
1114 offset = (bitpos + bitsdone) / unit;
1115 thispos = (bitpos + bitsdone) % unit;
1116
1117 /* When region of bytes we can touch is restricted, decrease
1118 UNIT close to the end of the region as needed. */
1119 if (bitregion_end
1120 && unit > BITS_PER_UNIT
1121 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1122 {
1123 unit = unit / 2;
1124 continue;
1125 }
1126
1127 /* THISSIZE must not overrun a word boundary. Otherwise,
1128 store_fixed_bit_field will call us again, and we will mutually
1129 recurse forever. */
1130 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1131 thissize = MIN (thissize, unit - thispos);
1132
1133 if (BYTES_BIG_ENDIAN)
1134 {
1135 /* Fetch successively less significant portions. */
1136 if (CONST_INT_P (value))
1137 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1138 >> (bitsize - bitsdone - thissize))
1139 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1140 else
1141 {
1142 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1143 /* The args are chosen so that the last part includes the
1144 lsb. Give extract_bit_field the value it needs (with
1145 endianness compensation) to fetch the piece we want. */
1146 part = extract_fixed_bit_field (word_mode, value, thissize,
1147 total_bits - bitsize + bitsdone,
1148 NULL_RTX, 1, false);
1149 }
1150 }
1151 else
1152 {
1153 /* Fetch successively more significant portions. */
1154 if (CONST_INT_P (value))
1155 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1156 >> bitsdone)
1157 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1158 else
1159 part = extract_fixed_bit_field (word_mode, value, thissize,
1160 bitsdone, NULL_RTX, 1, false);
1161 }
1162
1163 /* If OP0 is a register, then handle OFFSET here.
1164
1165 When handling multiword bitfields, extract_bit_field may pass
1166 down a word_mode SUBREG of a larger REG for a bitfield that actually
1167 crosses a word boundary. Thus, for a SUBREG, we must find
1168 the current word starting from the base register. */
1169 if (GET_CODE (op0) == SUBREG)
1170 {
1171 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1172 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1173 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1174 word = word_offset ? const0_rtx : op0;
1175 else
1176 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1177 GET_MODE (SUBREG_REG (op0)));
1178 offset = 0;
1179 }
1180 else if (REG_P (op0))
1181 {
1182 enum machine_mode op0_mode = GET_MODE (op0);
1183 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1184 word = offset ? const0_rtx : op0;
1185 else
1186 word = operand_subword_force (op0, offset, GET_MODE (op0));
1187 offset = 0;
1188 }
1189 else
1190 word = op0;
1191
1192 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1193 it is just an out-of-bounds access. Ignore it. */
1194 if (word != const0_rtx)
1195 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1196 bitregion_start, bitregion_end, part);
1197 bitsdone += thissize;
1198 }
1199 }
1200 \f
1201 /* A subroutine of extract_bit_field_1 that converts return value X
1202 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1203 to extract_bit_field. */
1204
1205 static rtx
1206 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1207 enum machine_mode tmode, bool unsignedp)
1208 {
1209 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1210 return x;
1211
1212 /* If the x mode is not a scalar integral, first convert to the
1213 integer mode of that size and then access it as a floating-point
1214 value via a SUBREG. */
1215 if (!SCALAR_INT_MODE_P (tmode))
1216 {
1217 enum machine_mode smode;
1218
1219 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1220 x = convert_to_mode (smode, x, unsignedp);
1221 x = force_reg (smode, x);
1222 return gen_lowpart (tmode, x);
1223 }
1224
1225 return convert_to_mode (tmode, x, unsignedp);
1226 }
1227
1228 /* Try to use an ext(z)v pattern to extract a field from OP0.
1229 Return the extracted value on success, otherwise return null.
1230 EXT_MODE is the mode of the extraction and the other arguments
1231 are as for extract_bit_field. */
1232
1233 static rtx
1234 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1235 unsigned HOST_WIDE_INT bitnum,
1236 int unsignedp, rtx target,
1237 enum machine_mode mode, enum machine_mode tmode,
1238 enum machine_mode ext_mode)
1239 {
1240 struct expand_operand ops[4];
1241 rtx spec_target = target;
1242 rtx spec_target_subreg = 0;
1243 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1244
1245 if (bitsize == 0 || unit < bitsize)
1246 return NULL_RTX;
1247
1248 if (MEM_P (op0))
1249 {
1250 /* Get a reference to the first byte of the field. */
1251 op0 = adjust_bitfield_address (op0, byte_mode, bitnum / BITS_PER_UNIT);
1252 bitnum %= BITS_PER_UNIT;
1253 }
1254 else
1255 {
1256 /* Convert from counting within OP0 to counting in EXT_MODE. */
1257 if (BYTES_BIG_ENDIAN)
1258 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1259
1260 /* If op0 is a register, we need it in EXT_MODE to make it
1261 acceptable to the format of ext(z)v. */
1262 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1263 return NULL_RTX;
1264 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1265 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1266 }
1267
1268 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1269 "backwards" from the size of the unit we are extracting from.
1270 Otherwise, we count bits from the most significant on a
1271 BYTES/BITS_BIG_ENDIAN machine. */
1272
1273 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1274 bitnum = unit - bitsize - bitnum;
1275
1276 if (target == 0)
1277 target = spec_target = gen_reg_rtx (tmode);
1278
1279 if (GET_MODE (target) != ext_mode)
1280 {
1281 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1282 between the mode of the extraction (word_mode) and the target
1283 mode. Instead, create a temporary and use convert_move to set
1284 the target. */
1285 if (REG_P (target)
1286 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1287 {
1288 target = gen_lowpart (ext_mode, target);
1289 if (GET_MODE_PRECISION (ext_mode)
1290 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1291 spec_target_subreg = target;
1292 }
1293 else
1294 target = gen_reg_rtx (ext_mode);
1295 }
1296
1297 create_output_operand (&ops[0], target, ext_mode);
1298 create_fixed_operand (&ops[1], op0);
1299 create_integer_operand (&ops[2], bitsize);
1300 create_integer_operand (&ops[3], bitnum);
1301 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1302 {
1303 target = ops[0].value;
1304 if (target == spec_target)
1305 return target;
1306 if (target == spec_target_subreg)
1307 return spec_target;
1308 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1309 }
1310 return NULL_RTX;
1311 }
1312
1313 /* A subroutine of extract_bit_field, with the same arguments.
1314 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1315 if we can find no other means of implementing the operation.
1316 if FALLBACK_P is false, return NULL instead. */
1317
1318 static rtx
1319 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1320 unsigned HOST_WIDE_INT bitnum,
1321 int unsignedp, bool packedp, rtx target,
1322 enum machine_mode mode, enum machine_mode tmode,
1323 bool fallback_p)
1324 {
1325 rtx op0 = str_rtx;
1326 enum machine_mode int_mode;
1327 enum machine_mode ext_mode;
1328 enum machine_mode mode1;
1329
1330 if (tmode == VOIDmode)
1331 tmode = mode;
1332
1333 while (GET_CODE (op0) == SUBREG)
1334 {
1335 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1336 op0 = SUBREG_REG (op0);
1337 }
1338
1339 /* If we have an out-of-bounds access to a register, just return an
1340 uninitialized register of the required mode. This can occur if the
1341 source code contains an out-of-bounds access to a small array. */
1342 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1343 return gen_reg_rtx (tmode);
1344
1345 if (REG_P (op0)
1346 && mode == GET_MODE (op0)
1347 && bitnum == 0
1348 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1349 {
1350 /* We're trying to extract a full register from itself. */
1351 return op0;
1352 }
1353
1354 /* See if we can get a better vector mode before extracting. */
1355 if (VECTOR_MODE_P (GET_MODE (op0))
1356 && !MEM_P (op0)
1357 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1358 {
1359 enum machine_mode new_mode;
1360
1361 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1362 new_mode = MIN_MODE_VECTOR_FLOAT;
1363 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1364 new_mode = MIN_MODE_VECTOR_FRACT;
1365 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1366 new_mode = MIN_MODE_VECTOR_UFRACT;
1367 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1368 new_mode = MIN_MODE_VECTOR_ACCUM;
1369 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1370 new_mode = MIN_MODE_VECTOR_UACCUM;
1371 else
1372 new_mode = MIN_MODE_VECTOR_INT;
1373
1374 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1375 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1376 && targetm.vector_mode_supported_p (new_mode))
1377 break;
1378 if (new_mode != VOIDmode)
1379 op0 = gen_lowpart (new_mode, op0);
1380 }
1381
1382 /* Use vec_extract patterns for extracting parts of vectors whenever
1383 available. */
1384 if (VECTOR_MODE_P (GET_MODE (op0))
1385 && !MEM_P (op0)
1386 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1387 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1388 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1389 {
1390 struct expand_operand ops[3];
1391 enum machine_mode outermode = GET_MODE (op0);
1392 enum machine_mode innermode = GET_MODE_INNER (outermode);
1393 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1394 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1395
1396 create_output_operand (&ops[0], target, innermode);
1397 create_input_operand (&ops[1], op0, outermode);
1398 create_integer_operand (&ops[2], pos);
1399 if (maybe_expand_insn (icode, 3, ops))
1400 {
1401 target = ops[0].value;
1402 if (GET_MODE (target) != mode)
1403 return gen_lowpart (tmode, target);
1404 return target;
1405 }
1406 }
1407
1408 /* Make sure we are playing with integral modes. Pun with subregs
1409 if we aren't. */
1410 {
1411 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1412 if (imode != GET_MODE (op0))
1413 {
1414 if (MEM_P (op0))
1415 op0 = adjust_bitfield_address (op0, imode, 0);
1416 else if (imode != BLKmode)
1417 {
1418 op0 = gen_lowpart (imode, op0);
1419
1420 /* If we got a SUBREG, force it into a register since we
1421 aren't going to be able to do another SUBREG on it. */
1422 if (GET_CODE (op0) == SUBREG)
1423 op0 = force_reg (imode, op0);
1424 }
1425 else if (REG_P (op0))
1426 {
1427 rtx reg, subreg;
1428 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1429 MODE_INT);
1430 reg = gen_reg_rtx (imode);
1431 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1432 emit_move_insn (subreg, op0);
1433 op0 = reg;
1434 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1435 }
1436 else
1437 {
1438 rtx mem = assign_stack_temp (GET_MODE (op0),
1439 GET_MODE_SIZE (GET_MODE (op0)));
1440 emit_move_insn (mem, op0);
1441 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1442 }
1443 }
1444 }
1445
1446 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1447 If that's wrong, the solution is to test for it and set TARGET to 0
1448 if needed. */
1449
1450 /* If the bitfield is volatile, we need to make sure the access
1451 remains on a type-aligned boundary. */
1452 if (GET_CODE (op0) == MEM
1453 && MEM_VOLATILE_P (op0)
1454 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1455 && flag_strict_volatile_bitfields > 0)
1456 goto no_subreg_mode_swap;
1457
1458 /* Only scalar integer modes can be converted via subregs. There is an
1459 additional problem for FP modes here in that they can have a precision
1460 which is different from the size. mode_for_size uses precision, but
1461 we want a mode based on the size, so we must avoid calling it for FP
1462 modes. */
1463 mode1 = mode;
1464 if (SCALAR_INT_MODE_P (tmode))
1465 {
1466 enum machine_mode try_mode = mode_for_size (bitsize,
1467 GET_MODE_CLASS (tmode), 0);
1468 if (try_mode != BLKmode)
1469 mode1 = try_mode;
1470 }
1471 gcc_assert (mode1 != BLKmode);
1472
1473 /* Extraction of a full MODE1 value can be done with a subreg as long
1474 as the least significant bit of the value is the least significant
1475 bit of either OP0 or a word of OP0. */
1476 if (!MEM_P (op0)
1477 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1478 && bitsize == GET_MODE_BITSIZE (mode1)
1479 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1480 {
1481 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1482 bitnum / BITS_PER_UNIT);
1483 if (sub)
1484 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1485 }
1486
1487 /* Extraction of a full MODE1 value can be done with a load as long as
1488 the field is on a byte boundary and is sufficiently aligned. */
1489 if (MEM_P (op0)
1490 && bitnum % BITS_PER_UNIT == 0
1491 && bitsize == GET_MODE_BITSIZE (mode1)
1492 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1493 || (bitnum % bitsize == 0
1494 && MEM_ALIGN (op0) % bitsize == 0)))
1495 {
1496 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1497 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1498 }
1499
1500 no_subreg_mode_swap:
1501
1502 /* Handle fields bigger than a word. */
1503
1504 if (bitsize > BITS_PER_WORD)
1505 {
1506 /* Here we transfer the words of the field
1507 in the order least significant first.
1508 This is because the most significant word is the one which may
1509 be less than full. */
1510
1511 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1512 unsigned int i;
1513 rtx last;
1514
1515 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1516 target = gen_reg_rtx (mode);
1517
1518 /* Indicate for flow that the entire target reg is being set. */
1519 emit_clobber (target);
1520
1521 last = get_last_insn ();
1522 for (i = 0; i < nwords; i++)
1523 {
1524 /* If I is 0, use the low-order word in both field and target;
1525 if I is 1, use the next to lowest word; and so on. */
1526 /* Word number in TARGET to use. */
1527 unsigned int wordnum
1528 = (WORDS_BIG_ENDIAN
1529 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1530 : i);
1531 /* Offset from start of field in OP0. */
1532 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1533 ? MAX (0, ((int) bitsize - ((int) i + 1)
1534 * (int) BITS_PER_WORD))
1535 : (int) i * BITS_PER_WORD);
1536 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1537 rtx result_part
1538 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1539 bitsize - i * BITS_PER_WORD),
1540 bitnum + bit_offset, 1, false, target_part,
1541 mode, word_mode, fallback_p);
1542
1543 gcc_assert (target_part);
1544 if (!result_part)
1545 {
1546 delete_insns_since (last);
1547 return NULL;
1548 }
1549
1550 if (result_part != target_part)
1551 emit_move_insn (target_part, result_part);
1552 }
1553
1554 if (unsignedp)
1555 {
1556 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1557 need to be zero'd out. */
1558 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1559 {
1560 unsigned int i, total_words;
1561
1562 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1563 for (i = nwords; i < total_words; i++)
1564 emit_move_insn
1565 (operand_subword (target,
1566 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1567 1, VOIDmode),
1568 const0_rtx);
1569 }
1570 return target;
1571 }
1572
1573 /* Signed bit field: sign-extend with two arithmetic shifts. */
1574 target = expand_shift (LSHIFT_EXPR, mode, target,
1575 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1576 return expand_shift (RSHIFT_EXPR, mode, target,
1577 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1578 }
1579
1580 /* If OP0 is a multi-word register, narrow it to the affected word.
1581 If the region spans two words, defer to extract_split_bit_field. */
1582 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1583 {
1584 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1585 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1586 bitnum %= BITS_PER_WORD;
1587 if (bitnum + bitsize > BITS_PER_WORD)
1588 {
1589 if (!fallback_p)
1590 return NULL_RTX;
1591 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1592 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1593 }
1594 }
1595
1596 /* From here on we know the desired field is smaller than a word.
1597 If OP0 is a register, it too fits within a word. */
1598
1599 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1600 if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0))
1601 {
1602 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1603 unsignedp, target, mode,
1604 tmode, ext_mode);
1605 if (result)
1606 return result;
1607 }
1608
1609 /* If OP0 is a memory, try copying it to a register and seeing if a
1610 cheap register alternative is available. */
1611 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1612 {
1613 enum machine_mode bestmode;
1614
1615 /* Do not use extv/extzv for volatile bitfields when
1616 -fstrict-volatile-bitfields is in effect. */
1617 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0))
1618 {
1619 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1620 unsignedp, target, mode,
1621 tmode, ext_mode);
1622 if (result)
1623 return result;
1624 }
1625
1626 /* Get the mode to use for inserting into this field. If
1627 OP0 is BLKmode, get the smallest mode consistent with the
1628 alignment. If OP0 is a non-BLKmode object that is no
1629 wider than EXT_MODE, use its mode. Otherwise, use the
1630 smallest mode containing the field. */
1631
1632 if (GET_MODE (op0) == BLKmode
1633 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1634 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1635 ext_mode, MEM_VOLATILE_P (op0));
1636 else
1637 bestmode = GET_MODE (op0);
1638
1639 if (bestmode != VOIDmode
1640 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1641 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1642 {
1643 unsigned HOST_WIDE_INT offset, bitpos;
1644
1645 /* Compute the offset as a multiple of this unit,
1646 counting in bytes. */
1647 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1648 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1649 bitpos = bitnum % unit;
1650
1651 /* Make sure the register is big enough for the whole field. */
1652 if (bitpos + bitsize <= unit)
1653 {
1654 rtx last, result, xop0;
1655
1656 last = get_last_insn ();
1657
1658 /* Fetch it to a register in that size. */
1659 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1660 xop0 = force_reg (bestmode, xop0);
1661 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1662 unsignedp, packedp, target,
1663 mode, tmode, false);
1664 if (result)
1665 return result;
1666
1667 delete_insns_since (last);
1668 }
1669 }
1670 }
1671
1672 if (!fallback_p)
1673 return NULL;
1674
1675 /* Find a correspondingly-sized integer field, so we can apply
1676 shifts and masks to it. */
1677 int_mode = int_mode_for_mode (tmode);
1678 if (int_mode == BLKmode)
1679 int_mode = int_mode_for_mode (mode);
1680 /* Should probably push op0 out to memory and then do a load. */
1681 gcc_assert (int_mode != BLKmode);
1682
1683 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1684 target, unsignedp, packedp);
1685 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687
1688 /* Generate code to extract a byte-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1699
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1704
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1712 }
1713 \f
1714 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1715 from bit BITNUM of OP0.
1716
1717 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1718 PACKEDP is true if the field has the packed attribute.
1719
1720 If TARGET is nonzero, attempts to store the value there
1721 and return TARGET, but this is not guaranteed.
1722 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1723
1724 static rtx
1725 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1726 unsigned HOST_WIDE_INT bitsize,
1727 unsigned HOST_WIDE_INT bitnum, rtx target,
1728 int unsignedp, bool packedp)
1729 {
1730 enum machine_mode mode;
1731
1732 if (MEM_P (op0))
1733 {
1734 /* Get the proper mode to use for this field. We want a mode that
1735 includes the entire field. If such a mode would be larger than
1736 a word, we won't be doing the extraction the normal way. */
1737
1738 if (MEM_VOLATILE_P (op0)
1739 && flag_strict_volatile_bitfields > 0)
1740 {
1741 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1742 mode = GET_MODE (op0);
1743 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1744 mode = GET_MODE (target);
1745 else
1746 mode = tmode;
1747 }
1748 else
1749 mode = get_best_mode (bitsize, bitnum, 0, 0,
1750 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1751
1752 if (mode == VOIDmode)
1753 /* The only way this should occur is if the field spans word
1754 boundaries. */
1755 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1756
1757 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1758 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1759
1760 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1761 if it results in a multi-word access where we otherwise wouldn't
1762 have one. So, check for that case here. */
1763 if (MEM_P (op0)
1764 && MEM_VOLATILE_P (op0)
1765 && flag_strict_volatile_bitfields > 0
1766 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1767 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1768 {
1769 if (STRICT_ALIGNMENT)
1770 {
1771 static bool informed_about_misalignment = false;
1772
1773 if (packedp)
1774 {
1775 if (bitsize == total_bits)
1776 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1777 "multiple accesses to volatile structure"
1778 " member because of packed attribute");
1779 else
1780 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1781 "multiple accesses to volatile structure"
1782 " bitfield because of packed attribute");
1783
1784 return extract_split_bit_field (op0, bitsize, bitnum,
1785 unsignedp);
1786 }
1787
1788 if (bitsize == total_bits)
1789 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1790 "mis-aligned access used for structure member");
1791 else
1792 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1793 "mis-aligned access used for structure bitfield");
1794
1795 if (! informed_about_misalignment)
1796 {
1797 informed_about_misalignment = true;
1798 inform (input_location,
1799 "when a volatile object spans multiple type-sized"
1800 " locations, the compiler must choose between using"
1801 " a single mis-aligned access to preserve the"
1802 " volatility, or using multiple aligned accesses"
1803 " to avoid runtime faults; this code may fail at"
1804 " runtime if the hardware does not allow this"
1805 " access");
1806 }
1807 }
1808 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1809 }
1810 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1811 bitnum -= bit_offset;
1812 }
1813
1814 mode = GET_MODE (op0);
1815 gcc_assert (SCALAR_INT_MODE_P (mode));
1816
1817 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1818 for invalid input, such as extract equivalent of f5 from
1819 gcc.dg/pr48335-2.c. */
1820
1821 if (BYTES_BIG_ENDIAN)
1822 /* BITNUM is the distance between our msb and that of OP0.
1823 Convert it to the distance from the lsb. */
1824 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1825
1826 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1827 We have reduced the big-endian case to the little-endian case. */
1828
1829 if (unsignedp)
1830 {
1831 if (bitnum)
1832 {
1833 /* If the field does not already start at the lsb,
1834 shift it so it does. */
1835 /* Maybe propagate the target for the shift. */
1836 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1837 if (tmode != mode)
1838 subtarget = 0;
1839 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1840 }
1841 /* Convert the value to the desired mode. */
1842 if (mode != tmode)
1843 op0 = convert_to_mode (tmode, op0, 1);
1844
1845 /* Unless the msb of the field used to be the msb when we shifted,
1846 mask out the upper bits. */
1847
1848 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1849 return expand_binop (GET_MODE (op0), and_optab, op0,
1850 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1851 target, 1, OPTAB_LIB_WIDEN);
1852 return op0;
1853 }
1854
1855 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1856 then arithmetic-shift its lsb to the lsb of the word. */
1857 op0 = force_reg (mode, op0);
1858
1859 /* Find the narrowest integer mode that contains the field. */
1860
1861 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1862 mode = GET_MODE_WIDER_MODE (mode))
1863 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1864 {
1865 op0 = convert_to_mode (mode, op0, 0);
1866 break;
1867 }
1868
1869 if (mode != tmode)
1870 target = 0;
1871
1872 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1873 {
1874 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1875 /* Maybe propagate the target for the shift. */
1876 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1877 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1878 }
1879
1880 return expand_shift (RSHIFT_EXPR, mode, op0,
1881 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1882 }
1883 \f
1884 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1885 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1886 complement of that if COMPLEMENT. The mask is truncated if
1887 necessary to the width of mode MODE. The mask is zero-extended if
1888 BITSIZE+BITPOS is too small for MODE. */
1889
1890 static rtx
1891 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1892 {
1893 double_int mask;
1894
1895 mask = double_int::mask (bitsize);
1896 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1897
1898 if (complement)
1899 mask = ~mask;
1900
1901 return immed_double_int_const (mask, mode);
1902 }
1903
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1905 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1906
1907 static rtx
1908 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1909 {
1910 double_int val;
1911
1912 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1913 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1914
1915 return immed_double_int_const (val, mode);
1916 }
1917 \f
1918 /* Extract a bit field that is split across two words
1919 and return an RTX for the result.
1920
1921 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1922 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1923 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1924
1925 static rtx
1926 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1927 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1928 {
1929 unsigned int unit;
1930 unsigned int bitsdone = 0;
1931 rtx result = NULL_RTX;
1932 int first = 1;
1933
1934 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1935 much at a time. */
1936 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1937 unit = BITS_PER_WORD;
1938 else
1939 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1940
1941 while (bitsdone < bitsize)
1942 {
1943 unsigned HOST_WIDE_INT thissize;
1944 rtx part, word;
1945 unsigned HOST_WIDE_INT thispos;
1946 unsigned HOST_WIDE_INT offset;
1947
1948 offset = (bitpos + bitsdone) / unit;
1949 thispos = (bitpos + bitsdone) % unit;
1950
1951 /* THISSIZE must not overrun a word boundary. Otherwise,
1952 extract_fixed_bit_field will call us again, and we will mutually
1953 recurse forever. */
1954 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1955 thissize = MIN (thissize, unit - thispos);
1956
1957 /* If OP0 is a register, then handle OFFSET here.
1958
1959 When handling multiword bitfields, extract_bit_field may pass
1960 down a word_mode SUBREG of a larger REG for a bitfield that actually
1961 crosses a word boundary. Thus, for a SUBREG, we must find
1962 the current word starting from the base register. */
1963 if (GET_CODE (op0) == SUBREG)
1964 {
1965 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1966 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1967 GET_MODE (SUBREG_REG (op0)));
1968 offset = 0;
1969 }
1970 else if (REG_P (op0))
1971 {
1972 word = operand_subword_force (op0, offset, GET_MODE (op0));
1973 offset = 0;
1974 }
1975 else
1976 word = op0;
1977
1978 /* Extract the parts in bit-counting order,
1979 whose meaning is determined by BYTES_PER_UNIT.
1980 OFFSET is in UNITs, and UNIT is in bits. */
1981 part = extract_fixed_bit_field (word_mode, word, thissize,
1982 offset * unit + thispos, 0, 1, false);
1983 bitsdone += thissize;
1984
1985 /* Shift this part into place for the result. */
1986 if (BYTES_BIG_ENDIAN)
1987 {
1988 if (bitsize != bitsdone)
1989 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1990 bitsize - bitsdone, 0, 1);
1991 }
1992 else
1993 {
1994 if (bitsdone != thissize)
1995 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1996 bitsdone - thissize, 0, 1);
1997 }
1998
1999 if (first)
2000 result = part;
2001 else
2002 /* Combine the parts with bitwise or. This works
2003 because we extracted each part as an unsigned bit field. */
2004 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2005 OPTAB_LIB_WIDEN);
2006
2007 first = 0;
2008 }
2009
2010 /* Unsigned bit field: we are done. */
2011 if (unsignedp)
2012 return result;
2013 /* Signed bit field: sign-extend with two arithmetic shifts. */
2014 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2015 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2016 return expand_shift (RSHIFT_EXPR, word_mode, result,
2017 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2018 }
2019 \f
2020 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2021 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2022 MODE, fill the upper bits with zeros. Fail if the layout of either
2023 mode is unknown (as for CC modes) or if the extraction would involve
2024 unprofitable mode punning. Return the value on success, otherwise
2025 return null.
2026
2027 This is different from gen_lowpart* in these respects:
2028
2029 - the returned value must always be considered an rvalue
2030
2031 - when MODE is wider than SRC_MODE, the extraction involves
2032 a zero extension
2033
2034 - when MODE is smaller than SRC_MODE, the extraction involves
2035 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2036
2037 In other words, this routine performs a computation, whereas the
2038 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2039 operations. */
2040
2041 rtx
2042 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2043 {
2044 enum machine_mode int_mode, src_int_mode;
2045
2046 if (mode == src_mode)
2047 return src;
2048
2049 if (CONSTANT_P (src))
2050 {
2051 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2052 fails, it will happily create (subreg (symbol_ref)) or similar
2053 invalid SUBREGs. */
2054 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2055 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2056 if (ret)
2057 return ret;
2058
2059 if (GET_MODE (src) == VOIDmode
2060 || !validate_subreg (mode, src_mode, src, byte))
2061 return NULL_RTX;
2062
2063 src = force_reg (GET_MODE (src), src);
2064 return gen_rtx_SUBREG (mode, src, byte);
2065 }
2066
2067 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2068 return NULL_RTX;
2069
2070 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2071 && MODES_TIEABLE_P (mode, src_mode))
2072 {
2073 rtx x = gen_lowpart_common (mode, src);
2074 if (x)
2075 return x;
2076 }
2077
2078 src_int_mode = int_mode_for_mode (src_mode);
2079 int_mode = int_mode_for_mode (mode);
2080 if (src_int_mode == BLKmode || int_mode == BLKmode)
2081 return NULL_RTX;
2082
2083 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2084 return NULL_RTX;
2085 if (!MODES_TIEABLE_P (int_mode, mode))
2086 return NULL_RTX;
2087
2088 src = gen_lowpart (src_int_mode, src);
2089 src = convert_modes (int_mode, src_int_mode, src, true);
2090 src = gen_lowpart (mode, src);
2091 return src;
2092 }
2093 \f
2094 /* Add INC into TARGET. */
2095
2096 void
2097 expand_inc (rtx target, rtx inc)
2098 {
2099 rtx value = expand_binop (GET_MODE (target), add_optab,
2100 target, inc,
2101 target, 0, OPTAB_LIB_WIDEN);
2102 if (value != target)
2103 emit_move_insn (target, value);
2104 }
2105
2106 /* Subtract DEC from TARGET. */
2107
2108 void
2109 expand_dec (rtx target, rtx dec)
2110 {
2111 rtx value = expand_binop (GET_MODE (target), sub_optab,
2112 target, dec,
2113 target, 0, OPTAB_LIB_WIDEN);
2114 if (value != target)
2115 emit_move_insn (target, value);
2116 }
2117 \f
2118 /* Output a shift instruction for expression code CODE,
2119 with SHIFTED being the rtx for the value to shift,
2120 and AMOUNT the rtx for the amount to shift by.
2121 Store the result in the rtx TARGET, if that is convenient.
2122 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2123 Return the rtx for where the value is. */
2124
2125 static rtx
2126 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2127 rtx amount, rtx target, int unsignedp)
2128 {
2129 rtx op1, temp = 0;
2130 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2131 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2132 optab lshift_optab = ashl_optab;
2133 optab rshift_arith_optab = ashr_optab;
2134 optab rshift_uns_optab = lshr_optab;
2135 optab lrotate_optab = rotl_optab;
2136 optab rrotate_optab = rotr_optab;
2137 enum machine_mode op1_mode;
2138 int attempt;
2139 bool speed = optimize_insn_for_speed_p ();
2140
2141 op1 = amount;
2142 op1_mode = GET_MODE (op1);
2143
2144 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2145 shift amount is a vector, use the vector/vector shift patterns. */
2146 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2147 {
2148 lshift_optab = vashl_optab;
2149 rshift_arith_optab = vashr_optab;
2150 rshift_uns_optab = vlshr_optab;
2151 lrotate_optab = vrotl_optab;
2152 rrotate_optab = vrotr_optab;
2153 }
2154
2155 /* Previously detected shift-counts computed by NEGATE_EXPR
2156 and shifted in the other direction; but that does not work
2157 on all machines. */
2158
2159 if (SHIFT_COUNT_TRUNCATED)
2160 {
2161 if (CONST_INT_P (op1)
2162 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2163 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2164 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2165 % GET_MODE_BITSIZE (mode));
2166 else if (GET_CODE (op1) == SUBREG
2167 && subreg_lowpart_p (op1)
2168 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2169 op1 = SUBREG_REG (op1);
2170 }
2171
2172 if (op1 == const0_rtx)
2173 return shifted;
2174
2175 /* Check whether its cheaper to implement a left shift by a constant
2176 bit count by a sequence of additions. */
2177 if (code == LSHIFT_EXPR
2178 && CONST_INT_P (op1)
2179 && INTVAL (op1) > 0
2180 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2181 && INTVAL (op1) < MAX_BITS_PER_WORD
2182 && (shift_cost (speed, mode, INTVAL (op1))
2183 > INTVAL (op1) * add_cost (speed, mode))
2184 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2185 {
2186 int i;
2187 for (i = 0; i < INTVAL (op1); i++)
2188 {
2189 temp = force_reg (mode, shifted);
2190 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2191 unsignedp, OPTAB_LIB_WIDEN);
2192 }
2193 return shifted;
2194 }
2195
2196 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2197 {
2198 enum optab_methods methods;
2199
2200 if (attempt == 0)
2201 methods = OPTAB_DIRECT;
2202 else if (attempt == 1)
2203 methods = OPTAB_WIDEN;
2204 else
2205 methods = OPTAB_LIB_WIDEN;
2206
2207 if (rotate)
2208 {
2209 /* Widening does not work for rotation. */
2210 if (methods == OPTAB_WIDEN)
2211 continue;
2212 else if (methods == OPTAB_LIB_WIDEN)
2213 {
2214 /* If we have been unable to open-code this by a rotation,
2215 do it as the IOR of two shifts. I.e., to rotate A
2216 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2217 where C is the bitsize of A.
2218
2219 It is theoretically possible that the target machine might
2220 not be able to perform either shift and hence we would
2221 be making two libcalls rather than just the one for the
2222 shift (similarly if IOR could not be done). We will allow
2223 this extremely unlikely lossage to avoid complicating the
2224 code below. */
2225
2226 rtx subtarget = target == shifted ? 0 : target;
2227 rtx new_amount, other_amount;
2228 rtx temp1;
2229
2230 new_amount = op1;
2231 if (CONST_INT_P (op1))
2232 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2233 - INTVAL (op1));
2234 else
2235 other_amount
2236 = simplify_gen_binary (MINUS, GET_MODE (op1),
2237 GEN_INT (GET_MODE_PRECISION (mode)),
2238 op1);
2239
2240 shifted = force_reg (mode, shifted);
2241
2242 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2243 mode, shifted, new_amount, 0, 1);
2244 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2245 mode, shifted, other_amount,
2246 subtarget, 1);
2247 return expand_binop (mode, ior_optab, temp, temp1, target,
2248 unsignedp, methods);
2249 }
2250
2251 temp = expand_binop (mode,
2252 left ? lrotate_optab : rrotate_optab,
2253 shifted, op1, target, unsignedp, methods);
2254 }
2255 else if (unsignedp)
2256 temp = expand_binop (mode,
2257 left ? lshift_optab : rshift_uns_optab,
2258 shifted, op1, target, unsignedp, methods);
2259
2260 /* Do arithmetic shifts.
2261 Also, if we are going to widen the operand, we can just as well
2262 use an arithmetic right-shift instead of a logical one. */
2263 if (temp == 0 && ! rotate
2264 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2265 {
2266 enum optab_methods methods1 = methods;
2267
2268 /* If trying to widen a log shift to an arithmetic shift,
2269 don't accept an arithmetic shift of the same size. */
2270 if (unsignedp)
2271 methods1 = OPTAB_MUST_WIDEN;
2272
2273 /* Arithmetic shift */
2274
2275 temp = expand_binop (mode,
2276 left ? lshift_optab : rshift_arith_optab,
2277 shifted, op1, target, unsignedp, methods1);
2278 }
2279
2280 /* We used to try extzv here for logical right shifts, but that was
2281 only useful for one machine, the VAX, and caused poor code
2282 generation there for lshrdi3, so the code was deleted and a
2283 define_expand for lshrsi3 was added to vax.md. */
2284 }
2285
2286 gcc_assert (temp);
2287 return temp;
2288 }
2289
2290 /* Output a shift instruction for expression code CODE,
2291 with SHIFTED being the rtx for the value to shift,
2292 and AMOUNT the amount to shift by.
2293 Store the result in the rtx TARGET, if that is convenient.
2294 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2295 Return the rtx for where the value is. */
2296
2297 rtx
2298 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2299 int amount, rtx target, int unsignedp)
2300 {
2301 return expand_shift_1 (code, mode,
2302 shifted, GEN_INT (amount), target, unsignedp);
2303 }
2304
2305 /* Output a shift instruction for expression code CODE,
2306 with SHIFTED being the rtx for the value to shift,
2307 and AMOUNT the tree for the amount to shift by.
2308 Store the result in the rtx TARGET, if that is convenient.
2309 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2310 Return the rtx for where the value is. */
2311
2312 rtx
2313 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2314 tree amount, rtx target, int unsignedp)
2315 {
2316 return expand_shift_1 (code, mode,
2317 shifted, expand_normal (amount), target, unsignedp);
2318 }
2319
2320 \f
2321 /* Indicates the type of fixup needed after a constant multiplication.
2322 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2323 the result should be negated, and ADD_VARIANT means that the
2324 multiplicand should be added to the result. */
2325 enum mult_variant {basic_variant, negate_variant, add_variant};
2326
2327 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2328 const struct mult_cost *, enum machine_mode mode);
2329 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2330 struct algorithm *, enum mult_variant *, int);
2331 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2332 const struct algorithm *, enum mult_variant);
2333 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2334 static rtx extract_high_half (enum machine_mode, rtx);
2335 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2336 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2337 int, int);
2338 /* Compute and return the best algorithm for multiplying by T.
2339 The algorithm must cost less than cost_limit
2340 If retval.cost >= COST_LIMIT, no algorithm was found and all
2341 other field of the returned struct are undefined.
2342 MODE is the machine mode of the multiplication. */
2343
2344 static void
2345 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2346 const struct mult_cost *cost_limit, enum machine_mode mode)
2347 {
2348 int m;
2349 struct algorithm *alg_in, *best_alg;
2350 struct mult_cost best_cost;
2351 struct mult_cost new_limit;
2352 int op_cost, op_latency;
2353 unsigned HOST_WIDE_INT orig_t = t;
2354 unsigned HOST_WIDE_INT q;
2355 int maxm, hash_index;
2356 bool cache_hit = false;
2357 enum alg_code cache_alg = alg_zero;
2358 bool speed = optimize_insn_for_speed_p ();
2359 enum machine_mode imode;
2360 struct alg_hash_entry *entry_ptr;
2361
2362 /* Indicate that no algorithm is yet found. If no algorithm
2363 is found, this value will be returned and indicate failure. */
2364 alg_out->cost.cost = cost_limit->cost + 1;
2365 alg_out->cost.latency = cost_limit->latency + 1;
2366
2367 if (cost_limit->cost < 0
2368 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2369 return;
2370
2371 /* Be prepared for vector modes. */
2372 imode = GET_MODE_INNER (mode);
2373 if (imode == VOIDmode)
2374 imode = mode;
2375
2376 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2377
2378 /* Restrict the bits of "t" to the multiplication's mode. */
2379 t &= GET_MODE_MASK (imode);
2380
2381 /* t == 1 can be done in zero cost. */
2382 if (t == 1)
2383 {
2384 alg_out->ops = 1;
2385 alg_out->cost.cost = 0;
2386 alg_out->cost.latency = 0;
2387 alg_out->op[0] = alg_m;
2388 return;
2389 }
2390
2391 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2392 fail now. */
2393 if (t == 0)
2394 {
2395 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2396 return;
2397 else
2398 {
2399 alg_out->ops = 1;
2400 alg_out->cost.cost = zero_cost (speed);
2401 alg_out->cost.latency = zero_cost (speed);
2402 alg_out->op[0] = alg_zero;
2403 return;
2404 }
2405 }
2406
2407 /* We'll be needing a couple extra algorithm structures now. */
2408
2409 alg_in = XALLOCA (struct algorithm);
2410 best_alg = XALLOCA (struct algorithm);
2411 best_cost = *cost_limit;
2412
2413 /* Compute the hash index. */
2414 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2415
2416 /* See if we already know what to do for T. */
2417 entry_ptr = alg_hash_entry_ptr (hash_index);
2418 if (entry_ptr->t == t
2419 && entry_ptr->mode == mode
2420 && entry_ptr->mode == mode
2421 && entry_ptr->speed == speed
2422 && entry_ptr->alg != alg_unknown)
2423 {
2424 cache_alg = entry_ptr->alg;
2425
2426 if (cache_alg == alg_impossible)
2427 {
2428 /* The cache tells us that it's impossible to synthesize
2429 multiplication by T within entry_ptr->cost. */
2430 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2431 /* COST_LIMIT is at least as restrictive as the one
2432 recorded in the hash table, in which case we have no
2433 hope of synthesizing a multiplication. Just
2434 return. */
2435 return;
2436
2437 /* If we get here, COST_LIMIT is less restrictive than the
2438 one recorded in the hash table, so we may be able to
2439 synthesize a multiplication. Proceed as if we didn't
2440 have the cache entry. */
2441 }
2442 else
2443 {
2444 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2445 /* The cached algorithm shows that this multiplication
2446 requires more cost than COST_LIMIT. Just return. This
2447 way, we don't clobber this cache entry with
2448 alg_impossible but retain useful information. */
2449 return;
2450
2451 cache_hit = true;
2452
2453 switch (cache_alg)
2454 {
2455 case alg_shift:
2456 goto do_alg_shift;
2457
2458 case alg_add_t_m2:
2459 case alg_sub_t_m2:
2460 goto do_alg_addsub_t_m2;
2461
2462 case alg_add_factor:
2463 case alg_sub_factor:
2464 goto do_alg_addsub_factor;
2465
2466 case alg_add_t2_m:
2467 goto do_alg_add_t2_m;
2468
2469 case alg_sub_t2_m:
2470 goto do_alg_sub_t2_m;
2471
2472 default:
2473 gcc_unreachable ();
2474 }
2475 }
2476 }
2477
2478 /* If we have a group of zero bits at the low-order part of T, try
2479 multiplying by the remaining bits and then doing a shift. */
2480
2481 if ((t & 1) == 0)
2482 {
2483 do_alg_shift:
2484 m = floor_log2 (t & -t); /* m = number of low zero bits */
2485 if (m < maxm)
2486 {
2487 q = t >> m;
2488 /* The function expand_shift will choose between a shift and
2489 a sequence of additions, so the observed cost is given as
2490 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2491 op_cost = m * add_cost (speed, mode);
2492 if (shift_cost (speed, mode, m) < op_cost)
2493 op_cost = shift_cost (speed, mode, m);
2494 new_limit.cost = best_cost.cost - op_cost;
2495 new_limit.latency = best_cost.latency - op_cost;
2496 synth_mult (alg_in, q, &new_limit, mode);
2497
2498 alg_in->cost.cost += op_cost;
2499 alg_in->cost.latency += op_cost;
2500 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2501 {
2502 struct algorithm *x;
2503 best_cost = alg_in->cost;
2504 x = alg_in, alg_in = best_alg, best_alg = x;
2505 best_alg->log[best_alg->ops] = m;
2506 best_alg->op[best_alg->ops] = alg_shift;
2507 }
2508
2509 /* See if treating ORIG_T as a signed number yields a better
2510 sequence. Try this sequence only for a negative ORIG_T
2511 as it would be useless for a non-negative ORIG_T. */
2512 if ((HOST_WIDE_INT) orig_t < 0)
2513 {
2514 /* Shift ORIG_T as follows because a right shift of a
2515 negative-valued signed type is implementation
2516 defined. */
2517 q = ~(~orig_t >> m);
2518 /* The function expand_shift will choose between a shift
2519 and a sequence of additions, so the observed cost is
2520 given as MIN (m * add_cost(speed, mode),
2521 shift_cost(speed, mode, m)). */
2522 op_cost = m * add_cost (speed, mode);
2523 if (shift_cost (speed, mode, m) < op_cost)
2524 op_cost = shift_cost (speed, mode, m);
2525 new_limit.cost = best_cost.cost - op_cost;
2526 new_limit.latency = best_cost.latency - op_cost;
2527 synth_mult (alg_in, q, &new_limit, mode);
2528
2529 alg_in->cost.cost += op_cost;
2530 alg_in->cost.latency += op_cost;
2531 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2532 {
2533 struct algorithm *x;
2534 best_cost = alg_in->cost;
2535 x = alg_in, alg_in = best_alg, best_alg = x;
2536 best_alg->log[best_alg->ops] = m;
2537 best_alg->op[best_alg->ops] = alg_shift;
2538 }
2539 }
2540 }
2541 if (cache_hit)
2542 goto done;
2543 }
2544
2545 /* If we have an odd number, add or subtract one. */
2546 if ((t & 1) != 0)
2547 {
2548 unsigned HOST_WIDE_INT w;
2549
2550 do_alg_addsub_t_m2:
2551 for (w = 1; (w & t) != 0; w <<= 1)
2552 ;
2553 /* If T was -1, then W will be zero after the loop. This is another
2554 case where T ends with ...111. Handling this with (T + 1) and
2555 subtract 1 produces slightly better code and results in algorithm
2556 selection much faster than treating it like the ...0111 case
2557 below. */
2558 if (w == 0
2559 || (w > 2
2560 /* Reject the case where t is 3.
2561 Thus we prefer addition in that case. */
2562 && t != 3))
2563 {
2564 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2565
2566 op_cost = add_cost (speed, mode);
2567 new_limit.cost = best_cost.cost - op_cost;
2568 new_limit.latency = best_cost.latency - op_cost;
2569 synth_mult (alg_in, t + 1, &new_limit, mode);
2570
2571 alg_in->cost.cost += op_cost;
2572 alg_in->cost.latency += op_cost;
2573 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2574 {
2575 struct algorithm *x;
2576 best_cost = alg_in->cost;
2577 x = alg_in, alg_in = best_alg, best_alg = x;
2578 best_alg->log[best_alg->ops] = 0;
2579 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2580 }
2581 }
2582 else
2583 {
2584 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2585
2586 op_cost = add_cost (speed, mode);
2587 new_limit.cost = best_cost.cost - op_cost;
2588 new_limit.latency = best_cost.latency - op_cost;
2589 synth_mult (alg_in, t - 1, &new_limit, mode);
2590
2591 alg_in->cost.cost += op_cost;
2592 alg_in->cost.latency += op_cost;
2593 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594 {
2595 struct algorithm *x;
2596 best_cost = alg_in->cost;
2597 x = alg_in, alg_in = best_alg, best_alg = x;
2598 best_alg->log[best_alg->ops] = 0;
2599 best_alg->op[best_alg->ops] = alg_add_t_m2;
2600 }
2601 }
2602
2603 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2604 quickly with a - a * n for some appropriate constant n. */
2605 m = exact_log2 (-orig_t + 1);
2606 if (m >= 0 && m < maxm)
2607 {
2608 op_cost = shiftsub1_cost (speed, mode, m);
2609 new_limit.cost = best_cost.cost - op_cost;
2610 new_limit.latency = best_cost.latency - op_cost;
2611 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2612 &new_limit, mode);
2613
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_cost;
2616 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2617 {
2618 struct algorithm *x;
2619 best_cost = alg_in->cost;
2620 x = alg_in, alg_in = best_alg, best_alg = x;
2621 best_alg->log[best_alg->ops] = m;
2622 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2623 }
2624 }
2625
2626 if (cache_hit)
2627 goto done;
2628 }
2629
2630 /* Look for factors of t of the form
2631 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2632 If we find such a factor, we can multiply by t using an algorithm that
2633 multiplies by q, shift the result by m and add/subtract it to itself.
2634
2635 We search for large factors first and loop down, even if large factors
2636 are less probable than small; if we find a large factor we will find a
2637 good sequence quickly, and therefore be able to prune (by decreasing
2638 COST_LIMIT) the search. */
2639
2640 do_alg_addsub_factor:
2641 for (m = floor_log2 (t - 1); m >= 2; m--)
2642 {
2643 unsigned HOST_WIDE_INT d;
2644
2645 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2646 if (t % d == 0 && t > d && m < maxm
2647 && (!cache_hit || cache_alg == alg_add_factor))
2648 {
2649 /* If the target has a cheap shift-and-add instruction use
2650 that in preference to a shift insn followed by an add insn.
2651 Assume that the shift-and-add is "atomic" with a latency
2652 equal to its cost, otherwise assume that on superscalar
2653 hardware the shift may be executed concurrently with the
2654 earlier steps in the algorithm. */
2655 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2656 if (shiftadd_cost (speed, mode, m) < op_cost)
2657 {
2658 op_cost = shiftadd_cost (speed, mode, m);
2659 op_latency = op_cost;
2660 }
2661 else
2662 op_latency = add_cost (speed, mode);
2663
2664 new_limit.cost = best_cost.cost - op_cost;
2665 new_limit.latency = best_cost.latency - op_latency;
2666 synth_mult (alg_in, t / d, &new_limit, mode);
2667
2668 alg_in->cost.cost += op_cost;
2669 alg_in->cost.latency += op_latency;
2670 if (alg_in->cost.latency < op_cost)
2671 alg_in->cost.latency = op_cost;
2672 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673 {
2674 struct algorithm *x;
2675 best_cost = alg_in->cost;
2676 x = alg_in, alg_in = best_alg, best_alg = x;
2677 best_alg->log[best_alg->ops] = m;
2678 best_alg->op[best_alg->ops] = alg_add_factor;
2679 }
2680 /* Other factors will have been taken care of in the recursion. */
2681 break;
2682 }
2683
2684 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2685 if (t % d == 0 && t > d && m < maxm
2686 && (!cache_hit || cache_alg == alg_sub_factor))
2687 {
2688 /* If the target has a cheap shift-and-subtract insn use
2689 that in preference to a shift insn followed by a sub insn.
2690 Assume that the shift-and-sub is "atomic" with a latency
2691 equal to it's cost, otherwise assume that on superscalar
2692 hardware the shift may be executed concurrently with the
2693 earlier steps in the algorithm. */
2694 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2695 if (shiftsub0_cost (speed, mode, m) < op_cost)
2696 {
2697 op_cost = shiftsub0_cost (speed, mode, m);
2698 op_latency = op_cost;
2699 }
2700 else
2701 op_latency = add_cost (speed, mode);
2702
2703 new_limit.cost = best_cost.cost - op_cost;
2704 new_limit.latency = best_cost.latency - op_latency;
2705 synth_mult (alg_in, t / d, &new_limit, mode);
2706
2707 alg_in->cost.cost += op_cost;
2708 alg_in->cost.latency += op_latency;
2709 if (alg_in->cost.latency < op_cost)
2710 alg_in->cost.latency = op_cost;
2711 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2712 {
2713 struct algorithm *x;
2714 best_cost = alg_in->cost;
2715 x = alg_in, alg_in = best_alg, best_alg = x;
2716 best_alg->log[best_alg->ops] = m;
2717 best_alg->op[best_alg->ops] = alg_sub_factor;
2718 }
2719 break;
2720 }
2721 }
2722 if (cache_hit)
2723 goto done;
2724
2725 /* Try shift-and-add (load effective address) instructions,
2726 i.e. do a*3, a*5, a*9. */
2727 if ((t & 1) != 0)
2728 {
2729 do_alg_add_t2_m:
2730 q = t - 1;
2731 q = q & -q;
2732 m = exact_log2 (q);
2733 if (m >= 0 && m < maxm)
2734 {
2735 op_cost = shiftadd_cost (speed, mode, m);
2736 new_limit.cost = best_cost.cost - op_cost;
2737 new_limit.latency = best_cost.latency - op_cost;
2738 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2739
2740 alg_in->cost.cost += op_cost;
2741 alg_in->cost.latency += op_cost;
2742 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2743 {
2744 struct algorithm *x;
2745 best_cost = alg_in->cost;
2746 x = alg_in, alg_in = best_alg, best_alg = x;
2747 best_alg->log[best_alg->ops] = m;
2748 best_alg->op[best_alg->ops] = alg_add_t2_m;
2749 }
2750 }
2751 if (cache_hit)
2752 goto done;
2753
2754 do_alg_sub_t2_m:
2755 q = t + 1;
2756 q = q & -q;
2757 m = exact_log2 (q);
2758 if (m >= 0 && m < maxm)
2759 {
2760 op_cost = shiftsub0_cost (speed, mode, m);
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2764
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768 {
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2774 }
2775 }
2776 if (cache_hit)
2777 goto done;
2778 }
2779
2780 done:
2781 /* If best_cost has not decreased, we have not found any algorithm. */
2782 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2783 {
2784 /* We failed to find an algorithm. Record alg_impossible for
2785 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2786 we are asked to find an algorithm for T within the same or
2787 lower COST_LIMIT, we can immediately return to the
2788 caller. */
2789 entry_ptr->t = t;
2790 entry_ptr->mode = mode;
2791 entry_ptr->speed = speed;
2792 entry_ptr->alg = alg_impossible;
2793 entry_ptr->cost = *cost_limit;
2794 return;
2795 }
2796
2797 /* Cache the result. */
2798 if (!cache_hit)
2799 {
2800 entry_ptr->t = t;
2801 entry_ptr->mode = mode;
2802 entry_ptr->speed = speed;
2803 entry_ptr->alg = best_alg->op[best_alg->ops];
2804 entry_ptr->cost.cost = best_cost.cost;
2805 entry_ptr->cost.latency = best_cost.latency;
2806 }
2807
2808 /* If we are getting a too long sequence for `struct algorithm'
2809 to record, make this search fail. */
2810 if (best_alg->ops == MAX_BITS_PER_WORD)
2811 return;
2812
2813 /* Copy the algorithm from temporary space to the space at alg_out.
2814 We avoid using structure assignment because the majority of
2815 best_alg is normally undefined, and this is a critical function. */
2816 alg_out->ops = best_alg->ops + 1;
2817 alg_out->cost = best_cost;
2818 memcpy (alg_out->op, best_alg->op,
2819 alg_out->ops * sizeof *alg_out->op);
2820 memcpy (alg_out->log, best_alg->log,
2821 alg_out->ops * sizeof *alg_out->log);
2822 }
2823 \f
2824 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2825 Try three variations:
2826
2827 - a shift/add sequence based on VAL itself
2828 - a shift/add sequence based on -VAL, followed by a negation
2829 - a shift/add sequence based on VAL - 1, followed by an addition.
2830
2831 Return true if the cheapest of these cost less than MULT_COST,
2832 describing the algorithm in *ALG and final fixup in *VARIANT. */
2833
2834 static bool
2835 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2836 struct algorithm *alg, enum mult_variant *variant,
2837 int mult_cost)
2838 {
2839 struct algorithm alg2;
2840 struct mult_cost limit;
2841 int op_cost;
2842 bool speed = optimize_insn_for_speed_p ();
2843
2844 /* Fail quickly for impossible bounds. */
2845 if (mult_cost < 0)
2846 return false;
2847
2848 /* Ensure that mult_cost provides a reasonable upper bound.
2849 Any constant multiplication can be performed with less
2850 than 2 * bits additions. */
2851 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2852 if (mult_cost > op_cost)
2853 mult_cost = op_cost;
2854
2855 *variant = basic_variant;
2856 limit.cost = mult_cost;
2857 limit.latency = mult_cost;
2858 synth_mult (alg, val, &limit, mode);
2859
2860 /* This works only if the inverted value actually fits in an
2861 `unsigned int' */
2862 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2863 {
2864 op_cost = neg_cost(speed, mode);
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2866 {
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2869 }
2870 else
2871 {
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2874 }
2875
2876 synth_mult (&alg2, -val, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = negate_variant;
2881 }
2882
2883 /* This proves very useful for division-by-constant. */
2884 op_cost = add_cost (speed, mode);
2885 if (MULT_COST_LESS (&alg->cost, mult_cost))
2886 {
2887 limit.cost = alg->cost.cost - op_cost;
2888 limit.latency = alg->cost.latency - op_cost;
2889 }
2890 else
2891 {
2892 limit.cost = mult_cost - op_cost;
2893 limit.latency = mult_cost - op_cost;
2894 }
2895
2896 synth_mult (&alg2, val - 1, &limit, mode);
2897 alg2.cost.cost += op_cost;
2898 alg2.cost.latency += op_cost;
2899 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2900 *alg = alg2, *variant = add_variant;
2901
2902 return MULT_COST_LESS (&alg->cost, mult_cost);
2903 }
2904
2905 /* A subroutine of expand_mult, used for constant multiplications.
2906 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2907 convenient. Use the shift/add sequence described by ALG and apply
2908 the final fixup specified by VARIANT. */
2909
2910 static rtx
2911 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2912 rtx target, const struct algorithm *alg,
2913 enum mult_variant variant)
2914 {
2915 HOST_WIDE_INT val_so_far;
2916 rtx insn, accum, tem;
2917 int opno;
2918 enum machine_mode nmode;
2919
2920 /* Avoid referencing memory over and over and invalid sharing
2921 on SUBREGs. */
2922 op0 = force_reg (mode, op0);
2923
2924 /* ACCUM starts out either as OP0 or as a zero, depending on
2925 the first operation. */
2926
2927 if (alg->op[0] == alg_zero)
2928 {
2929 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2930 val_so_far = 0;
2931 }
2932 else if (alg->op[0] == alg_m)
2933 {
2934 accum = copy_to_mode_reg (mode, op0);
2935 val_so_far = 1;
2936 }
2937 else
2938 gcc_unreachable ();
2939
2940 for (opno = 1; opno < alg->ops; opno++)
2941 {
2942 int log = alg->log[opno];
2943 rtx shift_subtarget = optimize ? 0 : accum;
2944 rtx add_target
2945 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2946 && !optimize)
2947 ? target : 0;
2948 rtx accum_target = optimize ? 0 : accum;
2949 rtx accum_inner;
2950
2951 switch (alg->op[opno])
2952 {
2953 case alg_shift:
2954 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2955 /* REG_EQUAL note will be attached to the following insn. */
2956 emit_move_insn (accum, tem);
2957 val_so_far <<= log;
2958 break;
2959
2960 case alg_add_t_m2:
2961 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2962 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2963 add_target ? add_target : accum_target);
2964 val_so_far += (HOST_WIDE_INT) 1 << log;
2965 break;
2966
2967 case alg_sub_t_m2:
2968 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2969 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2970 add_target ? add_target : accum_target);
2971 val_so_far -= (HOST_WIDE_INT) 1 << log;
2972 break;
2973
2974 case alg_add_t2_m:
2975 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2976 log, shift_subtarget, 0);
2977 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2978 add_target ? add_target : accum_target);
2979 val_so_far = (val_so_far << log) + 1;
2980 break;
2981
2982 case alg_sub_t2_m:
2983 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2984 log, shift_subtarget, 0);
2985 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2986 add_target ? add_target : accum_target);
2987 val_so_far = (val_so_far << log) - 1;
2988 break;
2989
2990 case alg_add_factor:
2991 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2992 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2993 add_target ? add_target : accum_target);
2994 val_so_far += val_so_far << log;
2995 break;
2996
2997 case alg_sub_factor:
2998 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2999 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3000 (add_target
3001 ? add_target : (optimize ? 0 : tem)));
3002 val_so_far = (val_so_far << log) - val_so_far;
3003 break;
3004
3005 default:
3006 gcc_unreachable ();
3007 }
3008
3009 if (SCALAR_INT_MODE_P (mode))
3010 {
3011 /* Write a REG_EQUAL note on the last insn so that we can cse
3012 multiplication sequences. Note that if ACCUM is a SUBREG,
3013 we've set the inner register and must properly indicate that. */
3014 tem = op0, nmode = mode;
3015 accum_inner = accum;
3016 if (GET_CODE (accum) == SUBREG)
3017 {
3018 accum_inner = SUBREG_REG (accum);
3019 nmode = GET_MODE (accum_inner);
3020 tem = gen_lowpart (nmode, op0);
3021 }
3022
3023 insn = get_last_insn ();
3024 set_dst_reg_note (insn, REG_EQUAL,
3025 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3026 accum_inner);
3027 }
3028 }
3029
3030 if (variant == negate_variant)
3031 {
3032 val_so_far = -val_so_far;
3033 accum = expand_unop (mode, neg_optab, accum, target, 0);
3034 }
3035 else if (variant == add_variant)
3036 {
3037 val_so_far = val_so_far + 1;
3038 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3039 }
3040
3041 /* Compare only the bits of val and val_so_far that are significant
3042 in the result mode, to avoid sign-/zero-extension confusion. */
3043 nmode = GET_MODE_INNER (mode);
3044 if (nmode == VOIDmode)
3045 nmode = mode;
3046 val &= GET_MODE_MASK (nmode);
3047 val_so_far &= GET_MODE_MASK (nmode);
3048 gcc_assert (val == val_so_far);
3049
3050 return accum;
3051 }
3052
3053 /* Perform a multiplication and return an rtx for the result.
3054 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3055 TARGET is a suggestion for where to store the result (an rtx).
3056
3057 We check specially for a constant integer as OP1.
3058 If you want this check for OP0 as well, then before calling
3059 you should swap the two operands if OP0 would be constant. */
3060
3061 rtx
3062 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3063 int unsignedp)
3064 {
3065 enum mult_variant variant;
3066 struct algorithm algorithm;
3067 rtx scalar_op1;
3068 int max_cost;
3069 bool speed = optimize_insn_for_speed_p ();
3070 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3071
3072 if (CONSTANT_P (op0))
3073 {
3074 rtx temp = op0;
3075 op0 = op1;
3076 op1 = temp;
3077 }
3078
3079 /* For vectors, there are several simplifications that can be made if
3080 all elements of the vector constant are identical. */
3081 scalar_op1 = op1;
3082 if (GET_CODE (op1) == CONST_VECTOR)
3083 {
3084 int i, n = CONST_VECTOR_NUNITS (op1);
3085 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3086 for (i = 1; i < n; ++i)
3087 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3088 goto skip_scalar;
3089 }
3090
3091 if (INTEGRAL_MODE_P (mode))
3092 {
3093 rtx fake_reg;
3094 HOST_WIDE_INT coeff;
3095 bool is_neg;
3096 int mode_bitsize;
3097
3098 if (op1 == CONST0_RTX (mode))
3099 return op1;
3100 if (op1 == CONST1_RTX (mode))
3101 return op0;
3102 if (op1 == CONSTM1_RTX (mode))
3103 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3104 op0, target, 0);
3105
3106 if (do_trapv)
3107 goto skip_synth;
3108
3109 /* These are the operations that are potentially turned into
3110 a sequence of shifts and additions. */
3111 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3112
3113 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3114 less than or equal in size to `unsigned int' this doesn't matter.
3115 If the mode is larger than `unsigned int', then synth_mult works
3116 only if the constant value exactly fits in an `unsigned int' without
3117 any truncation. This means that multiplying by negative values does
3118 not work; results are off by 2^32 on a 32 bit machine. */
3119
3120 if (CONST_INT_P (scalar_op1))
3121 {
3122 coeff = INTVAL (scalar_op1);
3123 is_neg = coeff < 0;
3124 }
3125 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3126 {
3127 /* If we are multiplying in DImode, it may still be a win
3128 to try to work with shifts and adds. */
3129 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3130 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3131 {
3132 coeff = CONST_DOUBLE_LOW (scalar_op1);
3133 is_neg = false;
3134 }
3135 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3136 {
3137 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3138 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3139 {
3140 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3141 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3142 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3143 return expand_shift (LSHIFT_EXPR, mode, op0,
3144 shift, target, unsignedp);
3145 }
3146 goto skip_synth;
3147 }
3148 else
3149 goto skip_synth;
3150 }
3151 else
3152 goto skip_synth;
3153
3154 /* We used to test optimize here, on the grounds that it's better to
3155 produce a smaller program when -O is not used. But this causes
3156 such a terrible slowdown sometimes that it seems better to always
3157 use synth_mult. */
3158
3159 /* Special case powers of two. */
3160 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3161 return expand_shift (LSHIFT_EXPR, mode, op0,
3162 floor_log2 (coeff), target, unsignedp);
3163
3164 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3165
3166 /* Attempt to handle multiplication of DImode values by negative
3167 coefficients, by performing the multiplication by a positive
3168 multiplier and then inverting the result. */
3169 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3170 {
3171 /* Its safe to use -coeff even for INT_MIN, as the
3172 result is interpreted as an unsigned coefficient.
3173 Exclude cost of op0 from max_cost to match the cost
3174 calculation of the synth_mult. */
3175 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3176 - neg_cost(speed, mode));
3177 if (max_cost > 0
3178 && choose_mult_variant (mode, -coeff, &algorithm,
3179 &variant, max_cost))
3180 {
3181 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3182 &algorithm, variant);
3183 return expand_unop (mode, neg_optab, temp, target, 0);
3184 }
3185 goto skip_synth;
3186 }
3187
3188 /* Exclude cost of op0 from max_cost to match the cost
3189 calculation of the synth_mult. */
3190 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3191 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3192 return expand_mult_const (mode, op0, coeff, target,
3193 &algorithm, variant);
3194 }
3195 skip_synth:
3196
3197 /* Expand x*2.0 as x+x. */
3198 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3199 {
3200 REAL_VALUE_TYPE d;
3201 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3202
3203 if (REAL_VALUES_EQUAL (d, dconst2))
3204 {
3205 op0 = force_reg (GET_MODE (op0), op0);
3206 return expand_binop (mode, add_optab, op0, op0,
3207 target, unsignedp, OPTAB_LIB_WIDEN);
3208 }
3209 }
3210 skip_scalar:
3211
3212 /* This used to use umul_optab if unsigned, but for non-widening multiply
3213 there is no difference between signed and unsigned. */
3214 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3215 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3216 gcc_assert (op0);
3217 return op0;
3218 }
3219
3220 /* Return a cost estimate for multiplying a register by the given
3221 COEFFicient in the given MODE and SPEED. */
3222
3223 int
3224 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3225 {
3226 int max_cost;
3227 struct algorithm algorithm;
3228 enum mult_variant variant;
3229
3230 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3231 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3232 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3233 return algorithm.cost.cost;
3234 else
3235 return max_cost;
3236 }
3237
3238 /* Perform a widening multiplication and return an rtx for the result.
3239 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3240 TARGET is a suggestion for where to store the result (an rtx).
3241 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3242 or smul_widen_optab.
3243
3244 We check specially for a constant integer as OP1, comparing the
3245 cost of a widening multiply against the cost of a sequence of shifts
3246 and adds. */
3247
3248 rtx
3249 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3250 int unsignedp, optab this_optab)
3251 {
3252 bool speed = optimize_insn_for_speed_p ();
3253 rtx cop1;
3254
3255 if (CONST_INT_P (op1)
3256 && GET_MODE (op0) != VOIDmode
3257 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3258 this_optab == umul_widen_optab))
3259 && CONST_INT_P (cop1)
3260 && (INTVAL (cop1) >= 0
3261 || HWI_COMPUTABLE_MODE_P (mode)))
3262 {
3263 HOST_WIDE_INT coeff = INTVAL (cop1);
3264 int max_cost;
3265 enum mult_variant variant;
3266 struct algorithm algorithm;
3267
3268 /* Special case powers of two. */
3269 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3270 {
3271 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3272 return expand_shift (LSHIFT_EXPR, mode, op0,
3273 floor_log2 (coeff), target, unsignedp);
3274 }
3275
3276 /* Exclude cost of op0 from max_cost to match the cost
3277 calculation of the synth_mult. */
3278 max_cost = mul_widen_cost (speed, mode);
3279 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3280 max_cost))
3281 {
3282 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3283 return expand_mult_const (mode, op0, coeff, target,
3284 &algorithm, variant);
3285 }
3286 }
3287 return expand_binop (mode, this_optab, op0, op1, target,
3288 unsignedp, OPTAB_LIB_WIDEN);
3289 }
3290 \f
3291 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3292 replace division by D, and put the least significant N bits of the result
3293 in *MULTIPLIER_PTR and return the most significant bit.
3294
3295 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3296 needed precision is in PRECISION (should be <= N).
3297
3298 PRECISION should be as small as possible so this function can choose
3299 multiplier more freely.
3300
3301 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3302 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3303
3304 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3305 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3306
3307 unsigned HOST_WIDE_INT
3308 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3309 unsigned HOST_WIDE_INT *multiplier_ptr,
3310 int *post_shift_ptr, int *lgup_ptr)
3311 {
3312 double_int mhigh, mlow;
3313 int lgup, post_shift;
3314 int pow, pow2;
3315
3316 /* lgup = ceil(log2(divisor)); */
3317 lgup = ceil_log2 (d);
3318
3319 gcc_assert (lgup <= n);
3320
3321 pow = n + lgup;
3322 pow2 = n + lgup - precision;
3323
3324 /* We could handle this with some effort, but this case is much
3325 better handled directly with a scc insn, so rely on caller using
3326 that. */
3327 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3328
3329 /* mlow = 2^(N + lgup)/d */
3330 double_int val = double_int_zero.set_bit (pow);
3331 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3332
3333 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3334 val |= double_int_zero.set_bit (pow2);
3335 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3336
3337 gcc_assert (!mhigh.high || val.high - d < d);
3338 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3339 /* Assert that mlow < mhigh. */
3340 gcc_assert (mlow.ult (mhigh));
3341
3342 /* If precision == N, then mlow, mhigh exceed 2^N
3343 (but they do not exceed 2^(N+1)). */
3344
3345 /* Reduce to lowest terms. */
3346 for (post_shift = lgup; post_shift > 0; post_shift--)
3347 {
3348 int shft = HOST_BITS_PER_WIDE_INT - 1;
3349 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3350 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3351 if (ml_lo >= mh_lo)
3352 break;
3353
3354 mlow = double_int::from_uhwi (ml_lo);
3355 mhigh = double_int::from_uhwi (mh_lo);
3356 }
3357
3358 *post_shift_ptr = post_shift;
3359 *lgup_ptr = lgup;
3360 if (n < HOST_BITS_PER_WIDE_INT)
3361 {
3362 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3363 *multiplier_ptr = mhigh.low & mask;
3364 return mhigh.low >= mask;
3365 }
3366 else
3367 {
3368 *multiplier_ptr = mhigh.low;
3369 return mhigh.high;
3370 }
3371 }
3372
3373 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3374 congruent to 1 (mod 2**N). */
3375
3376 static unsigned HOST_WIDE_INT
3377 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3378 {
3379 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3380
3381 /* The algorithm notes that the choice y = x satisfies
3382 x*y == 1 mod 2^3, since x is assumed odd.
3383 Each iteration doubles the number of bits of significance in y. */
3384
3385 unsigned HOST_WIDE_INT mask;
3386 unsigned HOST_WIDE_INT y = x;
3387 int nbit = 3;
3388
3389 mask = (n == HOST_BITS_PER_WIDE_INT
3390 ? ~(unsigned HOST_WIDE_INT) 0
3391 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3392
3393 while (nbit < n)
3394 {
3395 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3396 nbit *= 2;
3397 }
3398 return y;
3399 }
3400
3401 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3402 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3403 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3404 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3405 become signed.
3406
3407 The result is put in TARGET if that is convenient.
3408
3409 MODE is the mode of operation. */
3410
3411 rtx
3412 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3413 rtx op1, rtx target, int unsignedp)
3414 {
3415 rtx tem;
3416 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3417
3418 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3419 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3420 tem = expand_and (mode, tem, op1, NULL_RTX);
3421 adj_operand
3422 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3423 adj_operand);
3424
3425 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3426 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3427 tem = expand_and (mode, tem, op0, NULL_RTX);
3428 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3429 target);
3430
3431 return target;
3432 }
3433
3434 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3435
3436 static rtx
3437 extract_high_half (enum machine_mode mode, rtx op)
3438 {
3439 enum machine_mode wider_mode;
3440
3441 if (mode == word_mode)
3442 return gen_highpart (mode, op);
3443
3444 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3445
3446 wider_mode = GET_MODE_WIDER_MODE (mode);
3447 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3448 GET_MODE_BITSIZE (mode), 0, 1);
3449 return convert_modes (mode, wider_mode, op, 0);
3450 }
3451
3452 /* Like expmed_mult_highpart, but only consider using a multiplication
3453 optab. OP1 is an rtx for the constant operand. */
3454
3455 static rtx
3456 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3457 rtx target, int unsignedp, int max_cost)
3458 {
3459 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3460 enum machine_mode wider_mode;
3461 optab moptab;
3462 rtx tem;
3463 int size;
3464 bool speed = optimize_insn_for_speed_p ();
3465
3466 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3467
3468 wider_mode = GET_MODE_WIDER_MODE (mode);
3469 size = GET_MODE_BITSIZE (mode);
3470
3471 /* Firstly, try using a multiplication insn that only generates the needed
3472 high part of the product, and in the sign flavor of unsignedp. */
3473 if (mul_highpart_cost (speed, mode) < max_cost)
3474 {
3475 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3476 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3477 unsignedp, OPTAB_DIRECT);
3478 if (tem)
3479 return tem;
3480 }
3481
3482 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3483 Need to adjust the result after the multiplication. */
3484 if (size - 1 < BITS_PER_WORD
3485 && (mul_highpart_cost (speed, mode)
3486 + 2 * shift_cost (speed, mode, size-1)
3487 + 4 * add_cost (speed, mode) < max_cost))
3488 {
3489 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3490 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3491 unsignedp, OPTAB_DIRECT);
3492 if (tem)
3493 /* We used the wrong signedness. Adjust the result. */
3494 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3495 tem, unsignedp);
3496 }
3497
3498 /* Try widening multiplication. */
3499 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3500 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3501 && mul_widen_cost (speed, wider_mode) < max_cost)
3502 {
3503 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3504 unsignedp, OPTAB_WIDEN);
3505 if (tem)
3506 return extract_high_half (mode, tem);
3507 }
3508
3509 /* Try widening the mode and perform a non-widening multiplication. */
3510 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3511 && size - 1 < BITS_PER_WORD
3512 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3513 < max_cost))
3514 {
3515 rtx insns, wop0, wop1;
3516
3517 /* We need to widen the operands, for example to ensure the
3518 constant multiplier is correctly sign or zero extended.
3519 Use a sequence to clean-up any instructions emitted by
3520 the conversions if things don't work out. */
3521 start_sequence ();
3522 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3523 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3524 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3525 unsignedp, OPTAB_WIDEN);
3526 insns = get_insns ();
3527 end_sequence ();
3528
3529 if (tem)
3530 {
3531 emit_insn (insns);
3532 return extract_high_half (mode, tem);
3533 }
3534 }
3535
3536 /* Try widening multiplication of opposite signedness, and adjust. */
3537 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3538 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3539 && size - 1 < BITS_PER_WORD
3540 && (mul_widen_cost (speed, wider_mode)
3541 + 2 * shift_cost (speed, mode, size-1)
3542 + 4 * add_cost (speed, mode) < max_cost))
3543 {
3544 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3545 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3546 if (tem != 0)
3547 {
3548 tem = extract_high_half (mode, tem);
3549 /* We used the wrong signedness. Adjust the result. */
3550 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3551 target, unsignedp);
3552 }
3553 }
3554
3555 return 0;
3556 }
3557
3558 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3559 putting the high half of the result in TARGET if that is convenient,
3560 and return where the result is. If the operation can not be performed,
3561 0 is returned.
3562
3563 MODE is the mode of operation and result.
3564
3565 UNSIGNEDP nonzero means unsigned multiply.
3566
3567 MAX_COST is the total allowed cost for the expanded RTL. */
3568
3569 static rtx
3570 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3571 rtx target, int unsignedp, int max_cost)
3572 {
3573 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3574 unsigned HOST_WIDE_INT cnst1;
3575 int extra_cost;
3576 bool sign_adjust = false;
3577 enum mult_variant variant;
3578 struct algorithm alg;
3579 rtx tem;
3580 bool speed = optimize_insn_for_speed_p ();
3581
3582 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3583 /* We can't support modes wider than HOST_BITS_PER_INT. */
3584 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3585
3586 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3587
3588 /* We can't optimize modes wider than BITS_PER_WORD.
3589 ??? We might be able to perform double-word arithmetic if
3590 mode == word_mode, however all the cost calculations in
3591 synth_mult etc. assume single-word operations. */
3592 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3593 return expmed_mult_highpart_optab (mode, op0, op1, target,
3594 unsignedp, max_cost);
3595
3596 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3597
3598 /* Check whether we try to multiply by a negative constant. */
3599 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3600 {
3601 sign_adjust = true;
3602 extra_cost += add_cost (speed, mode);
3603 }
3604
3605 /* See whether shift/add multiplication is cheap enough. */
3606 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3607 max_cost - extra_cost))
3608 {
3609 /* See whether the specialized multiplication optabs are
3610 cheaper than the shift/add version. */
3611 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3612 alg.cost.cost + extra_cost);
3613 if (tem)
3614 return tem;
3615
3616 tem = convert_to_mode (wider_mode, op0, unsignedp);
3617 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3618 tem = extract_high_half (mode, tem);
3619
3620 /* Adjust result for signedness. */
3621 if (sign_adjust)
3622 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3623
3624 return tem;
3625 }
3626 return expmed_mult_highpart_optab (mode, op0, op1, target,
3627 unsignedp, max_cost);
3628 }
3629
3630
3631 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3632
3633 static rtx
3634 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3635 {
3636 unsigned HOST_WIDE_INT masklow, maskhigh;
3637 rtx result, temp, shift, label;
3638 int logd;
3639
3640 logd = floor_log2 (d);
3641 result = gen_reg_rtx (mode);
3642
3643 /* Avoid conditional branches when they're expensive. */
3644 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3645 && optimize_insn_for_speed_p ())
3646 {
3647 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3648 mode, 0, -1);
3649 if (signmask)
3650 {
3651 signmask = force_reg (mode, signmask);
3652 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3653 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3654
3655 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3656 which instruction sequence to use. If logical right shifts
3657 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3658 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3659
3660 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3661 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3662 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3663 > COSTS_N_INSNS (2)))
3664 {
3665 temp = expand_binop (mode, xor_optab, op0, signmask,
3666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667 temp = expand_binop (mode, sub_optab, temp, signmask,
3668 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3669 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 temp = expand_binop (mode, xor_optab, temp, signmask,
3672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 temp = expand_binop (mode, sub_optab, temp, signmask,
3674 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675 }
3676 else
3677 {
3678 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3680 signmask = force_reg (mode, signmask);
3681
3682 temp = expand_binop (mode, add_optab, op0, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, sub_optab, temp, signmask,
3687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688 }
3689 return temp;
3690 }
3691 }
3692
3693 /* Mask contains the mode's signbit and the significant bits of the
3694 modulus. By including the signbit in the operation, many targets
3695 can avoid an explicit compare operation in the following comparison
3696 against zero. */
3697
3698 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3699 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3700 {
3701 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3702 maskhigh = -1;
3703 }
3704 else
3705 maskhigh = (HOST_WIDE_INT) -1
3706 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3707
3708 temp = expand_binop (mode, and_optab, op0,
3709 immed_double_const (masklow, maskhigh, mode),
3710 result, 1, OPTAB_LIB_WIDEN);
3711 if (temp != result)
3712 emit_move_insn (result, temp);
3713
3714 label = gen_label_rtx ();
3715 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3716
3717 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3718 0, OPTAB_LIB_WIDEN);
3719 masklow = (HOST_WIDE_INT) -1 << logd;
3720 maskhigh = -1;
3721 temp = expand_binop (mode, ior_optab, temp,
3722 immed_double_const (masklow, maskhigh, mode),
3723 result, 1, OPTAB_LIB_WIDEN);
3724 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3725 0, OPTAB_LIB_WIDEN);
3726 if (temp != result)
3727 emit_move_insn (result, temp);
3728 emit_label (label);
3729 return result;
3730 }
3731
3732 /* Expand signed division of OP0 by a power of two D in mode MODE.
3733 This routine is only called for positive values of D. */
3734
3735 static rtx
3736 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3737 {
3738 rtx temp, label;
3739 int logd;
3740
3741 logd = floor_log2 (d);
3742
3743 if (d == 2
3744 && BRANCH_COST (optimize_insn_for_speed_p (),
3745 false) >= 1)
3746 {
3747 temp = gen_reg_rtx (mode);
3748 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3749 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3750 0, OPTAB_LIB_WIDEN);
3751 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3752 }
3753
3754 #ifdef HAVE_conditional_move
3755 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3756 >= 2)
3757 {
3758 rtx temp2;
3759
3760 /* ??? emit_conditional_move forces a stack adjustment via
3761 compare_from_rtx so, if the sequence is discarded, it will
3762 be lost. Do it now instead. */
3763 do_pending_stack_adjust ();
3764
3765 start_sequence ();
3766 temp2 = copy_to_mode_reg (mode, op0);
3767 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3768 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3769 temp = force_reg (mode, temp);
3770
3771 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3772 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3773 mode, temp, temp2, mode, 0);
3774 if (temp2)
3775 {
3776 rtx seq = get_insns ();
3777 end_sequence ();
3778 emit_insn (seq);
3779 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3780 }
3781 end_sequence ();
3782 }
3783 #endif
3784
3785 if (BRANCH_COST (optimize_insn_for_speed_p (),
3786 false) >= 2)
3787 {
3788 int ushift = GET_MODE_BITSIZE (mode) - logd;
3789
3790 temp = gen_reg_rtx (mode);
3791 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3792 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3793 > COSTS_N_INSNS (1))
3794 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3795 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3796 else
3797 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3798 ushift, NULL_RTX, 1);
3799 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3800 0, OPTAB_LIB_WIDEN);
3801 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3802 }
3803
3804 label = gen_label_rtx ();
3805 temp = copy_to_mode_reg (mode, op0);
3806 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3807 expand_inc (temp, GEN_INT (d - 1));
3808 emit_label (label);
3809 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3810 }
3811 \f
3812 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3813 if that is convenient, and returning where the result is.
3814 You may request either the quotient or the remainder as the result;
3815 specify REM_FLAG nonzero to get the remainder.
3816
3817 CODE is the expression code for which kind of division this is;
3818 it controls how rounding is done. MODE is the machine mode to use.
3819 UNSIGNEDP nonzero means do unsigned division. */
3820
3821 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3822 and then correct it by or'ing in missing high bits
3823 if result of ANDI is nonzero.
3824 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3825 This could optimize to a bfexts instruction.
3826 But C doesn't use these operations, so their optimizations are
3827 left for later. */
3828 /* ??? For modulo, we don't actually need the highpart of the first product,
3829 the low part will do nicely. And for small divisors, the second multiply
3830 can also be a low-part only multiply or even be completely left out.
3831 E.g. to calculate the remainder of a division by 3 with a 32 bit
3832 multiply, multiply with 0x55555556 and extract the upper two bits;
3833 the result is exact for inputs up to 0x1fffffff.
3834 The input range can be reduced by using cross-sum rules.
3835 For odd divisors >= 3, the following table gives right shift counts
3836 so that if a number is shifted by an integer multiple of the given
3837 amount, the remainder stays the same:
3838 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3839 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3840 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3841 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3842 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3843
3844 Cross-sum rules for even numbers can be derived by leaving as many bits
3845 to the right alone as the divisor has zeros to the right.
3846 E.g. if x is an unsigned 32 bit number:
3847 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3848 */
3849
3850 rtx
3851 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3852 rtx op0, rtx op1, rtx target, int unsignedp)
3853 {
3854 enum machine_mode compute_mode;
3855 rtx tquotient;
3856 rtx quotient = 0, remainder = 0;
3857 rtx last;
3858 int size;
3859 rtx insn;
3860 optab optab1, optab2;
3861 int op1_is_constant, op1_is_pow2 = 0;
3862 int max_cost, extra_cost;
3863 static HOST_WIDE_INT last_div_const = 0;
3864 static HOST_WIDE_INT ext_op1;
3865 bool speed = optimize_insn_for_speed_p ();
3866
3867 op1_is_constant = CONST_INT_P (op1);
3868 if (op1_is_constant)
3869 {
3870 ext_op1 = INTVAL (op1);
3871 if (unsignedp)
3872 ext_op1 &= GET_MODE_MASK (mode);
3873 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3874 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3875 }
3876
3877 /*
3878 This is the structure of expand_divmod:
3879
3880 First comes code to fix up the operands so we can perform the operations
3881 correctly and efficiently.
3882
3883 Second comes a switch statement with code specific for each rounding mode.
3884 For some special operands this code emits all RTL for the desired
3885 operation, for other cases, it generates only a quotient and stores it in
3886 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3887 to indicate that it has not done anything.
3888
3889 Last comes code that finishes the operation. If QUOTIENT is set and
3890 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3891 QUOTIENT is not set, it is computed using trunc rounding.
3892
3893 We try to generate special code for division and remainder when OP1 is a
3894 constant. If |OP1| = 2**n we can use shifts and some other fast
3895 operations. For other values of OP1, we compute a carefully selected
3896 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3897 by m.
3898
3899 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3900 half of the product. Different strategies for generating the product are
3901 implemented in expmed_mult_highpart.
3902
3903 If what we actually want is the remainder, we generate that by another
3904 by-constant multiplication and a subtraction. */
3905
3906 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3907 code below will malfunction if we are, so check here and handle
3908 the special case if so. */
3909 if (op1 == const1_rtx)
3910 return rem_flag ? const0_rtx : op0;
3911
3912 /* When dividing by -1, we could get an overflow.
3913 negv_optab can handle overflows. */
3914 if (! unsignedp && op1 == constm1_rtx)
3915 {
3916 if (rem_flag)
3917 return const0_rtx;
3918 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3919 ? negv_optab : neg_optab, op0, target, 0);
3920 }
3921
3922 if (target
3923 /* Don't use the function value register as a target
3924 since we have to read it as well as write it,
3925 and function-inlining gets confused by this. */
3926 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3927 /* Don't clobber an operand while doing a multi-step calculation. */
3928 || ((rem_flag || op1_is_constant)
3929 && (reg_mentioned_p (target, op0)
3930 || (MEM_P (op0) && MEM_P (target))))
3931 || reg_mentioned_p (target, op1)
3932 || (MEM_P (op1) && MEM_P (target))))
3933 target = 0;
3934
3935 /* Get the mode in which to perform this computation. Normally it will
3936 be MODE, but sometimes we can't do the desired operation in MODE.
3937 If so, pick a wider mode in which we can do the operation. Convert
3938 to that mode at the start to avoid repeated conversions.
3939
3940 First see what operations we need. These depend on the expression
3941 we are evaluating. (We assume that divxx3 insns exist under the
3942 same conditions that modxx3 insns and that these insns don't normally
3943 fail. If these assumptions are not correct, we may generate less
3944 efficient code in some cases.)
3945
3946 Then see if we find a mode in which we can open-code that operation
3947 (either a division, modulus, or shift). Finally, check for the smallest
3948 mode for which we can do the operation with a library call. */
3949
3950 /* We might want to refine this now that we have division-by-constant
3951 optimization. Since expmed_mult_highpart tries so many variants, it is
3952 not straightforward to generalize this. Maybe we should make an array
3953 of possible modes in init_expmed? Save this for GCC 2.7. */
3954
3955 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3956 ? (unsignedp ? lshr_optab : ashr_optab)
3957 : (unsignedp ? udiv_optab : sdiv_optab));
3958 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3959 ? optab1
3960 : (unsignedp ? udivmod_optab : sdivmod_optab));
3961
3962 for (compute_mode = mode; compute_mode != VOIDmode;
3963 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3964 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3965 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3966 break;
3967
3968 if (compute_mode == VOIDmode)
3969 for (compute_mode = mode; compute_mode != VOIDmode;
3970 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3971 if (optab_libfunc (optab1, compute_mode)
3972 || optab_libfunc (optab2, compute_mode))
3973 break;
3974
3975 /* If we still couldn't find a mode, use MODE, but expand_binop will
3976 probably die. */
3977 if (compute_mode == VOIDmode)
3978 compute_mode = mode;
3979
3980 if (target && GET_MODE (target) == compute_mode)
3981 tquotient = target;
3982 else
3983 tquotient = gen_reg_rtx (compute_mode);
3984
3985 size = GET_MODE_BITSIZE (compute_mode);
3986 #if 0
3987 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3988 (mode), and thereby get better code when OP1 is a constant. Do that
3989 later. It will require going over all usages of SIZE below. */
3990 size = GET_MODE_BITSIZE (mode);
3991 #endif
3992
3993 /* Only deduct something for a REM if the last divide done was
3994 for a different constant. Then set the constant of the last
3995 divide. */
3996 max_cost = (unsignedp
3997 ? udiv_cost (speed, compute_mode)
3998 : sdiv_cost (speed, compute_mode));
3999 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4000 && INTVAL (op1) == last_div_const))
4001 max_cost -= (mul_cost (speed, compute_mode)
4002 + add_cost (speed, compute_mode));
4003
4004 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4005
4006 /* Now convert to the best mode to use. */
4007 if (compute_mode != mode)
4008 {
4009 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4010 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4011
4012 /* convert_modes may have placed op1 into a register, so we
4013 must recompute the following. */
4014 op1_is_constant = CONST_INT_P (op1);
4015 op1_is_pow2 = (op1_is_constant
4016 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4017 || (! unsignedp
4018 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4019 }
4020
4021 /* If one of the operands is a volatile MEM, copy it into a register. */
4022
4023 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4024 op0 = force_reg (compute_mode, op0);
4025 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4026 op1 = force_reg (compute_mode, op1);
4027
4028 /* If we need the remainder or if OP1 is constant, we need to
4029 put OP0 in a register in case it has any queued subexpressions. */
4030 if (rem_flag || op1_is_constant)
4031 op0 = force_reg (compute_mode, op0);
4032
4033 last = get_last_insn ();
4034
4035 /* Promote floor rounding to trunc rounding for unsigned operations. */
4036 if (unsignedp)
4037 {
4038 if (code == FLOOR_DIV_EXPR)
4039 code = TRUNC_DIV_EXPR;
4040 if (code == FLOOR_MOD_EXPR)
4041 code = TRUNC_MOD_EXPR;
4042 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4043 code = TRUNC_DIV_EXPR;
4044 }
4045
4046 if (op1 != const0_rtx)
4047 switch (code)
4048 {
4049 case TRUNC_MOD_EXPR:
4050 case TRUNC_DIV_EXPR:
4051 if (op1_is_constant)
4052 {
4053 if (unsignedp)
4054 {
4055 unsigned HOST_WIDE_INT mh, ml;
4056 int pre_shift, post_shift;
4057 int dummy;
4058 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4059 & GET_MODE_MASK (compute_mode));
4060
4061 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4062 {
4063 pre_shift = floor_log2 (d);
4064 if (rem_flag)
4065 {
4066 remainder
4067 = expand_binop (compute_mode, and_optab, op0,
4068 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4069 remainder, 1,
4070 OPTAB_LIB_WIDEN);
4071 if (remainder)
4072 return gen_lowpart (mode, remainder);
4073 }
4074 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4075 pre_shift, tquotient, 1);
4076 }
4077 else if (size <= HOST_BITS_PER_WIDE_INT)
4078 {
4079 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4080 {
4081 /* Most significant bit of divisor is set; emit an scc
4082 insn. */
4083 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4084 compute_mode, 1, 1);
4085 }
4086 else
4087 {
4088 /* Find a suitable multiplier and right shift count
4089 instead of multiplying with D. */
4090
4091 mh = choose_multiplier (d, size, size,
4092 &ml, &post_shift, &dummy);
4093
4094 /* If the suggested multiplier is more than SIZE bits,
4095 we can do better for even divisors, using an
4096 initial right shift. */
4097 if (mh != 0 && (d & 1) == 0)
4098 {
4099 pre_shift = floor_log2 (d & -d);
4100 mh = choose_multiplier (d >> pre_shift, size,
4101 size - pre_shift,
4102 &ml, &post_shift, &dummy);
4103 gcc_assert (!mh);
4104 }
4105 else
4106 pre_shift = 0;
4107
4108 if (mh != 0)
4109 {
4110 rtx t1, t2, t3, t4;
4111
4112 if (post_shift - 1 >= BITS_PER_WORD)
4113 goto fail1;
4114
4115 extra_cost
4116 = (shift_cost (speed, compute_mode, post_shift - 1)
4117 + shift_cost (speed, compute_mode, 1)
4118 + 2 * add_cost (speed, compute_mode));
4119 t1 = expmed_mult_highpart (compute_mode, op0,
4120 GEN_INT (ml),
4121 NULL_RTX, 1,
4122 max_cost - extra_cost);
4123 if (t1 == 0)
4124 goto fail1;
4125 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4126 op0, t1),
4127 NULL_RTX);
4128 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4129 t2, 1, NULL_RTX, 1);
4130 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4131 t1, t3),
4132 NULL_RTX);
4133 quotient = expand_shift
4134 (RSHIFT_EXPR, compute_mode, t4,
4135 post_shift - 1, tquotient, 1);
4136 }
4137 else
4138 {
4139 rtx t1, t2;
4140
4141 if (pre_shift >= BITS_PER_WORD
4142 || post_shift >= BITS_PER_WORD)
4143 goto fail1;
4144
4145 t1 = expand_shift
4146 (RSHIFT_EXPR, compute_mode, op0,
4147 pre_shift, NULL_RTX, 1);
4148 extra_cost
4149 = (shift_cost (speed, compute_mode, pre_shift)
4150 + shift_cost (speed, compute_mode, post_shift));
4151 t2 = expmed_mult_highpart (compute_mode, t1,
4152 GEN_INT (ml),
4153 NULL_RTX, 1,
4154 max_cost - extra_cost);
4155 if (t2 == 0)
4156 goto fail1;
4157 quotient = expand_shift
4158 (RSHIFT_EXPR, compute_mode, t2,
4159 post_shift, tquotient, 1);
4160 }
4161 }
4162 }
4163 else /* Too wide mode to use tricky code */
4164 break;
4165
4166 insn = get_last_insn ();
4167 if (insn != last)
4168 set_dst_reg_note (insn, REG_EQUAL,
4169 gen_rtx_UDIV (compute_mode, op0, op1),
4170 quotient);
4171 }
4172 else /* TRUNC_DIV, signed */
4173 {
4174 unsigned HOST_WIDE_INT ml;
4175 int lgup, post_shift;
4176 rtx mlr;
4177 HOST_WIDE_INT d = INTVAL (op1);
4178 unsigned HOST_WIDE_INT abs_d;
4179
4180 /* Since d might be INT_MIN, we have to cast to
4181 unsigned HOST_WIDE_INT before negating to avoid
4182 undefined signed overflow. */
4183 abs_d = (d >= 0
4184 ? (unsigned HOST_WIDE_INT) d
4185 : - (unsigned HOST_WIDE_INT) d);
4186
4187 /* n rem d = n rem -d */
4188 if (rem_flag && d < 0)
4189 {
4190 d = abs_d;
4191 op1 = gen_int_mode (abs_d, compute_mode);
4192 }
4193
4194 if (d == 1)
4195 quotient = op0;
4196 else if (d == -1)
4197 quotient = expand_unop (compute_mode, neg_optab, op0,
4198 tquotient, 0);
4199 else if (HOST_BITS_PER_WIDE_INT >= size
4200 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4201 {
4202 /* This case is not handled correctly below. */
4203 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4204 compute_mode, 1, 1);
4205 if (quotient == 0)
4206 goto fail1;
4207 }
4208 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4209 && (rem_flag
4210 ? smod_pow2_cheap (speed, compute_mode)
4211 : sdiv_pow2_cheap (speed, compute_mode))
4212 /* We assume that cheap metric is true if the
4213 optab has an expander for this mode. */
4214 && ((optab_handler ((rem_flag ? smod_optab
4215 : sdiv_optab),
4216 compute_mode)
4217 != CODE_FOR_nothing)
4218 || (optab_handler (sdivmod_optab,
4219 compute_mode)
4220 != CODE_FOR_nothing)))
4221 ;
4222 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4223 {
4224 if (rem_flag)
4225 {
4226 remainder = expand_smod_pow2 (compute_mode, op0, d);
4227 if (remainder)
4228 return gen_lowpart (mode, remainder);
4229 }
4230
4231 if (sdiv_pow2_cheap (speed, compute_mode)
4232 && ((optab_handler (sdiv_optab, compute_mode)
4233 != CODE_FOR_nothing)
4234 || (optab_handler (sdivmod_optab, compute_mode)
4235 != CODE_FOR_nothing)))
4236 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4237 compute_mode, op0,
4238 gen_int_mode (abs_d,
4239 compute_mode),
4240 NULL_RTX, 0);
4241 else
4242 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4243
4244 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4245 negate the quotient. */
4246 if (d < 0)
4247 {
4248 insn = get_last_insn ();
4249 if (insn != last
4250 && abs_d < ((unsigned HOST_WIDE_INT) 1
4251 << (HOST_BITS_PER_WIDE_INT - 1)))
4252 set_dst_reg_note (insn, REG_EQUAL,
4253 gen_rtx_DIV (compute_mode, op0,
4254 gen_int_mode
4255 (abs_d,
4256 compute_mode)),
4257 quotient);
4258
4259 quotient = expand_unop (compute_mode, neg_optab,
4260 quotient, quotient, 0);
4261 }
4262 }
4263 else if (size <= HOST_BITS_PER_WIDE_INT)
4264 {
4265 choose_multiplier (abs_d, size, size - 1,
4266 &ml, &post_shift, &lgup);
4267 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4268 {
4269 rtx t1, t2, t3;
4270
4271 if (post_shift >= BITS_PER_WORD
4272 || size - 1 >= BITS_PER_WORD)
4273 goto fail1;
4274
4275 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4276 + shift_cost (speed, compute_mode, size - 1)
4277 + add_cost (speed, compute_mode));
4278 t1 = expmed_mult_highpart (compute_mode, op0,
4279 GEN_INT (ml), NULL_RTX, 0,
4280 max_cost - extra_cost);
4281 if (t1 == 0)
4282 goto fail1;
4283 t2 = expand_shift
4284 (RSHIFT_EXPR, compute_mode, t1,
4285 post_shift, NULL_RTX, 0);
4286 t3 = expand_shift
4287 (RSHIFT_EXPR, compute_mode, op0,
4288 size - 1, NULL_RTX, 0);
4289 if (d < 0)
4290 quotient
4291 = force_operand (gen_rtx_MINUS (compute_mode,
4292 t3, t2),
4293 tquotient);
4294 else
4295 quotient
4296 = force_operand (gen_rtx_MINUS (compute_mode,
4297 t2, t3),
4298 tquotient);
4299 }
4300 else
4301 {
4302 rtx t1, t2, t3, t4;
4303
4304 if (post_shift >= BITS_PER_WORD
4305 || size - 1 >= BITS_PER_WORD)
4306 goto fail1;
4307
4308 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4309 mlr = gen_int_mode (ml, compute_mode);
4310 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4311 + shift_cost (speed, compute_mode, size - 1)
4312 + 2 * add_cost (speed, compute_mode));
4313 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4314 NULL_RTX, 0,
4315 max_cost - extra_cost);
4316 if (t1 == 0)
4317 goto fail1;
4318 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4319 t1, op0),
4320 NULL_RTX);
4321 t3 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t2,
4323 post_shift, NULL_RTX, 0);
4324 t4 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t4, t3),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t3, t4),
4336 tquotient);
4337 }
4338 }
4339 else /* Too wide mode to use tricky code */
4340 break;
4341
4342 insn = get_last_insn ();
4343 if (insn != last)
4344 set_dst_reg_note (insn, REG_EQUAL,
4345 gen_rtx_DIV (compute_mode, op0, op1),
4346 quotient);
4347 }
4348 break;
4349 }
4350 fail1:
4351 delete_insns_since (last);
4352 break;
4353
4354 case FLOOR_DIV_EXPR:
4355 case FLOOR_MOD_EXPR:
4356 /* We will come here only for signed operations. */
4357 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4358 {
4359 unsigned HOST_WIDE_INT mh, ml;
4360 int pre_shift, lgup, post_shift;
4361 HOST_WIDE_INT d = INTVAL (op1);
4362
4363 if (d > 0)
4364 {
4365 /* We could just as easily deal with negative constants here,
4366 but it does not seem worth the trouble for GCC 2.6. */
4367 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4368 {
4369 pre_shift = floor_log2 (d);
4370 if (rem_flag)
4371 {
4372 remainder = expand_binop (compute_mode, and_optab, op0,
4373 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4374 remainder, 0, OPTAB_LIB_WIDEN);
4375 if (remainder)
4376 return gen_lowpart (mode, remainder);
4377 }
4378 quotient = expand_shift
4379 (RSHIFT_EXPR, compute_mode, op0,
4380 pre_shift, tquotient, 0);
4381 }
4382 else
4383 {
4384 rtx t1, t2, t3, t4;
4385
4386 mh = choose_multiplier (d, size, size - 1,
4387 &ml, &post_shift, &lgup);
4388 gcc_assert (!mh);
4389
4390 if (post_shift < BITS_PER_WORD
4391 && size - 1 < BITS_PER_WORD)
4392 {
4393 t1 = expand_shift
4394 (RSHIFT_EXPR, compute_mode, op0,
4395 size - 1, NULL_RTX, 0);
4396 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4397 NULL_RTX, 0, OPTAB_WIDEN);
4398 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4399 + shift_cost (speed, compute_mode, size - 1)
4400 + 2 * add_cost (speed, compute_mode));
4401 t3 = expmed_mult_highpart (compute_mode, t2,
4402 GEN_INT (ml), NULL_RTX, 1,
4403 max_cost - extra_cost);
4404 if (t3 != 0)
4405 {
4406 t4 = expand_shift
4407 (RSHIFT_EXPR, compute_mode, t3,
4408 post_shift, NULL_RTX, 1);
4409 quotient = expand_binop (compute_mode, xor_optab,
4410 t4, t1, tquotient, 0,
4411 OPTAB_WIDEN);
4412 }
4413 }
4414 }
4415 }
4416 else
4417 {
4418 rtx nsign, t1, t2, t3, t4;
4419 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4420 op0, constm1_rtx), NULL_RTX);
4421 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422 0, OPTAB_WIDEN);
4423 nsign = expand_shift
4424 (RSHIFT_EXPR, compute_mode, t2,
4425 size - 1, NULL_RTX, 0);
4426 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427 NULL_RTX);
4428 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4429 NULL_RTX, 0);
4430 if (t4)
4431 {
4432 rtx t5;
4433 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434 NULL_RTX, 0);
4435 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4436 t4, t5),
4437 tquotient);
4438 }
4439 }
4440 }
4441
4442 if (quotient != 0)
4443 break;
4444 delete_insns_since (last);
4445
4446 /* Try using an instruction that produces both the quotient and
4447 remainder, using truncation. We can easily compensate the quotient
4448 or remainder to get floor rounding, once we have the remainder.
4449 Notice that we compute also the final remainder value here,
4450 and return the result right away. */
4451 if (target == 0 || GET_MODE (target) != compute_mode)
4452 target = gen_reg_rtx (compute_mode);
4453
4454 if (rem_flag)
4455 {
4456 remainder
4457 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4458 quotient = gen_reg_rtx (compute_mode);
4459 }
4460 else
4461 {
4462 quotient
4463 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4464 remainder = gen_reg_rtx (compute_mode);
4465 }
4466
4467 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4468 quotient, remainder, 0))
4469 {
4470 /* This could be computed with a branch-less sequence.
4471 Save that for later. */
4472 rtx tem;
4473 rtx label = gen_label_rtx ();
4474 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4475 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4476 NULL_RTX, 0, OPTAB_WIDEN);
4477 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4478 expand_dec (quotient, const1_rtx);
4479 expand_inc (remainder, op1);
4480 emit_label (label);
4481 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4482 }
4483
4484 /* No luck with division elimination or divmod. Have to do it
4485 by conditionally adjusting op0 *and* the result. */
4486 {
4487 rtx label1, label2, label3, label4, label5;
4488 rtx adjusted_op0;
4489 rtx tem;
4490
4491 quotient = gen_reg_rtx (compute_mode);
4492 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4493 label1 = gen_label_rtx ();
4494 label2 = gen_label_rtx ();
4495 label3 = gen_label_rtx ();
4496 label4 = gen_label_rtx ();
4497 label5 = gen_label_rtx ();
4498 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4499 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4500 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501 quotient, 0, OPTAB_LIB_WIDEN);
4502 if (tem != quotient)
4503 emit_move_insn (quotient, tem);
4504 emit_jump_insn (gen_jump (label5));
4505 emit_barrier ();
4506 emit_label (label1);
4507 expand_inc (adjusted_op0, const1_rtx);
4508 emit_jump_insn (gen_jump (label4));
4509 emit_barrier ();
4510 emit_label (label2);
4511 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4512 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513 quotient, 0, OPTAB_LIB_WIDEN);
4514 if (tem != quotient)
4515 emit_move_insn (quotient, tem);
4516 emit_jump_insn (gen_jump (label5));
4517 emit_barrier ();
4518 emit_label (label3);
4519 expand_dec (adjusted_op0, const1_rtx);
4520 emit_label (label4);
4521 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4522 quotient, 0, OPTAB_LIB_WIDEN);
4523 if (tem != quotient)
4524 emit_move_insn (quotient, tem);
4525 expand_dec (quotient, const1_rtx);
4526 emit_label (label5);
4527 }
4528 break;
4529
4530 case CEIL_DIV_EXPR:
4531 case CEIL_MOD_EXPR:
4532 if (unsignedp)
4533 {
4534 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4535 {
4536 rtx t1, t2, t3;
4537 unsigned HOST_WIDE_INT d = INTVAL (op1);
4538 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4539 floor_log2 (d), tquotient, 1);
4540 t2 = expand_binop (compute_mode, and_optab, op0,
4541 GEN_INT (d - 1),
4542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543 t3 = gen_reg_rtx (compute_mode);
4544 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545 compute_mode, 1, 1);
4546 if (t3 == 0)
4547 {
4548 rtx lab;
4549 lab = gen_label_rtx ();
4550 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551 expand_inc (t1, const1_rtx);
4552 emit_label (lab);
4553 quotient = t1;
4554 }
4555 else
4556 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557 t1, t3),
4558 tquotient);
4559 break;
4560 }
4561
4562 /* Try using an instruction that produces both the quotient and
4563 remainder, using truncation. We can easily compensate the
4564 quotient or remainder to get ceiling rounding, once we have the
4565 remainder. Notice that we compute also the final remainder
4566 value here, and return the result right away. */
4567 if (target == 0 || GET_MODE (target) != compute_mode)
4568 target = gen_reg_rtx (compute_mode);
4569
4570 if (rem_flag)
4571 {
4572 remainder = (REG_P (target)
4573 ? target : gen_reg_rtx (compute_mode));
4574 quotient = gen_reg_rtx (compute_mode);
4575 }
4576 else
4577 {
4578 quotient = (REG_P (target)
4579 ? target : gen_reg_rtx (compute_mode));
4580 remainder = gen_reg_rtx (compute_mode);
4581 }
4582
4583 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584 remainder, 1))
4585 {
4586 /* This could be computed with a branch-less sequence.
4587 Save that for later. */
4588 rtx label = gen_label_rtx ();
4589 do_cmp_and_jump (remainder, const0_rtx, EQ,
4590 compute_mode, label);
4591 expand_inc (quotient, const1_rtx);
4592 expand_dec (remainder, op1);
4593 emit_label (label);
4594 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4595 }
4596
4597 /* No luck with division elimination or divmod. Have to do it
4598 by conditionally adjusting op0 *and* the result. */
4599 {
4600 rtx label1, label2;
4601 rtx adjusted_op0, tem;
4602
4603 quotient = gen_reg_rtx (compute_mode);
4604 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605 label1 = gen_label_rtx ();
4606 label2 = gen_label_rtx ();
4607 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608 compute_mode, label1);
4609 emit_move_insn (quotient, const0_rtx);
4610 emit_jump_insn (gen_jump (label2));
4611 emit_barrier ();
4612 emit_label (label1);
4613 expand_dec (adjusted_op0, const1_rtx);
4614 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615 quotient, 1, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 expand_inc (quotient, const1_rtx);
4619 emit_label (label2);
4620 }
4621 }
4622 else /* signed */
4623 {
4624 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625 && INTVAL (op1) >= 0)
4626 {
4627 /* This is extremely similar to the code for the unsigned case
4628 above. For 2.7 we should merge these variants, but for
4629 2.6.1 I don't want to touch the code for unsigned since that
4630 get used in C. The signed case will only be used by other
4631 languages (Ada). */
4632
4633 rtx t1, t2, t3;
4634 unsigned HOST_WIDE_INT d = INTVAL (op1);
4635 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636 floor_log2 (d), tquotient, 0);
4637 t2 = expand_binop (compute_mode, and_optab, op0,
4638 GEN_INT (d - 1),
4639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4640 t3 = gen_reg_rtx (compute_mode);
4641 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4642 compute_mode, 1, 1);
4643 if (t3 == 0)
4644 {
4645 rtx lab;
4646 lab = gen_label_rtx ();
4647 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4648 expand_inc (t1, const1_rtx);
4649 emit_label (lab);
4650 quotient = t1;
4651 }
4652 else
4653 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4654 t1, t3),
4655 tquotient);
4656 break;
4657 }
4658
4659 /* Try using an instruction that produces both the quotient and
4660 remainder, using truncation. We can easily compensate the
4661 quotient or remainder to get ceiling rounding, once we have the
4662 remainder. Notice that we compute also the final remainder
4663 value here, and return the result right away. */
4664 if (target == 0 || GET_MODE (target) != compute_mode)
4665 target = gen_reg_rtx (compute_mode);
4666 if (rem_flag)
4667 {
4668 remainder= (REG_P (target)
4669 ? target : gen_reg_rtx (compute_mode));
4670 quotient = gen_reg_rtx (compute_mode);
4671 }
4672 else
4673 {
4674 quotient = (REG_P (target)
4675 ? target : gen_reg_rtx (compute_mode));
4676 remainder = gen_reg_rtx (compute_mode);
4677 }
4678
4679 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4680 remainder, 0))
4681 {
4682 /* This could be computed with a branch-less sequence.
4683 Save that for later. */
4684 rtx tem;
4685 rtx label = gen_label_rtx ();
4686 do_cmp_and_jump (remainder, const0_rtx, EQ,
4687 compute_mode, label);
4688 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 NULL_RTX, 0, OPTAB_WIDEN);
4690 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4691 expand_inc (quotient, const1_rtx);
4692 expand_dec (remainder, op1);
4693 emit_label (label);
4694 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4695 }
4696
4697 /* No luck with division elimination or divmod. Have to do it
4698 by conditionally adjusting op0 *and* the result. */
4699 {
4700 rtx label1, label2, label3, label4, label5;
4701 rtx adjusted_op0;
4702 rtx tem;
4703
4704 quotient = gen_reg_rtx (compute_mode);
4705 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4706 label1 = gen_label_rtx ();
4707 label2 = gen_label_rtx ();
4708 label3 = gen_label_rtx ();
4709 label4 = gen_label_rtx ();
4710 label5 = gen_label_rtx ();
4711 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4712 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4713 compute_mode, label1);
4714 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715 quotient, 0, OPTAB_LIB_WIDEN);
4716 if (tem != quotient)
4717 emit_move_insn (quotient, tem);
4718 emit_jump_insn (gen_jump (label5));
4719 emit_barrier ();
4720 emit_label (label1);
4721 expand_dec (adjusted_op0, const1_rtx);
4722 emit_jump_insn (gen_jump (label4));
4723 emit_barrier ();
4724 emit_label (label2);
4725 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4726 compute_mode, label3);
4727 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4728 quotient, 0, OPTAB_LIB_WIDEN);
4729 if (tem != quotient)
4730 emit_move_insn (quotient, tem);
4731 emit_jump_insn (gen_jump (label5));
4732 emit_barrier ();
4733 emit_label (label3);
4734 expand_inc (adjusted_op0, const1_rtx);
4735 emit_label (label4);
4736 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4737 quotient, 0, OPTAB_LIB_WIDEN);
4738 if (tem != quotient)
4739 emit_move_insn (quotient, tem);
4740 expand_inc (quotient, const1_rtx);
4741 emit_label (label5);
4742 }
4743 }
4744 break;
4745
4746 case EXACT_DIV_EXPR:
4747 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4748 {
4749 HOST_WIDE_INT d = INTVAL (op1);
4750 unsigned HOST_WIDE_INT ml;
4751 int pre_shift;
4752 rtx t1;
4753
4754 pre_shift = floor_log2 (d & -d);
4755 ml = invert_mod2n (d >> pre_shift, size);
4756 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757 pre_shift, NULL_RTX, unsignedp);
4758 quotient = expand_mult (compute_mode, t1,
4759 gen_int_mode (ml, compute_mode),
4760 NULL_RTX, 1);
4761
4762 insn = get_last_insn ();
4763 set_dst_reg_note (insn, REG_EQUAL,
4764 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765 compute_mode, op0, op1),
4766 quotient);
4767 }
4768 break;
4769
4770 case ROUND_DIV_EXPR:
4771 case ROUND_MOD_EXPR:
4772 if (unsignedp)
4773 {
4774 rtx tem;
4775 rtx label;
4776 label = gen_label_rtx ();
4777 quotient = gen_reg_rtx (compute_mode);
4778 remainder = gen_reg_rtx (compute_mode);
4779 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4780 {
4781 rtx tem;
4782 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783 quotient, 1, OPTAB_LIB_WIDEN);
4784 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786 remainder, 1, OPTAB_LIB_WIDEN);
4787 }
4788 tem = plus_constant (compute_mode, op1, -1);
4789 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4790 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4791 expand_inc (quotient, const1_rtx);
4792 expand_dec (remainder, op1);
4793 emit_label (label);
4794 }
4795 else
4796 {
4797 rtx abs_rem, abs_op1, tem, mask;
4798 rtx label;
4799 label = gen_label_rtx ();
4800 quotient = gen_reg_rtx (compute_mode);
4801 remainder = gen_reg_rtx (compute_mode);
4802 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4803 {
4804 rtx tem;
4805 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4806 quotient, 0, OPTAB_LIB_WIDEN);
4807 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4808 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4809 remainder, 0, OPTAB_LIB_WIDEN);
4810 }
4811 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4812 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4813 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814 1, NULL_RTX, 1);
4815 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4816 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4817 NULL_RTX, 0, OPTAB_WIDEN);
4818 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819 size - 1, NULL_RTX, 0);
4820 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4823 NULL_RTX, 0, OPTAB_WIDEN);
4824 expand_inc (quotient, tem);
4825 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4826 NULL_RTX, 0, OPTAB_WIDEN);
4827 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828 NULL_RTX, 0, OPTAB_WIDEN);
4829 expand_dec (remainder, tem);
4830 emit_label (label);
4831 }
4832 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4833
4834 default:
4835 gcc_unreachable ();
4836 }
4837
4838 if (quotient == 0)
4839 {
4840 if (target && GET_MODE (target) != compute_mode)
4841 target = 0;
4842
4843 if (rem_flag)
4844 {
4845 /* Try to produce the remainder without producing the quotient.
4846 If we seem to have a divmod pattern that does not require widening,
4847 don't try widening here. We should really have a WIDEN argument
4848 to expand_twoval_binop, since what we'd really like to do here is
4849 1) try a mod insn in compute_mode
4850 2) try a divmod insn in compute_mode
4851 3) try a div insn in compute_mode and multiply-subtract to get
4852 remainder
4853 4) try the same things with widening allowed. */
4854 remainder
4855 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4856 op0, op1, target,
4857 unsignedp,
4858 ((optab_handler (optab2, compute_mode)
4859 != CODE_FOR_nothing)
4860 ? OPTAB_DIRECT : OPTAB_WIDEN));
4861 if (remainder == 0)
4862 {
4863 /* No luck there. Can we do remainder and divide at once
4864 without a library call? */
4865 remainder = gen_reg_rtx (compute_mode);
4866 if (! expand_twoval_binop ((unsignedp
4867 ? udivmod_optab
4868 : sdivmod_optab),
4869 op0, op1,
4870 NULL_RTX, remainder, unsignedp))
4871 remainder = 0;
4872 }
4873
4874 if (remainder)
4875 return gen_lowpart (mode, remainder);
4876 }
4877
4878 /* Produce the quotient. Try a quotient insn, but not a library call.
4879 If we have a divmod in this mode, use it in preference to widening
4880 the div (for this test we assume it will not fail). Note that optab2
4881 is set to the one of the two optabs that the call below will use. */
4882 quotient
4883 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4884 op0, op1, rem_flag ? NULL_RTX : target,
4885 unsignedp,
4886 ((optab_handler (optab2, compute_mode)
4887 != CODE_FOR_nothing)
4888 ? OPTAB_DIRECT : OPTAB_WIDEN));
4889
4890 if (quotient == 0)
4891 {
4892 /* No luck there. Try a quotient-and-remainder insn,
4893 keeping the quotient alone. */
4894 quotient = gen_reg_rtx (compute_mode);
4895 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896 op0, op1,
4897 quotient, NULL_RTX, unsignedp))
4898 {
4899 quotient = 0;
4900 if (! rem_flag)
4901 /* Still no luck. If we are not computing the remainder,
4902 use a library call for the quotient. */
4903 quotient = sign_expand_binop (compute_mode,
4904 udiv_optab, sdiv_optab,
4905 op0, op1, target,
4906 unsignedp, OPTAB_LIB_WIDEN);
4907 }
4908 }
4909 }
4910
4911 if (rem_flag)
4912 {
4913 if (target && GET_MODE (target) != compute_mode)
4914 target = 0;
4915
4916 if (quotient == 0)
4917 {
4918 /* No divide instruction either. Use library for remainder. */
4919 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920 op0, op1, target,
4921 unsignedp, OPTAB_LIB_WIDEN);
4922 /* No remainder function. Try a quotient-and-remainder
4923 function, keeping the remainder. */
4924 if (!remainder)
4925 {
4926 remainder = gen_reg_rtx (compute_mode);
4927 if (!expand_twoval_binop_libfunc
4928 (unsignedp ? udivmod_optab : sdivmod_optab,
4929 op0, op1,
4930 NULL_RTX, remainder,
4931 unsignedp ? UMOD : MOD))
4932 remainder = NULL_RTX;
4933 }
4934 }
4935 else
4936 {
4937 /* We divided. Now finish doing X - Y * (X / Y). */
4938 remainder = expand_mult (compute_mode, quotient, op1,
4939 NULL_RTX, unsignedp);
4940 remainder = expand_binop (compute_mode, sub_optab, op0,
4941 remainder, target, unsignedp,
4942 OPTAB_LIB_WIDEN);
4943 }
4944 }
4945
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4947 }
4948 \f
4949 /* Return a tree node with data type TYPE, describing the value of X.
4950 Usually this is an VAR_DECL, if there is no obvious better choice.
4951 X may be an expression, however we only support those expressions
4952 generated by loop.c. */
4953
4954 tree
4955 make_tree (tree type, rtx x)
4956 {
4957 tree t;
4958
4959 switch (GET_CODE (x))
4960 {
4961 case CONST_INT:
4962 {
4963 HOST_WIDE_INT hi = 0;
4964
4965 if (INTVAL (x) < 0
4966 && !(TYPE_UNSIGNED (type)
4967 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4968 < HOST_BITS_PER_WIDE_INT)))
4969 hi = -1;
4970
4971 t = build_int_cst_wide (type, INTVAL (x), hi);
4972
4973 return t;
4974 }
4975
4976 case CONST_DOUBLE:
4977 if (GET_MODE (x) == VOIDmode)
4978 t = build_int_cst_wide (type,
4979 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4980 else
4981 {
4982 REAL_VALUE_TYPE d;
4983
4984 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4985 t = build_real (type, d);
4986 }
4987
4988 return t;
4989
4990 case CONST_VECTOR:
4991 {
4992 int units = CONST_VECTOR_NUNITS (x);
4993 tree itype = TREE_TYPE (type);
4994 tree *elts;
4995 int i;
4996
4997 /* Build a tree with vector elements. */
4998 elts = XALLOCAVEC (tree, units);
4999 for (i = units - 1; i >= 0; --i)
5000 {
5001 rtx elt = CONST_VECTOR_ELT (x, i);
5002 elts[i] = make_tree (itype, elt);
5003 }
5004
5005 return build_vector (type, elts);
5006 }
5007
5008 case PLUS:
5009 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5010 make_tree (type, XEXP (x, 1)));
5011
5012 case MINUS:
5013 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5014 make_tree (type, XEXP (x, 1)));
5015
5016 case NEG:
5017 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5018
5019 case MULT:
5020 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5021 make_tree (type, XEXP (x, 1)));
5022
5023 case ASHIFT:
5024 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 make_tree (type, XEXP (x, 1)));
5026
5027 case LSHIFTRT:
5028 t = unsigned_type_for (type);
5029 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5030 make_tree (t, XEXP (x, 0)),
5031 make_tree (type, XEXP (x, 1))));
5032
5033 case ASHIFTRT:
5034 t = signed_type_for (type);
5035 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5036 make_tree (t, XEXP (x, 0)),
5037 make_tree (type, XEXP (x, 1))));
5038
5039 case DIV:
5040 if (TREE_CODE (type) != REAL_TYPE)
5041 t = signed_type_for (type);
5042 else
5043 t = type;
5044
5045 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5046 make_tree (t, XEXP (x, 0)),
5047 make_tree (t, XEXP (x, 1))));
5048 case UDIV:
5049 t = unsigned_type_for (type);
5050 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5051 make_tree (t, XEXP (x, 0)),
5052 make_tree (t, XEXP (x, 1))));
5053
5054 case SIGN_EXTEND:
5055 case ZERO_EXTEND:
5056 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5057 GET_CODE (x) == ZERO_EXTEND);
5058 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5059
5060 case CONST:
5061 return make_tree (type, XEXP (x, 0));
5062
5063 case SYMBOL_REF:
5064 t = SYMBOL_REF_DECL (x);
5065 if (t)
5066 return fold_convert (type, build_fold_addr_expr (t));
5067 /* else fall through. */
5068
5069 default:
5070 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5071
5072 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5073 address mode to pointer mode. */
5074 if (POINTER_TYPE_P (type))
5075 x = convert_memory_address_addr_space
5076 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5077
5078 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5079 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5080 t->decl_with_rtl.rtl = x;
5081
5082 return t;
5083 }
5084 }
5085 \f
5086 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5087 and returning TARGET.
5088
5089 If TARGET is 0, a pseudo-register or constant is returned. */
5090
5091 rtx
5092 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5093 {
5094 rtx tem = 0;
5095
5096 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5097 tem = simplify_binary_operation (AND, mode, op0, op1);
5098 if (tem == 0)
5099 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5100
5101 if (target == 0)
5102 target = tem;
5103 else if (tem != target)
5104 emit_move_insn (target, tem);
5105 return target;
5106 }
5107
5108 /* Helper function for emit_store_flag. */
5109 static rtx
5110 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5111 enum machine_mode mode, enum machine_mode compare_mode,
5112 int unsignedp, rtx x, rtx y, int normalizep,
5113 enum machine_mode target_mode)
5114 {
5115 struct expand_operand ops[4];
5116 rtx op0, last, comparison, subtarget;
5117 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5118
5119 last = get_last_insn ();
5120 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5121 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5122 if (!x || !y)
5123 {
5124 delete_insns_since (last);
5125 return NULL_RTX;
5126 }
5127
5128 if (target_mode == VOIDmode)
5129 target_mode = result_mode;
5130 if (!target)
5131 target = gen_reg_rtx (target_mode);
5132
5133 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5134
5135 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5136 create_fixed_operand (&ops[1], comparison);
5137 create_fixed_operand (&ops[2], x);
5138 create_fixed_operand (&ops[3], y);
5139 if (!maybe_expand_insn (icode, 4, ops))
5140 {
5141 delete_insns_since (last);
5142 return NULL_RTX;
5143 }
5144 subtarget = ops[0].value;
5145
5146 /* If we are converting to a wider mode, first convert to
5147 TARGET_MODE, then normalize. This produces better combining
5148 opportunities on machines that have a SIGN_EXTRACT when we are
5149 testing a single bit. This mostly benefits the 68k.
5150
5151 If STORE_FLAG_VALUE does not have the sign bit set when
5152 interpreted in MODE, we can do this conversion as unsigned, which
5153 is usually more efficient. */
5154 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5155 {
5156 convert_move (target, subtarget,
5157 val_signbit_known_clear_p (result_mode,
5158 STORE_FLAG_VALUE));
5159 op0 = target;
5160 result_mode = target_mode;
5161 }
5162 else
5163 op0 = subtarget;
5164
5165 /* If we want to keep subexpressions around, don't reuse our last
5166 target. */
5167 if (optimize)
5168 subtarget = 0;
5169
5170 /* Now normalize to the proper value in MODE. Sometimes we don't
5171 have to do anything. */
5172 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5173 ;
5174 /* STORE_FLAG_VALUE might be the most negative number, so write
5175 the comparison this way to avoid a compiler-time warning. */
5176 else if (- normalizep == STORE_FLAG_VALUE)
5177 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5178
5179 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5180 it hard to use a value of just the sign bit due to ANSI integer
5181 constant typing rules. */
5182 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5183 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5184 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5185 normalizep == 1);
5186 else
5187 {
5188 gcc_assert (STORE_FLAG_VALUE & 1);
5189
5190 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5191 if (normalizep == -1)
5192 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5193 }
5194
5195 /* If we were converting to a smaller mode, do the conversion now. */
5196 if (target_mode != result_mode)
5197 {
5198 convert_move (target, op0, 0);
5199 return target;
5200 }
5201 else
5202 return op0;
5203 }
5204
5205
5206 /* A subroutine of emit_store_flag only including "tricks" that do not
5207 need a recursive call. These are kept separate to avoid infinite
5208 loops. */
5209
5210 static rtx
5211 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5212 enum machine_mode mode, int unsignedp, int normalizep,
5213 enum machine_mode target_mode)
5214 {
5215 rtx subtarget;
5216 enum insn_code icode;
5217 enum machine_mode compare_mode;
5218 enum mode_class mclass;
5219 enum rtx_code scode;
5220 rtx tem;
5221
5222 if (unsignedp)
5223 code = unsigned_condition (code);
5224 scode = swap_condition (code);
5225
5226 /* If one operand is constant, make it the second one. Only do this
5227 if the other operand is not constant as well. */
5228
5229 if (swap_commutative_operands_p (op0, op1))
5230 {
5231 tem = op0;
5232 op0 = op1;
5233 op1 = tem;
5234 code = swap_condition (code);
5235 }
5236
5237 if (mode == VOIDmode)
5238 mode = GET_MODE (op0);
5239
5240 /* For some comparisons with 1 and -1, we can convert this to
5241 comparisons with zero. This will often produce more opportunities for
5242 store-flag insns. */
5243
5244 switch (code)
5245 {
5246 case LT:
5247 if (op1 == const1_rtx)
5248 op1 = const0_rtx, code = LE;
5249 break;
5250 case LE:
5251 if (op1 == constm1_rtx)
5252 op1 = const0_rtx, code = LT;
5253 break;
5254 case GE:
5255 if (op1 == const1_rtx)
5256 op1 = const0_rtx, code = GT;
5257 break;
5258 case GT:
5259 if (op1 == constm1_rtx)
5260 op1 = const0_rtx, code = GE;
5261 break;
5262 case GEU:
5263 if (op1 == const1_rtx)
5264 op1 = const0_rtx, code = NE;
5265 break;
5266 case LTU:
5267 if (op1 == const1_rtx)
5268 op1 = const0_rtx, code = EQ;
5269 break;
5270 default:
5271 break;
5272 }
5273
5274 /* If we are comparing a double-word integer with zero or -1, we can
5275 convert the comparison into one involving a single word. */
5276 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5277 && GET_MODE_CLASS (mode) == MODE_INT
5278 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5279 {
5280 if ((code == EQ || code == NE)
5281 && (op1 == const0_rtx || op1 == constm1_rtx))
5282 {
5283 rtx op00, op01;
5284
5285 /* Do a logical OR or AND of the two words and compare the
5286 result. */
5287 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5288 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5289 tem = expand_binop (word_mode,
5290 op1 == const0_rtx ? ior_optab : and_optab,
5291 op00, op01, NULL_RTX, unsignedp,
5292 OPTAB_DIRECT);
5293
5294 if (tem != 0)
5295 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5296 unsignedp, normalizep);
5297 }
5298 else if ((code == LT || code == GE) && op1 == const0_rtx)
5299 {
5300 rtx op0h;
5301
5302 /* If testing the sign bit, can just test on high word. */
5303 op0h = simplify_gen_subreg (word_mode, op0, mode,
5304 subreg_highpart_offset (word_mode,
5305 mode));
5306 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5307 unsignedp, normalizep);
5308 }
5309 else
5310 tem = NULL_RTX;
5311
5312 if (tem)
5313 {
5314 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5315 return tem;
5316 if (!target)
5317 target = gen_reg_rtx (target_mode);
5318
5319 convert_move (target, tem,
5320 !val_signbit_known_set_p (word_mode,
5321 (normalizep ? normalizep
5322 : STORE_FLAG_VALUE)));
5323 return target;
5324 }
5325 }
5326
5327 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5328 complement of A (for GE) and shifting the sign bit to the low bit. */
5329 if (op1 == const0_rtx && (code == LT || code == GE)
5330 && GET_MODE_CLASS (mode) == MODE_INT
5331 && (normalizep || STORE_FLAG_VALUE == 1
5332 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5333 {
5334 subtarget = target;
5335
5336 if (!target)
5337 target_mode = mode;
5338
5339 /* If the result is to be wider than OP0, it is best to convert it
5340 first. If it is to be narrower, it is *incorrect* to convert it
5341 first. */
5342 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5343 {
5344 op0 = convert_modes (target_mode, mode, op0, 0);
5345 mode = target_mode;
5346 }
5347
5348 if (target_mode != mode)
5349 subtarget = 0;
5350
5351 if (code == GE)
5352 op0 = expand_unop (mode, one_cmpl_optab, op0,
5353 ((STORE_FLAG_VALUE == 1 || normalizep)
5354 ? 0 : subtarget), 0);
5355
5356 if (STORE_FLAG_VALUE == 1 || normalizep)
5357 /* If we are supposed to produce a 0/1 value, we want to do
5358 a logical shift from the sign bit to the low-order bit; for
5359 a -1/0 value, we do an arithmetic shift. */
5360 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5361 GET_MODE_BITSIZE (mode) - 1,
5362 subtarget, normalizep != -1);
5363
5364 if (mode != target_mode)
5365 op0 = convert_modes (target_mode, mode, op0, 0);
5366
5367 return op0;
5368 }
5369
5370 mclass = GET_MODE_CLASS (mode);
5371 for (compare_mode = mode; compare_mode != VOIDmode;
5372 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5373 {
5374 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5375 icode = optab_handler (cstore_optab, optab_mode);
5376 if (icode != CODE_FOR_nothing)
5377 {
5378 do_pending_stack_adjust ();
5379 tem = emit_cstore (target, icode, code, mode, compare_mode,
5380 unsignedp, op0, op1, normalizep, target_mode);
5381 if (tem)
5382 return tem;
5383
5384 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5385 {
5386 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5387 unsignedp, op1, op0, normalizep, target_mode);
5388 if (tem)
5389 return tem;
5390 }
5391 break;
5392 }
5393 }
5394
5395 return 0;
5396 }
5397
5398 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5399 and storing in TARGET. Normally return TARGET.
5400 Return 0 if that cannot be done.
5401
5402 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5403 it is VOIDmode, they cannot both be CONST_INT.
5404
5405 UNSIGNEDP is for the case where we have to widen the operands
5406 to perform the operation. It says to use zero-extension.
5407
5408 NORMALIZEP is 1 if we should convert the result to be either zero
5409 or one. Normalize is -1 if we should convert the result to be
5410 either zero or -1. If NORMALIZEP is zero, the result will be left
5411 "raw" out of the scc insn. */
5412
5413 rtx
5414 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5415 enum machine_mode mode, int unsignedp, int normalizep)
5416 {
5417 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5418 enum rtx_code rcode;
5419 rtx subtarget;
5420 rtx tem, last, trueval;
5421
5422 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423 target_mode);
5424 if (tem)
5425 return tem;
5426
5427 /* If we reached here, we can't do this with a scc insn, however there
5428 are some comparisons that can be done in other ways. Don't do any
5429 of these cases if branches are very cheap. */
5430 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431 return 0;
5432
5433 /* See what we need to return. We can only return a 1, -1, or the
5434 sign bit. */
5435
5436 if (normalizep == 0)
5437 {
5438 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439 normalizep = STORE_FLAG_VALUE;
5440
5441 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5442 ;
5443 else
5444 return 0;
5445 }
5446
5447 last = get_last_insn ();
5448
5449 /* If optimizing, use different pseudo registers for each insn, instead
5450 of reusing the same pseudo. This leads to better CSE, but slows
5451 down the compiler, since there are more pseudos */
5452 subtarget = (!optimize
5453 && (target_mode == mode)) ? target : NULL_RTX;
5454 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455
5456 /* For floating-point comparisons, try the reverse comparison or try
5457 changing the "orderedness" of the comparison. */
5458 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459 {
5460 enum rtx_code first_code;
5461 bool and_them;
5462
5463 rcode = reverse_condition_maybe_unordered (code);
5464 if (can_compare_p (rcode, mode, ccp_store_flag)
5465 && (code == ORDERED || code == UNORDERED
5466 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468 {
5469 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471
5472 /* For the reverse comparison, use either an addition or a XOR. */
5473 if (want_add
5474 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475 optimize_insn_for_speed_p ()) == 0)
5476 {
5477 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478 STORE_FLAG_VALUE, target_mode);
5479 if (tem)
5480 return expand_binop (target_mode, add_optab, tem,
5481 GEN_INT (normalizep),
5482 target, 0, OPTAB_WIDEN);
5483 }
5484 else if (!want_add
5485 && rtx_cost (trueval, XOR, 1,
5486 optimize_insn_for_speed_p ()) == 0)
5487 {
5488 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489 normalizep, target_mode);
5490 if (tem)
5491 return expand_binop (target_mode, xor_optab, tem, trueval,
5492 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5493 }
5494 }
5495
5496 delete_insns_since (last);
5497
5498 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5499 if (code == ORDERED || code == UNORDERED)
5500 return 0;
5501
5502 and_them = split_comparison (code, mode, &first_code, &code);
5503
5504 /* If there are no NaNs, the first comparison should always fall through.
5505 Effectively change the comparison to the other one. */
5506 if (!HONOR_NANS (mode))
5507 {
5508 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510 target_mode);
5511 }
5512
5513 #ifdef HAVE_conditional_move
5514 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515 conditional move. */
5516 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517 normalizep, target_mode);
5518 if (tem == 0)
5519 return 0;
5520
5521 if (and_them)
5522 tem = emit_conditional_move (target, code, op0, op1, mode,
5523 tem, const0_rtx, GET_MODE (tem), 0);
5524 else
5525 tem = emit_conditional_move (target, code, op0, op1, mode,
5526 trueval, tem, GET_MODE (tem), 0);
5527
5528 if (tem == 0)
5529 delete_insns_since (last);
5530 return tem;
5531 #else
5532 return 0;
5533 #endif
5534 }
5535
5536 /* The remaining tricks only apply to integer comparisons. */
5537
5538 if (GET_MODE_CLASS (mode) != MODE_INT)
5539 return 0;
5540
5541 /* If this is an equality comparison of integers, we can try to exclusive-or
5542 (or subtract) the two operands and use a recursive call to try the
5543 comparison with zero. Don't do any of these cases if branches are
5544 very cheap. */
5545
5546 if ((code == EQ || code == NE) && op1 != const0_rtx)
5547 {
5548 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549 OPTAB_WIDEN);
5550
5551 if (tem == 0)
5552 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553 OPTAB_WIDEN);
5554 if (tem != 0)
5555 tem = emit_store_flag (target, code, tem, const0_rtx,
5556 mode, unsignedp, normalizep);
5557 if (tem != 0)
5558 return tem;
5559
5560 delete_insns_since (last);
5561 }
5562
5563 /* For integer comparisons, try the reverse comparison. However, for
5564 small X and if we'd have anyway to extend, implementing "X != 0"
5565 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5566 rcode = reverse_condition (code);
5567 if (can_compare_p (rcode, mode, ccp_store_flag)
5568 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569 && code == NE
5570 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571 && op1 == const0_rtx))
5572 {
5573 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575
5576 /* Again, for the reverse comparison, use either an addition or a XOR. */
5577 if (want_add
5578 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579 optimize_insn_for_speed_p ()) == 0)
5580 {
5581 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 STORE_FLAG_VALUE, target_mode);
5583 if (tem != 0)
5584 tem = expand_binop (target_mode, add_optab, tem,
5585 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5586 }
5587 else if (!want_add
5588 && rtx_cost (trueval, XOR, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5590 {
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 normalizep, target_mode);
5593 if (tem != 0)
5594 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596 }
5597
5598 if (tem != 0)
5599 return tem;
5600 delete_insns_since (last);
5601 }
5602
5603 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604 the constant zero. Reject all other comparisons at this point. Only
5605 do LE and GT if branches are expensive since they are expensive on
5606 2-operand machines. */
5607
5608 if (op1 != const0_rtx
5609 || (code != EQ && code != NE
5610 && (BRANCH_COST (optimize_insn_for_speed_p (),
5611 false) <= 1 || (code != LE && code != GT))))
5612 return 0;
5613
5614 /* Try to put the result of the comparison in the sign bit. Assume we can't
5615 do the necessary operation below. */
5616
5617 tem = 0;
5618
5619 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5620 the sign bit set. */
5621
5622 if (code == LE)
5623 {
5624 /* This is destructive, so SUBTARGET can't be OP0. */
5625 if (rtx_equal_p (subtarget, op0))
5626 subtarget = 0;
5627
5628 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629 OPTAB_WIDEN);
5630 if (tem)
5631 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632 OPTAB_WIDEN);
5633 }
5634
5635 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636 number of bits in the mode of OP0, minus one. */
5637
5638 if (code == GT)
5639 {
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5642
5643 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644 GET_MODE_BITSIZE (mode) - 1,
5645 subtarget, 0);
5646 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5648 }
5649
5650 if (code == EQ || code == NE)
5651 {
5652 /* For EQ or NE, one way to do the comparison is to apply an operation
5653 that converts the operand into a positive number if it is nonzero
5654 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5655 for NE we negate. This puts the result in the sign bit. Then we
5656 normalize with a shift, if needed.
5657
5658 Two operations that can do the above actions are ABS and FFS, so try
5659 them. If that doesn't work, and MODE is smaller than a full word,
5660 we can use zero-extension to the wider mode (an unsigned conversion)
5661 as the operation. */
5662
5663 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664 that is compensated by the subsequent overflow when subtracting
5665 one / negating. */
5666
5667 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672 {
5673 tem = convert_modes (word_mode, mode, op0, 1);
5674 mode = word_mode;
5675 }
5676
5677 if (tem != 0)
5678 {
5679 if (code == EQ)
5680 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681 0, OPTAB_WIDEN);
5682 else
5683 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684 }
5685
5686 /* If we couldn't do it that way, for NE we can "or" the two's complement
5687 of the value with itself. For EQ, we take the one's complement of
5688 that "or", which is an extra insn, so we only handle EQ if branches
5689 are expensive. */
5690
5691 if (tem == 0
5692 && (code == NE
5693 || BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) > 1))
5695 {
5696 if (rtx_equal_p (subtarget, op0))
5697 subtarget = 0;
5698
5699 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701 OPTAB_WIDEN);
5702
5703 if (tem && code == EQ)
5704 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705 }
5706 }
5707
5708 if (tem && normalizep)
5709 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710 GET_MODE_BITSIZE (mode) - 1,
5711 subtarget, normalizep == 1);
5712
5713 if (tem)
5714 {
5715 if (!target)
5716 ;
5717 else if (GET_MODE (tem) != target_mode)
5718 {
5719 convert_move (target, tem, 0);
5720 tem = target;
5721 }
5722 else if (!subtarget)
5723 {
5724 emit_move_insn (target, tem);
5725 tem = target;
5726 }
5727 }
5728 else
5729 delete_insns_since (last);
5730
5731 return tem;
5732 }
5733
5734 /* Like emit_store_flag, but always succeeds. */
5735
5736 rtx
5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738 enum machine_mode mode, int unsignedp, int normalizep)
5739 {
5740 rtx tem, label;
5741 rtx trueval, falseval;
5742
5743 /* First see if emit_store_flag can do the job. */
5744 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5745 if (tem != 0)
5746 return tem;
5747
5748 if (!target)
5749 target = gen_reg_rtx (word_mode);
5750
5751 /* If this failed, we have to do this with set/compare/jump/set code.
5752 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5753 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5754 if (code == NE
5755 && GET_MODE_CLASS (mode) == MODE_INT
5756 && REG_P (target)
5757 && op0 == target
5758 && op1 == const0_rtx)
5759 {
5760 label = gen_label_rtx ();
5761 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5762 mode, NULL_RTX, NULL_RTX, label, -1);
5763 emit_move_insn (target, trueval);
5764 emit_label (label);
5765 return target;
5766 }
5767
5768 if (!REG_P (target)
5769 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5770 target = gen_reg_rtx (GET_MODE (target));
5771
5772 /* Jump in the right direction if the target cannot implement CODE
5773 but can jump on its reverse condition. */
5774 falseval = const0_rtx;
5775 if (! can_compare_p (code, mode, ccp_jump)
5776 && (! FLOAT_MODE_P (mode)
5777 || code == ORDERED || code == UNORDERED
5778 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5779 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5780 {
5781 enum rtx_code rcode;
5782 if (FLOAT_MODE_P (mode))
5783 rcode = reverse_condition_maybe_unordered (code);
5784 else
5785 rcode = reverse_condition (code);
5786
5787 /* Canonicalize to UNORDERED for the libcall. */
5788 if (can_compare_p (rcode, mode, ccp_jump)
5789 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5790 {
5791 falseval = trueval;
5792 trueval = const0_rtx;
5793 code = rcode;
5794 }
5795 }
5796
5797 emit_move_insn (target, trueval);
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5800 NULL_RTX, label, -1);
5801
5802 emit_move_insn (target, falseval);
5803 emit_label (label);
5804
5805 return target;
5806 }
5807 \f
5808 /* Perform possibly multi-word comparison and conditional jump to LABEL
5809 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5810 now a thin wrapper around do_compare_rtx_and_jump. */
5811
5812 static void
5813 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5814 rtx label)
5815 {
5816 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5817 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5818 NULL_RTX, NULL_RTX, label, -1);
5819 }