expmed.c (narrow_bit_field_mem): New function.
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
 69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
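/* For example, 8 is binary 1000 and 7 is binary 0111, so (8 & 7) == 0,
   whereas 6 is binary 0110 and 5 is binary 0101, so (6 & 5) != 0.
   Zero also passes the test, since (0 & (0 - 1)) == 0.  */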
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
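/* Scratch rtl used by init_expmed to estimate the cost of various
   operations without going through the rtl allocator.  Each rtx_def
   that needs a second operand is immediately followed by an rtunion
   that provides the storage for it.  */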
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
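/* Subroutine of init_expmed: record the cost of converting from FROM_MODE
   to TO_MODE when optimizing for speed or for size, as selected by SPEED,
   modelling the conversion as a truncation or extension of ALL->reg.  */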
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
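/* Subroutine of init_expmed: record the costs of the basic arithmetic,
   shift and widening operations in MODE when optimizing for speed or for
   size, as selected by SPEED, using the scratch rtl in *ALL.  */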
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Adjust bitfield memory MEM so that it points to the first unit of mode
391 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
392 If MODE is BLKmode, return a reference to every byte in the bitfield.
393 Set *NEW_BITNUM to the bit position of the field within the new memory. */
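/* For example (illustrative only, assuming 8-bit units): narrowing to
   MODE == HImode with BITNUM == 21 adjusts MEM by an offset of 2 bytes
   and sets *NEW_BITNUM to 21 % 16 == 5.  */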
394
395 static rtx
396 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
397 unsigned HOST_WIDE_INT bitsize,
398 unsigned HOST_WIDE_INT bitnum,
399 unsigned HOST_WIDE_INT *new_bitnum)
400 {
401 if (mode == BLKmode)
402 {
403 *new_bitnum = bitnum % BITS_PER_UNIT;
404 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
405 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
406 / BITS_PER_UNIT);
407 return adjust_bitfield_address_size (mem, mode, offset, size);
408 }
409 else
410 {
411 unsigned int unit = GET_MODE_BITSIZE (mode);
412 *new_bitnum = bitnum % unit;
413 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
414 return adjust_bitfield_address (mem, mode, offset);
415 }
416 }
417
418 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
419 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
420 offset is then BITNUM / BITS_PER_UNIT. */
421
422 static bool
423 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT bitsize,
425 enum machine_mode struct_mode)
426 {
427 if (BYTES_BIG_ENDIAN)
428 return (bitnum % BITS_PER_UNIT == 0
429 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
430 || (bitnum + bitsize) % BITS_PER_WORD == 0));
431 else
432 return bitnum % BITS_PER_WORD == 0;
433 }
434 \f
435 /* Try to use an insv pattern to store VALUE into a field of OP0.
436 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
437 as for store_bit_field. */
438
439 static bool
440 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
441 unsigned HOST_WIDE_INT bitnum, rtx value,
442 enum machine_mode op_mode)
443 {
444 struct expand_operand ops[4];
445 rtx value1;
446 rtx xop0 = op0;
447 rtx last = get_last_insn ();
448 bool copy_back = false;
449
450 unsigned int unit = GET_MODE_BITSIZE (op_mode);
451 if (bitsize == 0 || bitsize > unit)
452 return false;
453
454 if (MEM_P (xop0))
455 /* Get a reference to the first byte of the field. */
456 xop0 = narrow_bit_field_mem (xop0, byte_mode, bitsize, bitnum, &bitnum);
457 else
458 {
459 /* Convert from counting within OP0 to counting in OP_MODE. */
460 if (BYTES_BIG_ENDIAN)
461 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
462
463 /* If xop0 is a register, we need it in OP_MODE
464 to make it acceptable to the format of insv. */
465 if (GET_CODE (xop0) == SUBREG)
466 /* We can't just change the mode, because this might clobber op0,
467 and we will need the original value of op0 if insv fails. */
468 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
469 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
470 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
471 }
472
473 /* If the destination is a paradoxical subreg such that we need a
474 truncate to the inner mode, perform the insertion on a temporary and
475 truncate the result to the original destination. Note that we can't
476 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
477 X) 0)) is (reg:N X). */
478 if (GET_CODE (xop0) == SUBREG
479 && REG_P (SUBREG_REG (xop0))
480 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
481 op_mode))
482 {
483 rtx tem = gen_reg_rtx (op_mode);
484 emit_move_insn (tem, xop0);
485 xop0 = tem;
486 copy_back = true;
487 }
488
489 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
490 "backwards" from the size of the unit we are inserting into.
491 Otherwise, we count bits from the most significant on a
492 BYTES/BITS_BIG_ENDIAN machine. */
493
494 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
495 bitnum = unit - bitsize - bitnum;
496
497 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
498 value1 = value;
499 if (GET_MODE (value) != op_mode)
500 {
501 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
502 {
503 /* Optimization: Don't bother really extending VALUE
504 if it has all the bits we will actually use. However,
505 if we must narrow it, be sure we do it correctly. */
506
507 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
508 {
509 rtx tmp;
510
511 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
512 if (! tmp)
513 tmp = simplify_gen_subreg (op_mode,
514 force_reg (GET_MODE (value),
515 value1),
516 GET_MODE (value), 0);
517 value1 = tmp;
518 }
519 else
520 value1 = gen_lowpart (op_mode, value1);
521 }
522 else if (CONST_INT_P (value))
523 value1 = gen_int_mode (INTVAL (value), op_mode);
524 else
525 /* Parse phase is supposed to make VALUE's data type
526 match that of the component reference, which is a type
527 at least as wide as the field; so VALUE should have
528 a mode that corresponds to that type. */
529 gcc_assert (CONSTANT_P (value));
530 }
531
532 create_fixed_operand (&ops[0], xop0);
533 create_integer_operand (&ops[1], bitsize);
534 create_integer_operand (&ops[2], bitnum);
535 create_input_operand (&ops[3], value1, op_mode);
536 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
537 {
538 if (copy_back)
539 convert_move (op0, xop0, true);
540 return true;
541 }
542 delete_insns_since (last);
543 return false;
544 }
545
546 /* A subroutine of store_bit_field, with the same arguments. Return true
547 if the operation could be implemented.
548
549 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
550 no other way of implementing the operation. If FALLBACK_P is false,
551 return false instead. */
552
553 static bool
554 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
555 unsigned HOST_WIDE_INT bitnum,
556 unsigned HOST_WIDE_INT bitregion_start,
557 unsigned HOST_WIDE_INT bitregion_end,
558 enum machine_mode fieldmode,
559 rtx value, bool fallback_p)
560 {
561 rtx op0 = str_rtx;
562 rtx orig_value;
563
564 while (GET_CODE (op0) == SUBREG)
565 {
566 /* The following line once was done only if WORDS_BIG_ENDIAN,
567 but I think that is a mistake. WORDS_BIG_ENDIAN is
568 meaningful at a much higher level; when structures are copied
569 between memory and regs, the higher-numbered regs
570 always get higher addresses. */
571 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
572 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
573 int byte_offset = 0;
574
575 /* Paradoxical subregs need special handling on big endian machines. */
576 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
577 {
578 int difference = inner_mode_size - outer_mode_size;
579
580 if (WORDS_BIG_ENDIAN)
581 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
582 if (BYTES_BIG_ENDIAN)
583 byte_offset += difference % UNITS_PER_WORD;
584 }
585 else
586 byte_offset = SUBREG_BYTE (op0);
587
588 bitnum += byte_offset * BITS_PER_UNIT;
589 op0 = SUBREG_REG (op0);
590 }
591
592 /* No action is needed if the target is a register and if the field
593 lies completely outside that register. This can occur if the source
594 code contains an out-of-bounds access to a small array. */
595 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
596 return true;
597
598 /* Use vec_set patterns for inserting parts of vectors whenever
599 available. */
600 if (VECTOR_MODE_P (GET_MODE (op0))
601 && !MEM_P (op0)
602 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
603 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
604 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
605 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
606 {
607 struct expand_operand ops[3];
608 enum machine_mode outermode = GET_MODE (op0);
609 enum machine_mode innermode = GET_MODE_INNER (outermode);
610 enum insn_code icode = optab_handler (vec_set_optab, outermode);
611 int pos = bitnum / GET_MODE_BITSIZE (innermode);
612
613 create_fixed_operand (&ops[0], op0);
614 create_input_operand (&ops[1], value, innermode);
615 create_integer_operand (&ops[2], pos);
616 if (maybe_expand_insn (icode, 3, ops))
617 return true;
618 }
619
 620   /* If the target is a register, then overwriting the entire object or
 621      storing a full-word or multi-word field can be done with just a SUBREG.  */
622 if (!MEM_P (op0)
623 && bitsize == GET_MODE_BITSIZE (fieldmode)
624 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
625 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
626 {
627 /* Use the subreg machinery either to narrow OP0 to the required
628 words or to cope with mode punning between equal-sized modes. */
629 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
630 bitnum / BITS_PER_UNIT);
631 if (sub)
632 {
633 emit_move_insn (sub, value);
634 return true;
635 }
636 }
637
638 /* If the target is memory, storing any naturally aligned field can be
639 done with a simple store. For targets that support fast unaligned
640 memory, any naturally sized, unit aligned field can be done directly. */
641 if (MEM_P (op0)
642 && bitnum % BITS_PER_UNIT == 0
643 && bitsize == GET_MODE_BITSIZE (fieldmode)
644 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
645 || (bitnum % bitsize == 0
646 && MEM_ALIGN (op0) % bitsize == 0)))
647 {
648 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
649 emit_move_insn (op0, value);
650 return true;
651 }
652
653 /* Make sure we are playing with integral modes. Pun with subregs
654 if we aren't. This must come after the entire register case above,
655 since that case is valid for any mode. The following cases are only
656 valid for integral modes. */
657 {
658 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
659 if (imode != GET_MODE (op0))
660 {
661 if (MEM_P (op0))
662 op0 = adjust_bitfield_address (op0, imode, 0);
663 else
664 {
665 gcc_assert (imode != BLKmode);
666 op0 = gen_lowpart (imode, op0);
667 }
668 }
669 }
670
671 /* Storing an lsb-aligned field in a register
672 can be done with a movstrict instruction. */
673
674 if (!MEM_P (op0)
675 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
676 && bitsize == GET_MODE_BITSIZE (fieldmode)
677 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
678 {
679 struct expand_operand ops[2];
680 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
681 rtx arg0 = op0;
682 unsigned HOST_WIDE_INT subreg_off;
683
684 if (GET_CODE (arg0) == SUBREG)
685 {
686 /* Else we've got some float mode source being extracted into
687 a different float mode destination -- this combination of
688 subregs results in Severe Tire Damage. */
689 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
690 || GET_MODE_CLASS (fieldmode) == MODE_INT
691 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
692 arg0 = SUBREG_REG (arg0);
693 }
694
695 subreg_off = bitnum / BITS_PER_UNIT;
696 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
697 {
698 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
699
700 create_fixed_operand (&ops[0], arg0);
701 /* Shrink the source operand to FIELDMODE. */
702 create_convert_operand_to (&ops[1], value, fieldmode, false);
703 if (maybe_expand_insn (icode, 2, ops))
704 return true;
705 }
706 }
707
708 /* Handle fields bigger than a word. */
709
710 if (bitsize > BITS_PER_WORD)
711 {
712 /* Here we transfer the words of the field
713 in the order least significant first.
714 This is because the most significant word is the one which may
715 be less than full.
716 However, only do that if the value is not BLKmode. */
717
718 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
719 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
720 unsigned int i;
721 rtx last;
722
723 /* This is the mode we must force value to, so that there will be enough
724 subwords to extract. Note that fieldmode will often (always?) be
725 VOIDmode, because that is what store_field uses to indicate that this
726 is a bit field, but passing VOIDmode to operand_subword_force
727 is not allowed. */
728 fieldmode = GET_MODE (value);
729 if (fieldmode == VOIDmode)
730 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
731
732 last = get_last_insn ();
733 for (i = 0; i < nwords; i++)
734 {
735 /* If I is 0, use the low-order word in both field and target;
736 if I is 1, use the next to lowest word; and so on. */
737 unsigned int wordnum = (backwards
738 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
739 - i - 1
740 : i);
741 unsigned int bit_offset = (backwards
742 ? MAX ((int) bitsize - ((int) i + 1)
743 * BITS_PER_WORD,
744 0)
745 : (int) i * BITS_PER_WORD);
746 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
747 unsigned HOST_WIDE_INT new_bitsize =
748 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
749
750 /* If the remaining chunk doesn't have full wordsize we have
751 to make sure that for big endian machines the higher order
752 bits are used. */
753 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
754 value_word = simplify_expand_binop (word_mode, lshr_optab,
755 value_word,
756 GEN_INT (BITS_PER_WORD
757 - new_bitsize),
758 NULL_RTX, true,
759 OPTAB_LIB_WIDEN);
760
761 if (!store_bit_field_1 (op0, new_bitsize,
762 bitnum + bit_offset,
763 bitregion_start, bitregion_end,
764 word_mode,
765 value_word, fallback_p))
766 {
767 delete_insns_since (last);
768 return false;
769 }
770 }
771 return true;
772 }
773
774 /* If VALUE has a floating-point or complex mode, access it as an
775 integer of the corresponding size. This can occur on a machine
776 with 64 bit registers that uses SFmode for float. It can also
777 occur for unaligned float or complex fields. */
778 orig_value = value;
779 if (GET_MODE (value) != VOIDmode
780 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
781 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
782 {
783 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
784 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
785 }
786
787 /* If OP0 is a multi-word register, narrow it to the affected word.
788 If the region spans two words, defer to store_split_bit_field. */
789 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
790 {
791 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
792 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
793 gcc_assert (op0);
794 bitnum %= BITS_PER_WORD;
795 if (bitnum + bitsize > BITS_PER_WORD)
796 {
797 if (!fallback_p)
798 return false;
799
800 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
801 bitregion_end, value);
802 return true;
803 }
804 }
805
 806   /* From here on we can assume that the field to be stored fits
 807      within a word.  If the destination is a register, it too fits
 808      in a word.  */
809
810 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
811 if (op_mode != MAX_MACHINE_MODE
812 && !MEM_P (op0)
813 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
814 return true;
815
816 /* If OP0 is a memory, try copying it to a register and seeing if a
817 cheap register alternative is available. */
818 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
819 {
820 enum machine_mode bestmode;
821 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
822
823 /* Do not use insv for volatile bitfields when
824 -fstrict-volatile-bitfields is in effect. */
825 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
826 /* Do not use insv if the bit region is restricted and
827 an op_mode integer doesn't fit into the restricted region. */
828 && !(bitregion_end
829 && (bitnum - (bitnum % BITS_PER_UNIT)
830 + GET_MODE_BITSIZE (op_mode)
831 > bitregion_end + 1))
832 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
833 return true;
834
835 if (bitregion_end)
836 maxbits = bitregion_end - bitregion_start + 1;
837
838 /* Get the mode to use for inserting into this field. If OP0 is
839 BLKmode, get the smallest mode consistent with the alignment. If
840 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
841 mode. Otherwise, use the smallest mode containing the field. */
842
843 if (GET_MODE (op0) == BLKmode
844 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
845 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
846 bestmode = get_best_mode (bitsize, bitnum,
847 bitregion_start, bitregion_end,
848 MEM_ALIGN (op0), op_mode,
849 MEM_VOLATILE_P (op0));
850 else
851 bestmode = GET_MODE (op0);
852
853 if (bestmode != VOIDmode
854 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
855 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
856 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
857 {
858 rtx last, tempreg, xop0;
859 unsigned HOST_WIDE_INT bitpos;
860
861 last = get_last_insn ();
862
863 /* Adjust address to point to the containing unit of
864 that mode. Compute the offset as a multiple of this unit,
865 counting in bytes. */
866 xop0 = narrow_bit_field_mem (op0, bestmode, bitsize, bitnum,
867 &bitpos);
868
869 /* Fetch that unit, store the bitfield in it, then store
870 the unit. */
871 tempreg = copy_to_reg (xop0);
872 if (store_bit_field_1 (tempreg, bitsize, bitpos,
873 bitregion_start, bitregion_end,
874 fieldmode, orig_value, false))
875 {
876 emit_move_insn (xop0, tempreg);
877 return true;
878 }
879 delete_insns_since (last);
880 }
881 }
882
883 if (!fallback_p)
884 return false;
885
886 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
887 bitregion_end, value);
888 return true;
889 }
890
891 /* Generate code to store value from rtx VALUE
892 into a bit-field within structure STR_RTX
893 containing BITSIZE bits starting at bit BITNUM.
894
 895    BITREGION_START is the bit position of the first bitfield in this region.
 896    BITREGION_END is the bit position of the last bitfield in this region.
 897    These two fields are 0 if the C++ memory model does not apply,
898 or we are not interested in keeping track of bitfield regions.
899
900 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
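/* A typical call (illustrative only, with MEM the containing rtx and
   VALUE the value to store) to store an 8-bit field starting at bit 3,
   with no C++ memory model bit region in force:

     store_bit_field (mem, 8, 3, 0, 0, QImode, value);  */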
901
902 void
903 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
904 unsigned HOST_WIDE_INT bitnum,
905 unsigned HOST_WIDE_INT bitregion_start,
906 unsigned HOST_WIDE_INT bitregion_end,
907 enum machine_mode fieldmode,
908 rtx value)
909 {
910 /* Under the C++0x memory model, we must not touch bits outside the
911 bit region. Adjust the address to start at the beginning of the
912 bit region. */
913 if (MEM_P (str_rtx) && bitregion_start > 0)
914 {
915 enum machine_mode bestmode;
916 enum machine_mode op_mode;
917 unsigned HOST_WIDE_INT offset;
918
919 op_mode = mode_for_extraction (EP_insv, 3);
920 if (op_mode == MAX_MACHINE_MODE)
921 op_mode = VOIDmode;
922
923 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
924
925 offset = bitregion_start / BITS_PER_UNIT;
926 bitnum -= bitregion_start;
927 bitregion_end -= bitregion_start;
928 bitregion_start = 0;
929 bestmode = get_best_mode (bitsize, bitnum,
930 bitregion_start, bitregion_end,
931 MEM_ALIGN (str_rtx),
932 op_mode,
933 MEM_VOLATILE_P (str_rtx));
934 str_rtx = adjust_address (str_rtx, bestmode, offset);
935 }
936
937 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
938 bitregion_start, bitregion_end,
939 fieldmode, value, true))
940 gcc_unreachable ();
941 }
942 \f
943 /* Use shifts and boolean operations to store VALUE into a bit field of
944 width BITSIZE in OP0, starting at bit BITNUM. */
945
946 static void
947 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
948 unsigned HOST_WIDE_INT bitnum,
949 unsigned HOST_WIDE_INT bitregion_start,
950 unsigned HOST_WIDE_INT bitregion_end,
951 rtx value)
952 {
953 enum machine_mode mode;
954 rtx temp;
955 int all_zero = 0;
956 int all_one = 0;
957
958 /* There is a case not handled here:
959 a structure with a known alignment of just a halfword
960 and a field split across two aligned halfwords within the structure.
961 Or likewise a structure with a known alignment of just a byte
962 and a field split across two bytes.
963 Such cases are not supposed to be able to occur. */
964
965 if (MEM_P (op0))
966 {
967 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
968
969 if (bitregion_end)
970 maxbits = bitregion_end - bitregion_start + 1;
971
972 /* Get the proper mode to use for this field. We want a mode that
973 includes the entire field. If such a mode would be larger than
 974      a word, we won't be doing the insertion the normal way.
975 We don't want a mode bigger than the destination. */
976
977 mode = GET_MODE (op0);
978 if (GET_MODE_BITSIZE (mode) == 0
979 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
980 mode = word_mode;
981
982 if (MEM_VOLATILE_P (op0)
983 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
984 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
985 && flag_strict_volatile_bitfields > 0)
986 mode = GET_MODE (op0);
987 else
988 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
989 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
990
991 if (mode == VOIDmode)
992 {
993 /* The only way this should occur is if the field spans word
994 boundaries. */
995 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
996 bitregion_end, value);
997 return;
998 }
999
1000 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1001 }
1002
1003 mode = GET_MODE (op0);
1004 gcc_assert (SCALAR_INT_MODE_P (mode));
1005
1006 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1007 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1008
1009 if (BYTES_BIG_ENDIAN)
1010 /* BITNUM is the distance between our msb
1011 and that of the containing datum.
1012 Convert it to the distance from the lsb. */
1013 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1014
1015 /* Now BITNUM is always the distance between our lsb
1016 and that of OP0. */
1017
1018 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1019 we must first convert its mode to MODE. */
1020
1021 if (CONST_INT_P (value))
1022 {
1023 HOST_WIDE_INT v = INTVAL (value);
1024
1025 if (bitsize < HOST_BITS_PER_WIDE_INT)
1026 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1027
1028 if (v == 0)
1029 all_zero = 1;
1030 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1031 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1032 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1033 all_one = 1;
1034
1035 value = lshift_value (mode, value, bitnum, bitsize);
1036 }
1037 else
1038 {
1039 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1040 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1041
1042 if (GET_MODE (value) != mode)
1043 value = convert_to_mode (mode, value, 1);
1044
1045 if (must_and)
1046 value = expand_binop (mode, and_optab, value,
1047 mask_rtx (mode, 0, bitsize, 0),
1048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1049 if (bitnum > 0)
1050 value = expand_shift (LSHIFT_EXPR, mode, value,
1051 bitnum, NULL_RTX, 1);
1052 }
1053
1054 /* Now clear the chosen bits in OP0,
1055 except that if VALUE is -1 we need not bother. */
1056 /* We keep the intermediates in registers to allow CSE to combine
1057 consecutive bitfield assignments. */
1058
1059 temp = force_reg (mode, op0);
1060
1061 if (! all_one)
1062 {
1063 temp = expand_binop (mode, and_optab, temp,
1064 mask_rtx (mode, bitnum, bitsize, 1),
1065 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1066 temp = force_reg (mode, temp);
1067 }
1068
1069 /* Now logical-or VALUE into OP0, unless it is zero. */
1070
1071 if (! all_zero)
1072 {
1073 temp = expand_binop (mode, ior_optab, temp, value,
1074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1075 temp = force_reg (mode, temp);
1076 }
1077
1078 if (op0 != temp)
1079 {
1080 op0 = copy_rtx (op0);
1081 emit_move_insn (op0, temp);
1082 }
1083 }
1084 \f
1085 /* Store a bit field that is split across multiple accessible memory objects.
1086
1087 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1088 BITSIZE is the field width; BITPOS the position of its first bit
1089 (within the word).
1090 VALUE is the value to store.
1091
1092 This does not yet handle fields wider than BITS_PER_WORD. */
1093
1094 static void
1095 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1096 unsigned HOST_WIDE_INT bitpos,
1097 unsigned HOST_WIDE_INT bitregion_start,
1098 unsigned HOST_WIDE_INT bitregion_end,
1099 rtx value)
1100 {
1101 unsigned int unit;
1102 unsigned int bitsdone = 0;
1103
 1104   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
 1105      much at a time.  */
1106 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1107 unit = BITS_PER_WORD;
1108 else
1109 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1110
1111 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1112 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1113 that VALUE might be a floating-point constant. */
1114 if (CONSTANT_P (value) && !CONST_INT_P (value))
1115 {
1116 rtx word = gen_lowpart_common (word_mode, value);
1117
1118 if (word && (value != word))
1119 value = word;
1120 else
1121 value = gen_lowpart_common (word_mode,
1122 force_reg (GET_MODE (value) != VOIDmode
1123 ? GET_MODE (value)
1124 : word_mode, value));
1125 }
1126
1127 while (bitsdone < bitsize)
1128 {
1129 unsigned HOST_WIDE_INT thissize;
1130 rtx part, word;
1131 unsigned HOST_WIDE_INT thispos;
1132 unsigned HOST_WIDE_INT offset;
1133
1134 offset = (bitpos + bitsdone) / unit;
1135 thispos = (bitpos + bitsdone) % unit;
1136
 1137       /* When the region of bytes we can touch is restricted, decrease
 1138          UNIT close to the end of the region as needed.  */
1139 if (bitregion_end
1140 && unit > BITS_PER_UNIT
1141 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1142 {
1143 unit = unit / 2;
1144 continue;
1145 }
1146
1147 /* THISSIZE must not overrun a word boundary. Otherwise,
1148 store_fixed_bit_field will call us again, and we will mutually
1149 recurse forever. */
1150 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1151 thissize = MIN (thissize, unit - thispos);
1152
1153 if (BYTES_BIG_ENDIAN)
1154 {
1155 /* Fetch successively less significant portions. */
1156 if (CONST_INT_P (value))
1157 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1158 >> (bitsize - bitsdone - thissize))
1159 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1160 else
1161 {
1162 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1163 /* The args are chosen so that the last part includes the
1164 lsb. Give extract_bit_field the value it needs (with
1165 endianness compensation) to fetch the piece we want. */
1166 part = extract_fixed_bit_field (word_mode, value, thissize,
1167 total_bits - bitsize + bitsdone,
1168 NULL_RTX, 1, false);
1169 }
1170 }
1171 else
1172 {
1173 /* Fetch successively more significant portions. */
1174 if (CONST_INT_P (value))
1175 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1176 >> bitsdone)
1177 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1178 else
1179 part = extract_fixed_bit_field (word_mode, value, thissize,
1180 bitsdone, NULL_RTX, 1, false);
1181 }
1182
1183 /* If OP0 is a register, then handle OFFSET here.
1184
1185 When handling multiword bitfields, extract_bit_field may pass
1186 down a word_mode SUBREG of a larger REG for a bitfield that actually
1187 crosses a word boundary. Thus, for a SUBREG, we must find
1188 the current word starting from the base register. */
1189 if (GET_CODE (op0) == SUBREG)
1190 {
1191 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1192 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1193 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1194 word = word_offset ? const0_rtx : op0;
1195 else
1196 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1197 GET_MODE (SUBREG_REG (op0)));
1198 offset = 0;
1199 }
1200 else if (REG_P (op0))
1201 {
1202 enum machine_mode op0_mode = GET_MODE (op0);
1203 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1204 word = offset ? const0_rtx : op0;
1205 else
1206 word = operand_subword_force (op0, offset, GET_MODE (op0));
1207 offset = 0;
1208 }
1209 else
1210 word = op0;
1211
1212 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1213 it is just an out-of-bounds access. Ignore it. */
1214 if (word != const0_rtx)
1215 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1216 bitregion_start, bitregion_end, part);
1217 bitsdone += thissize;
1218 }
1219 }
1220 \f
1221 /* A subroutine of extract_bit_field_1 that converts return value X
1222 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1223 to extract_bit_field. */
1224
1225 static rtx
1226 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1227 enum machine_mode tmode, bool unsignedp)
1228 {
1229 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1230 return x;
1231
1232 /* If the x mode is not a scalar integral, first convert to the
1233 integer mode of that size and then access it as a floating-point
1234 value via a SUBREG. */
1235 if (!SCALAR_INT_MODE_P (tmode))
1236 {
1237 enum machine_mode smode;
1238
1239 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1240 x = convert_to_mode (smode, x, unsignedp);
1241 x = force_reg (smode, x);
1242 return gen_lowpart (tmode, x);
1243 }
1244
1245 return convert_to_mode (tmode, x, unsignedp);
1246 }
1247
1248 /* Try to use an ext(z)v pattern to extract a field from OP0.
1249 Return the extracted value on success, otherwise return null.
1250 EXT_MODE is the mode of the extraction and the other arguments
1251 are as for extract_bit_field. */
1252
1253 static rtx
1254 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1255 unsigned HOST_WIDE_INT bitnum,
1256 int unsignedp, rtx target,
1257 enum machine_mode mode, enum machine_mode tmode,
1258 enum machine_mode ext_mode)
1259 {
1260 struct expand_operand ops[4];
1261 rtx spec_target = target;
1262 rtx spec_target_subreg = 0;
1263 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1264
1265 if (bitsize == 0 || unit < bitsize)
1266 return NULL_RTX;
1267
1268 if (MEM_P (op0))
1269 /* Get a reference to the first byte of the field. */
1270 op0 = narrow_bit_field_mem (op0, byte_mode, bitsize, bitnum, &bitnum);
1271 else
1272 {
1273 /* Convert from counting within OP0 to counting in EXT_MODE. */
1274 if (BYTES_BIG_ENDIAN)
1275 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1276
1277 /* If op0 is a register, we need it in EXT_MODE to make it
1278 acceptable to the format of ext(z)v. */
1279 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1280 return NULL_RTX;
1281 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1282 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1283 }
1284
1285 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1286 "backwards" from the size of the unit we are extracting from.
1287 Otherwise, we count bits from the most significant on a
1288 BYTES/BITS_BIG_ENDIAN machine. */
1289
1290 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1291 bitnum = unit - bitsize - bitnum;
1292
1293 if (target == 0)
1294 target = spec_target = gen_reg_rtx (tmode);
1295
1296 if (GET_MODE (target) != ext_mode)
1297 {
1298 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1299 between the mode of the extraction (word_mode) and the target
1300 mode. Instead, create a temporary and use convert_move to set
1301 the target. */
1302 if (REG_P (target)
1303 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1304 {
1305 target = gen_lowpart (ext_mode, target);
1306 if (GET_MODE_PRECISION (ext_mode)
1307 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1308 spec_target_subreg = target;
1309 }
1310 else
1311 target = gen_reg_rtx (ext_mode);
1312 }
1313
1314 create_output_operand (&ops[0], target, ext_mode);
1315 create_fixed_operand (&ops[1], op0);
1316 create_integer_operand (&ops[2], bitsize);
1317 create_integer_operand (&ops[3], bitnum);
1318 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1319 {
1320 target = ops[0].value;
1321 if (target == spec_target)
1322 return target;
1323 if (target == spec_target_subreg)
1324 return spec_target;
1325 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1326 }
1327 return NULL_RTX;
1328 }
1329
1330 /* A subroutine of extract_bit_field, with the same arguments.
1331 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1332 if we can find no other means of implementing the operation.
 1333    If FALLBACK_P is false, return NULL instead.  */
1334
1335 static rtx
1336 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1337 unsigned HOST_WIDE_INT bitnum,
1338 int unsignedp, bool packedp, rtx target,
1339 enum machine_mode mode, enum machine_mode tmode,
1340 bool fallback_p)
1341 {
1342 rtx op0 = str_rtx;
1343 enum machine_mode int_mode;
1344 enum machine_mode ext_mode;
1345 enum machine_mode mode1;
1346
1347 if (tmode == VOIDmode)
1348 tmode = mode;
1349
1350 while (GET_CODE (op0) == SUBREG)
1351 {
1352 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1353 op0 = SUBREG_REG (op0);
1354 }
1355
1356 /* If we have an out-of-bounds access to a register, just return an
1357 uninitialized register of the required mode. This can occur if the
1358 source code contains an out-of-bounds access to a small array. */
1359 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1360 return gen_reg_rtx (tmode);
1361
1362 if (REG_P (op0)
1363 && mode == GET_MODE (op0)
1364 && bitnum == 0
1365 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1366 {
1367 /* We're trying to extract a full register from itself. */
1368 return op0;
1369 }
1370
1371 /* See if we can get a better vector mode before extracting. */
1372 if (VECTOR_MODE_P (GET_MODE (op0))
1373 && !MEM_P (op0)
1374 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1375 {
1376 enum machine_mode new_mode;
1377
1378 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1379 new_mode = MIN_MODE_VECTOR_FLOAT;
1380 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1381 new_mode = MIN_MODE_VECTOR_FRACT;
1382 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1383 new_mode = MIN_MODE_VECTOR_UFRACT;
1384 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1385 new_mode = MIN_MODE_VECTOR_ACCUM;
1386 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1387 new_mode = MIN_MODE_VECTOR_UACCUM;
1388 else
1389 new_mode = MIN_MODE_VECTOR_INT;
1390
1391 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1392 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1393 && targetm.vector_mode_supported_p (new_mode))
1394 break;
1395 if (new_mode != VOIDmode)
1396 op0 = gen_lowpart (new_mode, op0);
1397 }
1398
1399 /* Use vec_extract patterns for extracting parts of vectors whenever
1400 available. */
1401 if (VECTOR_MODE_P (GET_MODE (op0))
1402 && !MEM_P (op0)
1403 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1404 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1405 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1406 {
1407 struct expand_operand ops[3];
1408 enum machine_mode outermode = GET_MODE (op0);
1409 enum machine_mode innermode = GET_MODE_INNER (outermode);
1410 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1411 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1412
1413 create_output_operand (&ops[0], target, innermode);
1414 create_input_operand (&ops[1], op0, outermode);
1415 create_integer_operand (&ops[2], pos);
1416 if (maybe_expand_insn (icode, 3, ops))
1417 {
1418 target = ops[0].value;
1419 if (GET_MODE (target) != mode)
1420 return gen_lowpart (tmode, target);
1421 return target;
1422 }
1423 }
1424
1425 /* Make sure we are playing with integral modes. Pun with subregs
1426 if we aren't. */
1427 {
1428 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1429 if (imode != GET_MODE (op0))
1430 {
1431 if (MEM_P (op0))
1432 op0 = adjust_bitfield_address (op0, imode, 0);
1433 else if (imode != BLKmode)
1434 {
1435 op0 = gen_lowpart (imode, op0);
1436
1437 /* If we got a SUBREG, force it into a register since we
1438 aren't going to be able to do another SUBREG on it. */
1439 if (GET_CODE (op0) == SUBREG)
1440 op0 = force_reg (imode, op0);
1441 }
1442 else if (REG_P (op0))
1443 {
1444 rtx reg, subreg;
1445 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1446 MODE_INT);
1447 reg = gen_reg_rtx (imode);
1448 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1449 emit_move_insn (subreg, op0);
1450 op0 = reg;
1451 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1452 }
1453 else
1454 {
1455 rtx mem = assign_stack_temp (GET_MODE (op0),
1456 GET_MODE_SIZE (GET_MODE (op0)));
1457 emit_move_insn (mem, op0);
1458 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1459 }
1460 }
1461 }
1462
1463 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1464 If that's wrong, the solution is to test for it and set TARGET to 0
1465 if needed. */
1466
1467 /* If the bitfield is volatile, we need to make sure the access
1468 remains on a type-aligned boundary. */
1469 if (GET_CODE (op0) == MEM
1470 && MEM_VOLATILE_P (op0)
1471 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1472 && flag_strict_volatile_bitfields > 0)
1473 goto no_subreg_mode_swap;
1474
1475 /* Only scalar integer modes can be converted via subregs. There is an
1476 additional problem for FP modes here in that they can have a precision
1477 which is different from the size. mode_for_size uses precision, but
1478 we want a mode based on the size, so we must avoid calling it for FP
1479 modes. */
1480 mode1 = mode;
1481 if (SCALAR_INT_MODE_P (tmode))
1482 {
1483 enum machine_mode try_mode = mode_for_size (bitsize,
1484 GET_MODE_CLASS (tmode), 0);
1485 if (try_mode != BLKmode)
1486 mode1 = try_mode;
1487 }
1488 gcc_assert (mode1 != BLKmode);
1489
1490 /* Extraction of a full MODE1 value can be done with a subreg as long
1491 as the least significant bit of the value is the least significant
1492 bit of either OP0 or a word of OP0. */
1493 if (!MEM_P (op0)
1494 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1495 && bitsize == GET_MODE_BITSIZE (mode1)
1496 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1497 {
1498 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1499 bitnum / BITS_PER_UNIT);
1500 if (sub)
1501 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1502 }
1503
1504 /* Extraction of a full MODE1 value can be done with a load as long as
1505 the field is on a byte boundary and is sufficiently aligned. */
1506 if (MEM_P (op0)
1507 && bitnum % BITS_PER_UNIT == 0
1508 && bitsize == GET_MODE_BITSIZE (mode1)
1509 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1510 || (bitnum % bitsize == 0
1511 && MEM_ALIGN (op0) % bitsize == 0)))
1512 {
1513 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1514 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1515 }
1516
1517 no_subreg_mode_swap:
1518
1519 /* Handle fields bigger than a word. */
1520
1521 if (bitsize > BITS_PER_WORD)
1522 {
1523 /* Here we transfer the words of the field
1524 in the order least significant first.
1525 This is because the most significant word is the one which may
1526 be less than full. */
1527
1528 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1529 unsigned int i;
1530 rtx last;
1531
1532 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1533 target = gen_reg_rtx (mode);
1534
1535 /* Indicate for flow that the entire target reg is being set. */
1536 emit_clobber (target);
1537
1538 last = get_last_insn ();
1539 for (i = 0; i < nwords; i++)
1540 {
1541 /* If I is 0, use the low-order word in both field and target;
1542 if I is 1, use the next to lowest word; and so on. */
1543 /* Word number in TARGET to use. */
1544 unsigned int wordnum
1545 = (WORDS_BIG_ENDIAN
1546 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1547 : i);
1548 /* Offset from start of field in OP0. */
1549 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1550 ? MAX (0, ((int) bitsize - ((int) i + 1)
1551 * (int) BITS_PER_WORD))
1552 : (int) i * BITS_PER_WORD);
1553 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1554 rtx result_part
1555 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1556 bitsize - i * BITS_PER_WORD),
1557 bitnum + bit_offset, 1, false, target_part,
1558 mode, word_mode, fallback_p);
1559
1560 gcc_assert (target_part);
1561 if (!result_part)
1562 {
1563 delete_insns_since (last);
1564 return NULL;
1565 }
1566
1567 if (result_part != target_part)
1568 emit_move_insn (target_part, result_part);
1569 }
1570
1571 if (unsignedp)
1572 {
1573 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1574 need to be zero'd out. */
1575 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1576 {
1577 unsigned int i, total_words;
1578
1579 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1580 for (i = nwords; i < total_words; i++)
1581 emit_move_insn
1582 (operand_subword (target,
1583 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1584 1, VOIDmode),
1585 const0_rtx);
1586 }
1587 return target;
1588 }
1589
1590 /* Signed bit field: sign-extend with two arithmetic shifts. */
1591 target = expand_shift (LSHIFT_EXPR, mode, target,
1592 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1593 return expand_shift (RSHIFT_EXPR, mode, target,
1594 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1595 }
1596
1597 /* If OP0 is a multi-word register, narrow it to the affected word.
1598 If the region spans two words, defer to extract_split_bit_field. */
1599 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1600 {
1601 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1602 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1603 bitnum %= BITS_PER_WORD;
1604 if (bitnum + bitsize > BITS_PER_WORD)
1605 {
1606 if (!fallback_p)
1607 return NULL_RTX;
1608 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1609 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1610 }
1611 }
1612
1613 /* From here on we know the desired field is smaller than a word.
1614 If OP0 is a register, it too fits within a word. */
1615
1616 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1617 if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0))
1618 {
1619 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1620 unsignedp, target, mode,
1621 tmode, ext_mode);
1622 if (result)
1623 return result;
1624 }
1625
1626 /* If OP0 is a memory, try copying it to a register and seeing if a
1627 cheap register alternative is available. */
1628 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1629 {
1630 enum machine_mode bestmode;
1631
1632 /* Do not use extv/extzv for volatile bitfields when
1633 -fstrict-volatile-bitfields is in effect. */
1634 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0))
1635 {
1636 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1637 unsignedp, target, mode,
1638 tmode, ext_mode);
1639 if (result)
1640 return result;
1641 }
1642
 1643      /* Get the mode to use for extracting this field.  If
1644 OP0 is BLKmode, get the smallest mode consistent with the
1645 alignment. If OP0 is a non-BLKmode object that is no
1646 wider than EXT_MODE, use its mode. Otherwise, use the
1647 smallest mode containing the field. */
1648
1649 if (GET_MODE (op0) == BLKmode
1650 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1651 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1652 ext_mode, MEM_VOLATILE_P (op0));
1653 else
1654 bestmode = GET_MODE (op0);
1655
1656 if (bestmode != VOIDmode
1657 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1658 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1659 {
1660 unsigned HOST_WIDE_INT bitpos;
1661 rtx xop0 = narrow_bit_field_mem (op0, bestmode, bitsize, bitnum,
1662 &bitpos);
1663
1664 /* Make sure the register is big enough for the whole field.
1665 (It might not be if bestmode == GET_MODE (op0) and the input
1666 code was invalid.) */
1667 if (bitpos + bitsize <= GET_MODE_BITSIZE (bestmode))
1668 {
1669 rtx last, result;
1670
1671 last = get_last_insn ();
1672
1673 /* Fetch it to a register in that size. */
1674 xop0 = force_reg (bestmode, xop0);
1675 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1676 unsignedp, packedp, target,
1677 mode, tmode, false);
1678 if (result)
1679 return result;
1680
1681 delete_insns_since (last);
1682 }
1683 }
1684 }
1685
1686 if (!fallback_p)
1687 return NULL;
1688
1689 /* Find a correspondingly-sized integer field, so we can apply
1690 shifts and masks to it. */
1691 int_mode = int_mode_for_mode (tmode);
1692 if (int_mode == BLKmode)
1693 int_mode = int_mode_for_mode (mode);
1694 /* Should probably push op0 out to memory and then do a load. */
1695 gcc_assert (int_mode != BLKmode);
1696
1697 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1698 target, unsignedp, packedp);
1699 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1700 }
1701
1702 /* Generate code to extract a bit-field from STR_RTX
1703 containing BITSIZE bits, starting at BITNUM,
1704 and put it in TARGET if possible (if TARGET is nonzero).
1705 Regardless of TARGET, we return the rtx for where the value is placed.
1706
1707 STR_RTX is the structure containing the field (a REG or MEM).
1708 UNSIGNEDP is nonzero if this is an unsigned bit field.
1709 PACKEDP is nonzero if the field has the packed attribute.
1710 MODE is the natural mode of the field value once extracted.
1711 TMODE is the mode the caller would like the value to have;
1712 but the value may be returned with type MODE instead.
1713
1714 If a TARGET is specified and we can store in it at no extra cost,
1715 we do so, and return TARGET.
1716 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1717 if they are equally easy. */
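/* For example, a caller wanting the unsigned 8-bit field that starts at
   bit 3 of a SImode value X might make the (hypothetical) call

     extract_bit_field (x, 8, 3, 1, false, NULL_RTX, QImode, QImode);

   and get back either TARGET or a new pseudo holding (X >> 3) & 0xff.  */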
1718
1719 rtx
1720 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1721 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1722 rtx target, enum machine_mode mode, enum machine_mode tmode)
1723 {
1724 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1725 target, mode, tmode, true);
1726 }
1727 \f
1728 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1729 from bit BITNUM of OP0.
1730
1731 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1732 PACKEDP is true if the field has the packed attribute.
1733
1734 If TARGET is nonzero, attempts to store the value there
1735 and return TARGET, but this is not guaranteed.
1736 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
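/* In the common single-word case this amounts to, for an unsigned field,
   (OP0 >> BITNUM) & ((1 << BITSIZE) - 1), and for a signed field, a left
   shift by W - BITSIZE - BITNUM followed by an arithmetic right shift by
   W - BITSIZE, where W is the width of the integer mode finally used
   (an illustrative summary for the little-endian bit-numbering case).  */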
1737
1738 static rtx
1739 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1740 unsigned HOST_WIDE_INT bitsize,
1741 unsigned HOST_WIDE_INT bitnum, rtx target,
1742 int unsignedp, bool packedp)
1743 {
1744 enum machine_mode mode;
1745
1746 if (MEM_P (op0))
1747 {
1748 /* Get the proper mode to use for this field. We want a mode that
1749 includes the entire field. If such a mode would be larger than
1750 a word, we won't be doing the extraction the normal way. */
1751
1752 if (MEM_VOLATILE_P (op0)
1753 && flag_strict_volatile_bitfields > 0)
1754 {
1755 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1756 mode = GET_MODE (op0);
1757 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1758 mode = GET_MODE (target);
1759 else
1760 mode = tmode;
1761 }
1762 else
1763 mode = get_best_mode (bitsize, bitnum, 0, 0,
1764 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1765
1766 if (mode == VOIDmode)
1767 /* The only way this should occur is if the field spans word
1768 boundaries. */
1769 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1770
1771 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1772 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1773
1774 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1775 if it results in a multi-word access where we otherwise wouldn't
1776 have one. So, check for that case here. */
1777 if (MEM_P (op0)
1778 && MEM_VOLATILE_P (op0)
1779 && flag_strict_volatile_bitfields > 0
1780 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1781 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1782 {
1783 if (STRICT_ALIGNMENT)
1784 {
1785 static bool informed_about_misalignment = false;
1786
1787 if (packedp)
1788 {
1789 if (bitsize == total_bits)
1790 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1791 "multiple accesses to volatile structure"
1792 " member because of packed attribute");
1793 else
1794 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1795 "multiple accesses to volatile structure"
1796 " bitfield because of packed attribute");
1797
1798 return extract_split_bit_field (op0, bitsize, bitnum,
1799 unsignedp);
1800 }
1801
1802 if (bitsize == total_bits)
1803 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1804 "mis-aligned access used for structure member");
1805 else
1806 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1807 "mis-aligned access used for structure bitfield");
1808
1809 if (! informed_about_misalignment)
1810 {
1811 informed_about_misalignment = true;
1812 inform (input_location,
1813 "when a volatile object spans multiple type-sized"
1814 " locations, the compiler must choose between using"
1815 " a single mis-aligned access to preserve the"
1816 " volatility, or using multiple aligned accesses"
1817 " to avoid runtime faults; this code may fail at"
1818 " runtime if the hardware does not allow this"
1819 " access");
1820 }
1821 }
1822 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1823 }
1824 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1825 bitnum -= bit_offset;
1826 }
1827
1828 mode = GET_MODE (op0);
1829 gcc_assert (SCALAR_INT_MODE_P (mode));
1830
1831 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1832 for invalid input, such as extract equivalent of f5 from
1833 gcc.dg/pr48335-2.c. */
1834
1835 if (BYTES_BIG_ENDIAN)
1836 /* BITNUM is the distance between our msb and that of OP0.
1837 Convert it to the distance from the lsb. */
1838 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1839
1840 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1841 We have reduced the big-endian case to the little-endian case. */
1842
1843 if (unsignedp)
1844 {
1845 if (bitnum)
1846 {
1847 /* If the field does not already start at the lsb,
1848 shift it so it does. */
1849 /* Maybe propagate the target for the shift. */
1850 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1851 if (tmode != mode)
1852 subtarget = 0;
1853 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1854 }
1855 /* Convert the value to the desired mode. */
1856 if (mode != tmode)
1857 op0 = convert_to_mode (tmode, op0, 1);
1858
1859 /* Unless the msb of the field used to be the msb when we shifted,
1860 mask out the upper bits. */
1861
1862 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1863 return expand_binop (GET_MODE (op0), and_optab, op0,
1864 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1865 target, 1, OPTAB_LIB_WIDEN);
1866 return op0;
1867 }
1868
1869 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1870 then arithmetic-shift its lsb to the lsb of the word. */
1871 op0 = force_reg (mode, op0);
1872
1873 /* Find the narrowest integer mode that contains the field. */
1874
1875 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1876 mode = GET_MODE_WIDER_MODE (mode))
1877 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1878 {
1879 op0 = convert_to_mode (mode, op0, 0);
1880 break;
1881 }
1882
1883 if (mode != tmode)
1884 target = 0;
1885
1886 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1887 {
1888 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1889 /* Maybe propagate the target for the shift. */
1890 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1891 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1892 }
1893
1894 return expand_shift (RSHIFT_EXPR, mode, op0,
1895 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1896 }
1897 \f
1898 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1899 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1900 complement of that if COMPLEMENT. The mask is truncated if
1901 necessary to the width of mode MODE. The mask is zero-extended if
1902 BITSIZE+BITPOS is too small for MODE. */
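/* For example, in SImode, BITPOS 3 and BITSIZE 4 give the mask 0x78
   (binary 1111000); with COMPLEMENT nonzero the result would be
   0xffffff87 instead.  */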
1903
1904 static rtx
1905 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1906 {
1907 double_int mask;
1908
1909 mask = double_int::mask (bitsize);
1910 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1911
1912 if (complement)
1913 mask = ~mask;
1914
1915 return immed_double_int_const (mask, mode);
1916 }
1917
1918 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1919 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
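/* For example, VALUE 0x1f5 with BITSIZE 4 and BITPOS 8 yields 0x500:
   the value is first truncated to 0x5 and then shifted into place.  */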
1920
1921 static rtx
1922 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1923 {
1924 double_int val;
1925
1926 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1927 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1928
1929 return immed_double_int_const (val, mode);
1930 }
1931 \f
1932 /* Extract a bit field that is split across two words
1933 and return an RTX for the result.
1934
1935 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1936 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1937 UNSIGNEDP is 1 if the contents should be zero-extended; else sign-extended. */
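/* For example, with 32-bit words, a 10-bit field whose first bit is bit 28
   of word 0 is assembled from the 4 bits at positions 28..31 of word 0 and
   the 6 bits at positions 0..5 of word 1 (an illustration for the
   little-endian bit-numbering case).  */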
1938
1939 static rtx
1940 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1941 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1942 {
1943 unsigned int unit;
1944 unsigned int bitsdone = 0;
1945 rtx result = NULL_RTX;
1946 int first = 1;
1947
1948 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1949 much at a time. */
1950 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1951 unit = BITS_PER_WORD;
1952 else
1953 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1954
1955 while (bitsdone < bitsize)
1956 {
1957 unsigned HOST_WIDE_INT thissize;
1958 rtx part, word;
1959 unsigned HOST_WIDE_INT thispos;
1960 unsigned HOST_WIDE_INT offset;
1961
1962 offset = (bitpos + bitsdone) / unit;
1963 thispos = (bitpos + bitsdone) % unit;
1964
1965 /* THISSIZE must not overrun a word boundary. Otherwise,
1966 extract_fixed_bit_field will call us again, and we will mutually
1967 recurse forever. */
1968 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1969 thissize = MIN (thissize, unit - thispos);
1970
1971 /* If OP0 is a register, then handle OFFSET here.
1972
1973 When handling multiword bitfields, extract_bit_field may pass
1974 down a word_mode SUBREG of a larger REG for a bitfield that actually
1975 crosses a word boundary. Thus, for a SUBREG, we must find
1976 the current word starting from the base register. */
1977 if (GET_CODE (op0) == SUBREG)
1978 {
1979 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1980 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1981 GET_MODE (SUBREG_REG (op0)));
1982 offset = 0;
1983 }
1984 else if (REG_P (op0))
1985 {
1986 word = operand_subword_force (op0, offset, GET_MODE (op0));
1987 offset = 0;
1988 }
1989 else
1990 word = op0;
1991
1992 /* Extract the parts in bit-counting order; how those bit counts map
1993 onto the underlying bytes is determined by the target's endianness.
1994 OFFSET is in UNITs, and UNIT is in bits. */
1995 part = extract_fixed_bit_field (word_mode, word, thissize,
1996 offset * unit + thispos, 0, 1, false);
1997 bitsdone += thissize;
1998
1999 /* Shift this part into place for the result. */
2000 if (BYTES_BIG_ENDIAN)
2001 {
2002 if (bitsize != bitsdone)
2003 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004 bitsize - bitsdone, 0, 1);
2005 }
2006 else
2007 {
2008 if (bitsdone != thissize)
2009 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010 bitsdone - thissize, 0, 1);
2011 }
2012
2013 if (first)
2014 result = part;
2015 else
2016 /* Combine the parts with bitwise or. This works
2017 because we extracted each part as an unsigned bit field. */
2018 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019 OPTAB_LIB_WIDEN);
2020
2021 first = 0;
2022 }
2023
2024 /* Unsigned bit field: we are done. */
2025 if (unsignedp)
2026 return result;
2027 /* Signed bit field: sign-extend with two arithmetic shifts. */
2028 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2029 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2030 return expand_shift (RSHIFT_EXPR, word_mode, result,
2031 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2032 }
2033 \f
2034 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2035 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2036 MODE, fill the upper bits with zeros. Fail if the layout of either
2037 mode is unknown (as for CC modes) or if the extraction would involve
2038 unprofitable mode punning. Return the value on success, otherwise
2039 return null.
2040
2041 This is different from gen_lowpart* in these respects:
2042
2043 - the returned value must always be considered an rvalue
2044
2045 - when MODE is wider than SRC_MODE, the extraction involves
2046 a zero extension
2047
2048 - when MODE is smaller than SRC_MODE, the extraction involves
2049 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2050
2051 In other words, this routine performs a computation, whereas the
2052 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2053 operations. */
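/* For example, extracting the SImode low bits of an SFmode value
   reinterprets the float's bit pattern as a 32-bit integer rvalue, while
   extracting SImode from a DImode value also drops the upper 32 bits;
   both illustrations assume the modes involved are tieable on the
   target.  */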
2054
2055 rtx
2056 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2057 {
2058 enum machine_mode int_mode, src_int_mode;
2059
2060 if (mode == src_mode)
2061 return src;
2062
2063 if (CONSTANT_P (src))
2064 {
2065 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2066 fails, it will happily create (subreg (symbol_ref)) or similar
2067 invalid SUBREGs. */
2068 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2069 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2070 if (ret)
2071 return ret;
2072
2073 if (GET_MODE (src) == VOIDmode
2074 || !validate_subreg (mode, src_mode, src, byte))
2075 return NULL_RTX;
2076
2077 src = force_reg (GET_MODE (src), src);
2078 return gen_rtx_SUBREG (mode, src, byte);
2079 }
2080
2081 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2082 return NULL_RTX;
2083
2084 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2085 && MODES_TIEABLE_P (mode, src_mode))
2086 {
2087 rtx x = gen_lowpart_common (mode, src);
2088 if (x)
2089 return x;
2090 }
2091
2092 src_int_mode = int_mode_for_mode (src_mode);
2093 int_mode = int_mode_for_mode (mode);
2094 if (src_int_mode == BLKmode || int_mode == BLKmode)
2095 return NULL_RTX;
2096
2097 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2098 return NULL_RTX;
2099 if (!MODES_TIEABLE_P (int_mode, mode))
2100 return NULL_RTX;
2101
2102 src = gen_lowpart (src_int_mode, src);
2103 src = convert_modes (int_mode, src_int_mode, src, true);
2104 src = gen_lowpart (mode, src);
2105 return src;
2106 }
2107 \f
2108 /* Add INC into TARGET. */
2109
2110 void
2111 expand_inc (rtx target, rtx inc)
2112 {
2113 rtx value = expand_binop (GET_MODE (target), add_optab,
2114 target, inc,
2115 target, 0, OPTAB_LIB_WIDEN);
2116 if (value != target)
2117 emit_move_insn (target, value);
2118 }
2119
2120 /* Subtract DEC from TARGET. */
2121
2122 void
2123 expand_dec (rtx target, rtx dec)
2124 {
2125 rtx value = expand_binop (GET_MODE (target), sub_optab,
2126 target, dec,
2127 target, 0, OPTAB_LIB_WIDEN);
2128 if (value != target)
2129 emit_move_insn (target, value);
2130 }
2131 \f
2132 /* Output a shift instruction for expression code CODE,
2133 with SHIFTED being the rtx for the value to shift,
2134 and AMOUNT the rtx for the amount to shift by.
2135 Store the result in the rtx TARGET, if that is convenient.
2136 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2137 Return the rtx for where the value is. */
2138
2139 static rtx
2140 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2141 rtx amount, rtx target, int unsignedp)
2142 {
2143 rtx op1, temp = 0;
2144 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2146 optab lshift_optab = ashl_optab;
2147 optab rshift_arith_optab = ashr_optab;
2148 optab rshift_uns_optab = lshr_optab;
2149 optab lrotate_optab = rotl_optab;
2150 optab rrotate_optab = rotr_optab;
2151 enum machine_mode op1_mode;
2152 int attempt;
2153 bool speed = optimize_insn_for_speed_p ();
2154
2155 op1 = amount;
2156 op1_mode = GET_MODE (op1);
2157
2158 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2159 shift amount is a vector, use the vector/vector shift patterns. */
2160 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2161 {
2162 lshift_optab = vashl_optab;
2163 rshift_arith_optab = vashr_optab;
2164 rshift_uns_optab = vlshr_optab;
2165 lrotate_optab = vrotl_optab;
2166 rrotate_optab = vrotr_optab;
2167 }
2168
2169 /* We used to detect shift counts computed by NEGATE_EXPR
2170 and shift in the other direction, but that does not work
2171 on all machines. */
2172
2173 if (SHIFT_COUNT_TRUNCATED)
2174 {
2175 if (CONST_INT_P (op1)
2176 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2177 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2178 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2179 % GET_MODE_BITSIZE (mode));
2180 else if (GET_CODE (op1) == SUBREG
2181 && subreg_lowpart_p (op1)
2182 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2183 && INTEGRAL_MODE_P (GET_MODE (op1)))
2184 op1 = SUBREG_REG (op1);
2185 }
2186
2187 if (op1 == const0_rtx)
2188 return shifted;
2189
2190 /* Check whether it's cheaper to implement a left shift by a constant
2191 bit count as a sequence of additions. */
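/* For example, a shift left by 2 can be done as two self-additions:
   x + x gives 2*x and adding that to itself gives 4*x, which is what
   the loop below emits when the additions are cheaper than the shift.  */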
2192 if (code == LSHIFT_EXPR
2193 && CONST_INT_P (op1)
2194 && INTVAL (op1) > 0
2195 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2196 && INTVAL (op1) < MAX_BITS_PER_WORD
2197 && (shift_cost (speed, mode, INTVAL (op1))
2198 > INTVAL (op1) * add_cost (speed, mode))
2199 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2200 {
2201 int i;
2202 for (i = 0; i < INTVAL (op1); i++)
2203 {
2204 temp = force_reg (mode, shifted);
2205 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2206 unsignedp, OPTAB_LIB_WIDEN);
2207 }
2208 return shifted;
2209 }
2210
2211 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2212 {
2213 enum optab_methods methods;
2214
2215 if (attempt == 0)
2216 methods = OPTAB_DIRECT;
2217 else if (attempt == 1)
2218 methods = OPTAB_WIDEN;
2219 else
2220 methods = OPTAB_LIB_WIDEN;
2221
2222 if (rotate)
2223 {
2224 /* Widening does not work for rotation. */
2225 if (methods == OPTAB_WIDEN)
2226 continue;
2227 else if (methods == OPTAB_LIB_WIDEN)
2228 {
2229 /* If we have been unable to open-code this by a rotation,
2230 do it as the IOR of two shifts. I.e., to rotate A
2231 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2232 where C is the bitsize of A.
2233
2234 It is theoretically possible that the target machine might
2235 not be able to perform either shift and hence we would
2236 be making two libcalls rather than just the one for the
2237 shift (similarly if IOR could not be done). We will allow
2238 this extremely unlikely lossage to avoid complicating the
2239 code below. */
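/* For instance, rotating the 8-bit value 0xb4 left by 3 gives
   ((0xb4 << 3) | (0xb4 >> 5)) & 0xff = 0xa0 | 0x05 = 0xa5.  */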
2240
2241 rtx subtarget = target == shifted ? 0 : target;
2242 rtx new_amount, other_amount;
2243 rtx temp1;
2244
2245 new_amount = op1;
2246 if (CONST_INT_P (op1))
2247 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2248 - INTVAL (op1));
2249 else
2250 other_amount
2251 = simplify_gen_binary (MINUS, GET_MODE (op1),
2252 GEN_INT (GET_MODE_PRECISION (mode)),
2253 op1);
2254
2255 shifted = force_reg (mode, shifted);
2256
2257 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2258 mode, shifted, new_amount, 0, 1);
2259 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2260 mode, shifted, other_amount,
2261 subtarget, 1);
2262 return expand_binop (mode, ior_optab, temp, temp1, target,
2263 unsignedp, methods);
2264 }
2265
2266 temp = expand_binop (mode,
2267 left ? lrotate_optab : rrotate_optab,
2268 shifted, op1, target, unsignedp, methods);
2269 }
2270 else if (unsignedp)
2271 temp = expand_binop (mode,
2272 left ? lshift_optab : rshift_uns_optab,
2273 shifted, op1, target, unsignedp, methods);
2274
2275 /* Do arithmetic shifts.
2276 Also, if we are going to widen the operand, we can just as well
2277 use an arithmetic right-shift instead of a logical one. */
2278 if (temp == 0 && ! rotate
2279 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2280 {
2281 enum optab_methods methods1 = methods;
2282
2283 /* If trying to widen a log shift to an arithmetic shift,
2284 don't accept an arithmetic shift of the same size. */
2285 if (unsignedp)
2286 methods1 = OPTAB_MUST_WIDEN;
2287
2288 /* Arithmetic shift */
2289
2290 temp = expand_binop (mode,
2291 left ? lshift_optab : rshift_arith_optab,
2292 shifted, op1, target, unsignedp, methods1);
2293 }
2294
2295 /* We used to try extzv here for logical right shifts, but that was
2296 only useful for one machine, the VAX, and caused poor code
2297 generation there for lshrdi3, so the code was deleted and a
2298 define_expand for lshrsi3 was added to vax.md. */
2299 }
2300
2301 gcc_assert (temp);
2302 return temp;
2303 }
2304
2305 /* Output a shift instruction for expression code CODE,
2306 with SHIFTED being the rtx for the value to shift,
2307 and AMOUNT the amount to shift by.
2308 Store the result in the rtx TARGET, if that is convenient.
2309 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2310 Return the rtx for where the value is. */
2311
2312 rtx
2313 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2314 int amount, rtx target, int unsignedp)
2315 {
2316 return expand_shift_1 (code, mode,
2317 shifted, GEN_INT (amount), target, unsignedp);
2318 }
2319
2320 /* Output a shift instruction for expression code CODE,
2321 with SHIFTED being the rtx for the value to shift,
2322 and AMOUNT the tree for the amount to shift by.
2323 Store the result in the rtx TARGET, if that is convenient.
2324 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325 Return the rtx for where the value is. */
2326
2327 rtx
2328 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2329 tree amount, rtx target, int unsignedp)
2330 {
2331 return expand_shift_1 (code, mode,
2332 shifted, expand_normal (amount), target, unsignedp);
2333 }
2334
2335 \f
2336 /* Indicates the type of fixup needed after a constant multiplication.
2337 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2338 the result should be negated, and ADD_VARIANT means that the
2339 multiplicand should be added to the result. */
2340 enum mult_variant {basic_variant, negate_variant, add_variant};
2341
2342 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2343 const struct mult_cost *, enum machine_mode mode);
2344 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2345 struct algorithm *, enum mult_variant *, int);
2346 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2347 const struct algorithm *, enum mult_variant);
2348 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2349 static rtx extract_high_half (enum machine_mode, rtx);
2350 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2351 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2352 int, int);
2353 /* Compute and return the best algorithm for multiplying by T.
2354 The algorithm must cost less than COST_LIMIT.
2355 If retval.cost >= COST_LIMIT, no algorithm was found and all
2356 other fields of the returned struct are undefined.
2357 MODE is the machine mode of the multiplication. */
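/* As an illustration, for T = 10 one decomposition this search can find
   (costs permitting) is: 10 is even, so record a shift by 1 and synthesize
   5; 5 = 4 + 1 is handled by alg_add_t2_m with a log of 2; the resulting
   sequence computes x * 10 as ((x << 2) + x) << 1.  */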
2358
2359 static void
2360 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2361 const struct mult_cost *cost_limit, enum machine_mode mode)
2362 {
2363 int m;
2364 struct algorithm *alg_in, *best_alg;
2365 struct mult_cost best_cost;
2366 struct mult_cost new_limit;
2367 int op_cost, op_latency;
2368 unsigned HOST_WIDE_INT orig_t = t;
2369 unsigned HOST_WIDE_INT q;
2370 int maxm, hash_index;
2371 bool cache_hit = false;
2372 enum alg_code cache_alg = alg_zero;
2373 bool speed = optimize_insn_for_speed_p ();
2374 enum machine_mode imode;
2375 struct alg_hash_entry *entry_ptr;
2376
2377 /* Indicate that no algorithm is yet found. If no algorithm
2378 is found, this value will be returned and indicate failure. */
2379 alg_out->cost.cost = cost_limit->cost + 1;
2380 alg_out->cost.latency = cost_limit->latency + 1;
2381
2382 if (cost_limit->cost < 0
2383 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384 return;
2385
2386 /* Be prepared for vector modes. */
2387 imode = GET_MODE_INNER (mode);
2388 if (imode == VOIDmode)
2389 imode = mode;
2390
2391 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2392
2393 /* Restrict the bits of "t" to the multiplication's mode. */
2394 t &= GET_MODE_MASK (imode);
2395
2396 /* t == 1 can be done at zero cost. */
2397 if (t == 1)
2398 {
2399 alg_out->ops = 1;
2400 alg_out->cost.cost = 0;
2401 alg_out->cost.latency = 0;
2402 alg_out->op[0] = alg_m;
2403 return;
2404 }
2405
2406 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2407 fail now. */
2408 if (t == 0)
2409 {
2410 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2411 return;
2412 else
2413 {
2414 alg_out->ops = 1;
2415 alg_out->cost.cost = zero_cost (speed);
2416 alg_out->cost.latency = zero_cost (speed);
2417 alg_out->op[0] = alg_zero;
2418 return;
2419 }
2420 }
2421
2422 /* We'll be needing a couple extra algorithm structures now. */
2423
2424 alg_in = XALLOCA (struct algorithm);
2425 best_alg = XALLOCA (struct algorithm);
2426 best_cost = *cost_limit;
2427
2428 /* Compute the hash index. */
2429 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2430
2431 /* See if we already know what to do for T. */
2432 entry_ptr = alg_hash_entry_ptr (hash_index);
2433 if (entry_ptr->t == t
2434 && entry_ptr->mode == mode
2436 && entry_ptr->speed == speed
2437 && entry_ptr->alg != alg_unknown)
2438 {
2439 cache_alg = entry_ptr->alg;
2440
2441 if (cache_alg == alg_impossible)
2442 {
2443 /* The cache tells us that it's impossible to synthesize
2444 multiplication by T within entry_ptr->cost. */
2445 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2446 /* COST_LIMIT is at least as restrictive as the one
2447 recorded in the hash table, in which case we have no
2448 hope of synthesizing a multiplication. Just
2449 return. */
2450 return;
2451
2452 /* If we get here, COST_LIMIT is less restrictive than the
2453 one recorded in the hash table, so we may be able to
2454 synthesize a multiplication. Proceed as if we didn't
2455 have the cache entry. */
2456 }
2457 else
2458 {
2459 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2460 /* The cached algorithm shows that this multiplication
2461 requires more cost than COST_LIMIT. Just return. This
2462 way, we don't clobber this cache entry with
2463 alg_impossible but retain useful information. */
2464 return;
2465
2466 cache_hit = true;
2467
2468 switch (cache_alg)
2469 {
2470 case alg_shift:
2471 goto do_alg_shift;
2472
2473 case alg_add_t_m2:
2474 case alg_sub_t_m2:
2475 goto do_alg_addsub_t_m2;
2476
2477 case alg_add_factor:
2478 case alg_sub_factor:
2479 goto do_alg_addsub_factor;
2480
2481 case alg_add_t2_m:
2482 goto do_alg_add_t2_m;
2483
2484 case alg_sub_t2_m:
2485 goto do_alg_sub_t2_m;
2486
2487 default:
2488 gcc_unreachable ();
2489 }
2490 }
2491 }
2492
2493 /* If we have a group of zero bits at the low-order part of T, try
2494 multiplying by the remaining bits and then doing a shift. */
2495
2496 if ((t & 1) == 0)
2497 {
2498 do_alg_shift:
2499 m = floor_log2 (t & -t); /* m = number of low zero bits */
2500 if (m < maxm)
2501 {
2502 q = t >> m;
2503 /* The function expand_shift will choose between a shift and
2504 a sequence of additions, so the observed cost is given as
2505 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2506 op_cost = m * add_cost (speed, mode);
2507 if (shift_cost (speed, mode, m) < op_cost)
2508 op_cost = shift_cost (speed, mode, m);
2509 new_limit.cost = best_cost.cost - op_cost;
2510 new_limit.latency = best_cost.latency - op_cost;
2511 synth_mult (alg_in, q, &new_limit, mode);
2512
2513 alg_in->cost.cost += op_cost;
2514 alg_in->cost.latency += op_cost;
2515 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516 {
2517 struct algorithm *x;
2518 best_cost = alg_in->cost;
2519 x = alg_in, alg_in = best_alg, best_alg = x;
2520 best_alg->log[best_alg->ops] = m;
2521 best_alg->op[best_alg->ops] = alg_shift;
2522 }
2523
2524 /* See if treating ORIG_T as a signed number yields a better
2525 sequence. Try this sequence only for a negative ORIG_T
2526 as it would be useless for a non-negative ORIG_T. */
2527 if ((HOST_WIDE_INT) orig_t < 0)
2528 {
2529 /* Shift ORIG_T as follows because a right shift of a
2530 negative-valued signed type is implementation
2531 defined. */
2532 q = ~(~orig_t >> m);
2533 /* The function expand_shift will choose between a shift
2534 and a sequence of additions, so the observed cost is
2535 given as MIN (m * add_cost(speed, mode),
2536 shift_cost(speed, mode, m)). */
2537 op_cost = m * add_cost (speed, mode);
2538 if (shift_cost (speed, mode, m) < op_cost)
2539 op_cost = shift_cost (speed, mode, m);
2540 new_limit.cost = best_cost.cost - op_cost;
2541 new_limit.latency = best_cost.latency - op_cost;
2542 synth_mult (alg_in, q, &new_limit, mode);
2543
2544 alg_in->cost.cost += op_cost;
2545 alg_in->cost.latency += op_cost;
2546 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2547 {
2548 struct algorithm *x;
2549 best_cost = alg_in->cost;
2550 x = alg_in, alg_in = best_alg, best_alg = x;
2551 best_alg->log[best_alg->ops] = m;
2552 best_alg->op[best_alg->ops] = alg_shift;
2553 }
2554 }
2555 }
2556 if (cache_hit)
2557 goto done;
2558 }
2559
2560 /* If we have an odd number, add or subtract one. */
2561 if ((t & 1) != 0)
2562 {
2563 unsigned HOST_WIDE_INT w;
2564
2565 do_alg_addsub_t_m2:
2566 for (w = 1; (w & t) != 0; w <<= 1)
2567 ;
2568 /* If T was -1, then W will be zero after the loop. This is another
2569 case where T ends with ...111. Handling this by multiplying by
2570 (T + 1) and subtracting 1 produces slightly better code and makes
2571 algorithm selection much faster than treating it like the ...0111
2572 case below. */
2573 if (w == 0
2574 || (w > 2
2575 /* Reject the case where t is 3.
2576 Thus we prefer addition in that case. */
2577 && t != 3))
2578 {
2579 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2580
2581 op_cost = add_cost (speed, mode);
2582 new_limit.cost = best_cost.cost - op_cost;
2583 new_limit.latency = best_cost.latency - op_cost;
2584 synth_mult (alg_in, t + 1, &new_limit, mode);
2585
2586 alg_in->cost.cost += op_cost;
2587 alg_in->cost.latency += op_cost;
2588 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589 {
2590 struct algorithm *x;
2591 best_cost = alg_in->cost;
2592 x = alg_in, alg_in = best_alg, best_alg = x;
2593 best_alg->log[best_alg->ops] = 0;
2594 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2595 }
2596 }
2597 else
2598 {
2599 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2600
2601 op_cost = add_cost (speed, mode);
2602 new_limit.cost = best_cost.cost - op_cost;
2603 new_limit.latency = best_cost.latency - op_cost;
2604 synth_mult (alg_in, t - 1, &new_limit, mode);
2605
2606 alg_in->cost.cost += op_cost;
2607 alg_in->cost.latency += op_cost;
2608 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2609 {
2610 struct algorithm *x;
2611 best_cost = alg_in->cost;
2612 x = alg_in, alg_in = best_alg, best_alg = x;
2613 best_alg->log[best_alg->ops] = 0;
2614 best_alg->op[best_alg->ops] = alg_add_t_m2;
2615 }
2616 }
2617
2618 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2619 quickly with a - a * n for some appropriate constant n. */
2620 m = exact_log2 (-orig_t + 1);
2621 if (m >= 0 && m < maxm)
2622 {
2623 op_cost = shiftsub1_cost (speed, mode, m);
2624 new_limit.cost = best_cost.cost - op_cost;
2625 new_limit.latency = best_cost.latency - op_cost;
2626 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2627 &new_limit, mode);
2628
2629 alg_in->cost.cost += op_cost;
2630 alg_in->cost.latency += op_cost;
2631 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632 {
2633 struct algorithm *x;
2634 best_cost = alg_in->cost;
2635 x = alg_in, alg_in = best_alg, best_alg = x;
2636 best_alg->log[best_alg->ops] = m;
2637 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2638 }
2639 }
2640
2641 if (cache_hit)
2642 goto done;
2643 }
2644
2645 /* Look for factors of t of the form
2646 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2647 If we find such a factor, we can multiply by t using an algorithm that
2648 multiplies by q, shift the result by m and add/subtract it to itself.
2649
2650 We search for large factors first and loop down, even if large factors
2651 are less probable than small; if we find a large factor we will find a
2652 good sequence quickly, and therefore be able to prune (by decreasing
2653 COST_LIMIT) the search. */
2654
2655 do_alg_addsub_factor:
2656 for (m = floor_log2 (t - 1); m >= 2; m--)
2657 {
2658 unsigned HOST_WIDE_INT d;
2659
2660 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2661 if (t % d == 0 && t > d && m < maxm
2662 && (!cache_hit || cache_alg == alg_add_factor))
2663 {
2664 /* If the target has a cheap shift-and-add instruction use
2665 that in preference to a shift insn followed by an add insn.
2666 Assume that the shift-and-add is "atomic" with a latency
2667 equal to its cost, otherwise assume that on superscalar
2668 hardware the shift may be executed concurrently with the
2669 earlier steps in the algorithm. */
2670 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2671 if (shiftadd_cost (speed, mode, m) < op_cost)
2672 {
2673 op_cost = shiftadd_cost (speed, mode, m);
2674 op_latency = op_cost;
2675 }
2676 else
2677 op_latency = add_cost (speed, mode);
2678
2679 new_limit.cost = best_cost.cost - op_cost;
2680 new_limit.latency = best_cost.latency - op_latency;
2681 synth_mult (alg_in, t / d, &new_limit, mode);
2682
2683 alg_in->cost.cost += op_cost;
2684 alg_in->cost.latency += op_latency;
2685 if (alg_in->cost.latency < op_cost)
2686 alg_in->cost.latency = op_cost;
2687 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688 {
2689 struct algorithm *x;
2690 best_cost = alg_in->cost;
2691 x = alg_in, alg_in = best_alg, best_alg = x;
2692 best_alg->log[best_alg->ops] = m;
2693 best_alg->op[best_alg->ops] = alg_add_factor;
2694 }
2695 /* Other factors will have been taken care of in the recursion. */
2696 break;
2697 }
2698
2699 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2700 if (t % d == 0 && t > d && m < maxm
2701 && (!cache_hit || cache_alg == alg_sub_factor))
2702 {
2703 /* If the target has a cheap shift-and-subtract insn use
2704 that in preference to a shift insn followed by a sub insn.
2705 Assume that the shift-and-sub is "atomic" with a latency
2706 equal to its cost, otherwise assume that on superscalar
2707 hardware the shift may be executed concurrently with the
2708 earlier steps in the algorithm. */
2709 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2710 if (shiftsub0_cost (speed, mode, m) < op_cost)
2711 {
2712 op_cost = shiftsub0_cost (speed, mode, m);
2713 op_latency = op_cost;
2714 }
2715 else
2716 op_latency = add_cost (speed, mode);
2717
2718 new_limit.cost = best_cost.cost - op_cost;
2719 new_limit.latency = best_cost.latency - op_latency;
2720 synth_mult (alg_in, t / d, &new_limit, mode);
2721
2722 alg_in->cost.cost += op_cost;
2723 alg_in->cost.latency += op_latency;
2724 if (alg_in->cost.latency < op_cost)
2725 alg_in->cost.latency = op_cost;
2726 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727 {
2728 struct algorithm *x;
2729 best_cost = alg_in->cost;
2730 x = alg_in, alg_in = best_alg, best_alg = x;
2731 best_alg->log[best_alg->ops] = m;
2732 best_alg->op[best_alg->ops] = alg_sub_factor;
2733 }
2734 break;
2735 }
2736 }
2737 if (cache_hit)
2738 goto done;
2739
2740 /* Try shift-and-add (load effective address) instructions,
2741 i.e. do a*3, a*5, a*9. */
2742 if ((t & 1) != 0)
2743 {
2744 do_alg_add_t2_m:
2745 q = t - 1;
2746 q = q & -q;
2747 m = exact_log2 (q);
2748 if (m >= 0 && m < maxm)
2749 {
2750 op_cost = shiftadd_cost (speed, mode, m);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2754
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758 {
2759 struct algorithm *x;
2760 best_cost = alg_in->cost;
2761 x = alg_in, alg_in = best_alg, best_alg = x;
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_add_t2_m;
2764 }
2765 }
2766 if (cache_hit)
2767 goto done;
2768
2769 do_alg_sub_t2_m:
2770 q = t + 1;
2771 q = q & -q;
2772 m = exact_log2 (q);
2773 if (m >= 0 && m < maxm)
2774 {
2775 op_cost = shiftsub0_cost (speed, mode, m);
2776 new_limit.cost = best_cost.cost - op_cost;
2777 new_limit.latency = best_cost.latency - op_cost;
2778 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2779
2780 alg_in->cost.cost += op_cost;
2781 alg_in->cost.latency += op_cost;
2782 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2783 {
2784 struct algorithm *x;
2785 best_cost = alg_in->cost;
2786 x = alg_in, alg_in = best_alg, best_alg = x;
2787 best_alg->log[best_alg->ops] = m;
2788 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2789 }
2790 }
2791 if (cache_hit)
2792 goto done;
2793 }
2794
2795 done:
2796 /* If best_cost has not decreased, we have not found any algorithm. */
2797 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2798 {
2799 /* We failed to find an algorithm. Record alg_impossible for
2800 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2801 we are asked to find an algorithm for T within the same or
2802 lower COST_LIMIT, we can immediately return to the
2803 caller. */
2804 entry_ptr->t = t;
2805 entry_ptr->mode = mode;
2806 entry_ptr->speed = speed;
2807 entry_ptr->alg = alg_impossible;
2808 entry_ptr->cost = *cost_limit;
2809 return;
2810 }
2811
2812 /* Cache the result. */
2813 if (!cache_hit)
2814 {
2815 entry_ptr->t = t;
2816 entry_ptr->mode = mode;
2817 entry_ptr->speed = speed;
2818 entry_ptr->alg = best_alg->op[best_alg->ops];
2819 entry_ptr->cost.cost = best_cost.cost;
2820 entry_ptr->cost.latency = best_cost.latency;
2821 }
2822
2823 /* If the sequence we found is too long for `struct algorithm'
2824 to record, make this search fail. */
2825 if (best_alg->ops == MAX_BITS_PER_WORD)
2826 return;
2827
2828 /* Copy the algorithm from temporary space to the space at alg_out.
2829 We avoid using structure assignment because the majority of
2830 best_alg is normally undefined, and this is a critical function. */
2831 alg_out->ops = best_alg->ops + 1;
2832 alg_out->cost = best_cost;
2833 memcpy (alg_out->op, best_alg->op,
2834 alg_out->ops * sizeof *alg_out->op);
2835 memcpy (alg_out->log, best_alg->log,
2836 alg_out->ops * sizeof *alg_out->log);
2837 }
2838 \f
2839 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2840 Try three variations:
2841
2842 - a shift/add sequence based on VAL itself
2843 - a shift/add sequence based on -VAL, followed by a negation
2844 - a shift/add sequence based on VAL - 1, followed by an addition.
2845
2846 Return true if the cheapest of these cost less than MULT_COST,
2847 describing the algorithm in *ALG and final fixup in *VARIANT. */
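/* For instance, multiplying by -3 may come out cheapest as the shift/add
   sequence for 3 (x + (x << 1)) followed by a negation (negate_variant),
   and multiplying by 7 as the sequence for 6 followed by one further
   addition of the multiplicand (add_variant), whenever those beat the
   direct sequences.  */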
2848
2849 static bool
2850 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2851 struct algorithm *alg, enum mult_variant *variant,
2852 int mult_cost)
2853 {
2854 struct algorithm alg2;
2855 struct mult_cost limit;
2856 int op_cost;
2857 bool speed = optimize_insn_for_speed_p ();
2858
2859 /* Fail quickly for impossible bounds. */
2860 if (mult_cost < 0)
2861 return false;
2862
2863 /* Ensure that mult_cost provides a reasonable upper bound.
2864 Any constant multiplication can be performed with fewer
2865 than 2 * bits additions. */
2866 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2867 if (mult_cost > op_cost)
2868 mult_cost = op_cost;
2869
2870 *variant = basic_variant;
2871 limit.cost = mult_cost;
2872 limit.latency = mult_cost;
2873 synth_mult (alg, val, &limit, mode);
2874
2875 /* This works only if the negated value actually fits in an
2876 `unsigned int'. */
2877 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2878 {
2879 op_cost = neg_cost(speed, mode);
2880 if (MULT_COST_LESS (&alg->cost, mult_cost))
2881 {
2882 limit.cost = alg->cost.cost - op_cost;
2883 limit.latency = alg->cost.latency - op_cost;
2884 }
2885 else
2886 {
2887 limit.cost = mult_cost - op_cost;
2888 limit.latency = mult_cost - op_cost;
2889 }
2890
2891 synth_mult (&alg2, -val, &limit, mode);
2892 alg2.cost.cost += op_cost;
2893 alg2.cost.latency += op_cost;
2894 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2895 *alg = alg2, *variant = negate_variant;
2896 }
2897
2898 /* This proves very useful for division-by-constant. */
2899 op_cost = add_cost (speed, mode);
2900 if (MULT_COST_LESS (&alg->cost, mult_cost))
2901 {
2902 limit.cost = alg->cost.cost - op_cost;
2903 limit.latency = alg->cost.latency - op_cost;
2904 }
2905 else
2906 {
2907 limit.cost = mult_cost - op_cost;
2908 limit.latency = mult_cost - op_cost;
2909 }
2910
2911 synth_mult (&alg2, val - 1, &limit, mode);
2912 alg2.cost.cost += op_cost;
2913 alg2.cost.latency += op_cost;
2914 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915 *alg = alg2, *variant = add_variant;
2916
2917 return MULT_COST_LESS (&alg->cost, mult_cost);
2918 }
2919
2920 /* A subroutine of expand_mult, used for constant multiplications.
2921 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2922 convenient. Use the shift/add sequence described by ALG and apply
2923 the final fixup specified by VARIANT. */
2924
2925 static rtx
2926 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2927 rtx target, const struct algorithm *alg,
2928 enum mult_variant variant)
2929 {
2930 HOST_WIDE_INT val_so_far;
2931 rtx insn, accum, tem;
2932 int opno;
2933 enum machine_mode nmode;
2934
2935 /* Avoid referencing memory over and over and invalid sharing
2936 on SUBREGs. */
2937 op0 = force_reg (mode, op0);
2938
2939 /* ACCUM starts out either as OP0 or as a zero, depending on
2940 the first operation. */
2941
2942 if (alg->op[0] == alg_zero)
2943 {
2944 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2945 val_so_far = 0;
2946 }
2947 else if (alg->op[0] == alg_m)
2948 {
2949 accum = copy_to_mode_reg (mode, op0);
2950 val_so_far = 1;
2951 }
2952 else
2953 gcc_unreachable ();
2954
2955 for (opno = 1; opno < alg->ops; opno++)
2956 {
2957 int log = alg->log[opno];
2958 rtx shift_subtarget = optimize ? 0 : accum;
2959 rtx add_target
2960 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2961 && !optimize)
2962 ? target : 0;
2963 rtx accum_target = optimize ? 0 : accum;
2964 rtx accum_inner;
2965
2966 switch (alg->op[opno])
2967 {
2968 case alg_shift:
2969 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2970 /* REG_EQUAL note will be attached to the following insn. */
2971 emit_move_insn (accum, tem);
2972 val_so_far <<= log;
2973 break;
2974
2975 case alg_add_t_m2:
2976 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2977 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2978 add_target ? add_target : accum_target);
2979 val_so_far += (HOST_WIDE_INT) 1 << log;
2980 break;
2981
2982 case alg_sub_t_m2:
2983 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2984 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2985 add_target ? add_target : accum_target);
2986 val_so_far -= (HOST_WIDE_INT) 1 << log;
2987 break;
2988
2989 case alg_add_t2_m:
2990 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2991 log, shift_subtarget, 0);
2992 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2993 add_target ? add_target : accum_target);
2994 val_so_far = (val_so_far << log) + 1;
2995 break;
2996
2997 case alg_sub_t2_m:
2998 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2999 log, shift_subtarget, 0);
3000 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3001 add_target ? add_target : accum_target);
3002 val_so_far = (val_so_far << log) - 1;
3003 break;
3004
3005 case alg_add_factor:
3006 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3007 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3008 add_target ? add_target : accum_target);
3009 val_so_far += val_so_far << log;
3010 break;
3011
3012 case alg_sub_factor:
3013 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3014 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3015 (add_target
3016 ? add_target : (optimize ? 0 : tem)));
3017 val_so_far = (val_so_far << log) - val_so_far;
3018 break;
3019
3020 default:
3021 gcc_unreachable ();
3022 }
3023
3024 if (SCALAR_INT_MODE_P (mode))
3025 {
3026 /* Write a REG_EQUAL note on the last insn so that we can cse
3027 multiplication sequences. Note that if ACCUM is a SUBREG,
3028 we've set the inner register and must properly indicate that. */
3029 tem = op0, nmode = mode;
3030 accum_inner = accum;
3031 if (GET_CODE (accum) == SUBREG)
3032 {
3033 accum_inner = SUBREG_REG (accum);
3034 nmode = GET_MODE (accum_inner);
3035 tem = gen_lowpart (nmode, op0);
3036 }
3037
3038 insn = get_last_insn ();
3039 set_dst_reg_note (insn, REG_EQUAL,
3040 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3041 accum_inner);
3042 }
3043 }
3044
3045 if (variant == negate_variant)
3046 {
3047 val_so_far = -val_so_far;
3048 accum = expand_unop (mode, neg_optab, accum, target, 0);
3049 }
3050 else if (variant == add_variant)
3051 {
3052 val_so_far = val_so_far + 1;
3053 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3054 }
3055
3056 /* Compare only the bits of val and val_so_far that are significant
3057 in the result mode, to avoid sign-/zero-extension confusion. */
3058 nmode = GET_MODE_INNER (mode);
3059 if (nmode == VOIDmode)
3060 nmode = mode;
3061 val &= GET_MODE_MASK (nmode);
3062 val_so_far &= GET_MODE_MASK (nmode);
3063 gcc_assert (val == val_so_far);
3064
3065 return accum;
3066 }
3067
3068 /* Perform a multiplication and return an rtx for the result.
3069 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3070 TARGET is a suggestion for where to store the result (an rtx).
3071
3072 We check specially for a constant integer as OP1.
3073 If you want this check for OP0 as well, then before calling
3074 you should swap the two operands if OP0 would be constant. */
3075
3076 rtx
3077 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3078 int unsignedp)
3079 {
3080 enum mult_variant variant;
3081 struct algorithm algorithm;
3082 rtx scalar_op1;
3083 int max_cost;
3084 bool speed = optimize_insn_for_speed_p ();
3085 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3086
3087 if (CONSTANT_P (op0))
3088 {
3089 rtx temp = op0;
3090 op0 = op1;
3091 op1 = temp;
3092 }
3093
3094 /* For vectors, there are several simplifications that can be made if
3095 all elements of the vector constant are identical. */
3096 scalar_op1 = op1;
3097 if (GET_CODE (op1) == CONST_VECTOR)
3098 {
3099 int i, n = CONST_VECTOR_NUNITS (op1);
3100 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3101 for (i = 1; i < n; ++i)
3102 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3103 goto skip_scalar;
3104 }
3105
3106 if (INTEGRAL_MODE_P (mode))
3107 {
3108 rtx fake_reg;
3109 HOST_WIDE_INT coeff;
3110 bool is_neg;
3111 int mode_bitsize;
3112
3113 if (op1 == CONST0_RTX (mode))
3114 return op1;
3115 if (op1 == CONST1_RTX (mode))
3116 return op0;
3117 if (op1 == CONSTM1_RTX (mode))
3118 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3119 op0, target, 0);
3120
3121 if (do_trapv)
3122 goto skip_synth;
3123
3124 /* These are the operations that are potentially turned into
3125 a sequence of shifts and additions. */
3126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3127
3128 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3129 less than or equal in size to `unsigned int' this doesn't matter.
3130 If the mode is larger than `unsigned int', then synth_mult works
3131 only if the constant value exactly fits in an `unsigned int' without
3132 any truncation. This means that multiplying by negative values does
3133 not work; results are off by 2^32 on a 32-bit machine. */
3134
3135 if (CONST_INT_P (scalar_op1))
3136 {
3137 coeff = INTVAL (scalar_op1);
3138 is_neg = coeff < 0;
3139 }
3140 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3141 {
3142 /* If we are multiplying in DImode, it may still be a win
3143 to try to work with shifts and adds. */
3144 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3145 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3146 {
3147 coeff = CONST_DOUBLE_LOW (scalar_op1);
3148 is_neg = false;
3149 }
3150 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3151 {
3152 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3153 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3154 {
3155 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3156 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3157 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3158 return expand_shift (LSHIFT_EXPR, mode, op0,
3159 shift, target, unsignedp);
3160 }
3161 goto skip_synth;
3162 }
3163 else
3164 goto skip_synth;
3165 }
3166 else
3167 goto skip_synth;
3168
3169 /* We used to test optimize here, on the grounds that it's better to
3170 produce a smaller program when -O is not used. But this causes
3171 such a terrible slowdown sometimes that it seems better to always
3172 use synth_mult. */
3173
3174 /* Special case powers of two. */
3175 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3176 return expand_shift (LSHIFT_EXPR, mode, op0,
3177 floor_log2 (coeff), target, unsignedp);
3178
3179 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3180
3181 /* Attempt to handle multiplication of DImode values by negative
3182 coefficients, by performing the multiplication by a positive
3183 multiplier and then negating the result. */
3184 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3185 {
3186 /* It's safe to use -coeff even for INT_MIN, as the
3187 result is interpreted as an unsigned coefficient.
3188 Exclude cost of op0 from max_cost to match the cost
3189 calculation of the synth_mult. */
3190 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3191 - neg_cost(speed, mode));
3192 if (max_cost > 0
3193 && choose_mult_variant (mode, -coeff, &algorithm,
3194 &variant, max_cost))
3195 {
3196 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3197 &algorithm, variant);
3198 return expand_unop (mode, neg_optab, temp, target, 0);
3199 }
3200 goto skip_synth;
3201 }
3202
3203 /* Exclude cost of op0 from max_cost to match the cost
3204 calculation of the synth_mult. */
3205 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3206 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3207 return expand_mult_const (mode, op0, coeff, target,
3208 &algorithm, variant);
3209 }
3210 skip_synth:
3211
3212 /* Expand x*2.0 as x+x. */
3213 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3214 {
3215 REAL_VALUE_TYPE d;
3216 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3217
3218 if (REAL_VALUES_EQUAL (d, dconst2))
3219 {
3220 op0 = force_reg (GET_MODE (op0), op0);
3221 return expand_binop (mode, add_optab, op0, op0,
3222 target, unsignedp, OPTAB_LIB_WIDEN);
3223 }
3224 }
3225 skip_scalar:
3226
3227 /* This used to use umul_optab if unsigned, but for non-widening multiply
3228 there is no difference between signed and unsigned. */
3229 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3230 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3231 gcc_assert (op0);
3232 return op0;
3233 }
3234
3235 /* Return a cost estimate for multiplying a register by the given
3236 COEFFicient in the given MODE and SPEED. */
3237
3238 int
3239 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3240 {
3241 int max_cost;
3242 struct algorithm algorithm;
3243 enum mult_variant variant;
3244
3245 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3246 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3247 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3248 return algorithm.cost.cost;
3249 else
3250 return max_cost;
3251 }
3252
3253 /* Perform a widening multiplication and return an rtx for the result.
3254 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3255 TARGET is a suggestion for where to store the result (an rtx).
3256 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3257 or smul_widen_optab.
3258
3259 We check specially for a constant integer as OP1, comparing the
3260 cost of a widening multiply against the cost of a sequence of shifts
3261 and adds. */
3262
3263 rtx
3264 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3265 int unsignedp, optab this_optab)
3266 {
3267 bool speed = optimize_insn_for_speed_p ();
3268 rtx cop1;
3269
3270 if (CONST_INT_P (op1)
3271 && GET_MODE (op0) != VOIDmode
3272 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3273 this_optab == umul_widen_optab))
3274 && CONST_INT_P (cop1)
3275 && (INTVAL (cop1) >= 0
3276 || HWI_COMPUTABLE_MODE_P (mode)))
3277 {
3278 HOST_WIDE_INT coeff = INTVAL (cop1);
3279 int max_cost;
3280 enum mult_variant variant;
3281 struct algorithm algorithm;
3282
3283 /* Special case powers of two. */
3284 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3285 {
3286 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3287 return expand_shift (LSHIFT_EXPR, mode, op0,
3288 floor_log2 (coeff), target, unsignedp);
3289 }
3290
3291 /* Exclude cost of op0 from max_cost to match the cost
3292 calculation of the synth_mult. */
3293 max_cost = mul_widen_cost (speed, mode);
3294 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3295 max_cost))
3296 {
3297 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3298 return expand_mult_const (mode, op0, coeff, target,
3299 &algorithm, variant);
3300 }
3301 }
3302 return expand_binop (mode, this_optab, op0, op1, target,
3303 unsignedp, OPTAB_LIB_WIDEN);
3304 }
3305 \f
3306 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3307 replace division by D, and put the least significant N bits of the result
3308 in *MULTIPLIER_PTR and return the most significant bit.
3309
3310 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3311 needed precision is in PRECISION (should be <= N).
3312
3313 PRECISION should be as small as possible so this function can choose
3314 multiplier more freely.
3315
3316 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3317 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3318
3319 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3320 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
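/* For example, with D = 3 and N = PRECISION = 32 this computes
   *MULTIPLIER_PTR = 0xaaaaaaab, *POST_SHIFT_PTR = 1 and *LGUP_PTR = 2,
   and returns 0, giving the familiar x / 3 = (x * 0xaaaaaaab) >> 33 for
   unsigned 32-bit x.  */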
3321
3322 unsigned HOST_WIDE_INT
3323 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3324 unsigned HOST_WIDE_INT *multiplier_ptr,
3325 int *post_shift_ptr, int *lgup_ptr)
3326 {
3327 double_int mhigh, mlow;
3328 int lgup, post_shift;
3329 int pow, pow2;
3330
3331 /* lgup = ceil(log2(divisor)); */
3332 lgup = ceil_log2 (d);
3333
3334 gcc_assert (lgup <= n);
3335
3336 pow = n + lgup;
3337 pow2 = n + lgup - precision;
3338
3339 /* We could handle this with some effort, but this case is much
3340 better handled directly with a scc insn, so rely on caller using
3341 that. */
3342 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3343
3344 /* mlow = 2^(N + lgup)/d */
3345 double_int val = double_int_zero.set_bit (pow);
3346 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3347
3348 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3349 val |= double_int_zero.set_bit (pow2);
3350 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3351
3352 gcc_assert (!mhigh.high || val.high - d < d);
3353 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3354 /* Assert that mlow < mhigh. */
3355 gcc_assert (mlow.ult (mhigh));
3356
3357 /* If precision == N, then mlow, mhigh exceed 2^N
3358 (but they do not exceed 2^(N+1)). */
3359
3360 /* Reduce to lowest terms. */
3361 for (post_shift = lgup; post_shift > 0; post_shift--)
3362 {
3363 int shft = HOST_BITS_PER_WIDE_INT - 1;
3364 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3365 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3366 if (ml_lo >= mh_lo)
3367 break;
3368
3369 mlow = double_int::from_uhwi (ml_lo);
3370 mhigh = double_int::from_uhwi (mh_lo);
3371 }
3372
3373 *post_shift_ptr = post_shift;
3374 *lgup_ptr = lgup;
3375 if (n < HOST_BITS_PER_WIDE_INT)
3376 {
3377 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3378 *multiplier_ptr = mhigh.low & mask;
3379 return mhigh.low >= mask;
3380 }
3381 else
3382 {
3383 *multiplier_ptr = mhigh.low;
3384 return mhigh.high;
3385 }
3386 }
3387
3388 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3389 congruent to 1 (mod 2**N). */
3390
3391 static unsigned HOST_WIDE_INT
3392 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3393 {
3394 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3395
3396 /* The algorithm notes that the choice y = x satisfies
3397 x*y == 1 mod 2^3, since x is assumed odd.
3398 Each iteration doubles the number of bits of significance in y. */
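/* For example, with x == 7 and n == 32 the iteration converges to
   y == 0xb6db6db7; indeed 7 * 0xb6db6db7 == 5 * 2^32 + 1, which is
   1 modulo 2^32.  */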
3399
3400 unsigned HOST_WIDE_INT mask;
3401 unsigned HOST_WIDE_INT y = x;
3402 int nbit = 3;
3403
3404 mask = (n == HOST_BITS_PER_WIDE_INT
3405 ? ~(unsigned HOST_WIDE_INT) 0
3406 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3407
3408 while (nbit < n)
3409 {
3410 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3411 nbit *= 2;
3412 }
3413 return y;
3414 }
3415
3416 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3417 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3418 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3419 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3420 become signed.
3421
3422 The result is put in TARGET if that is convenient.
3423
3424 MODE is the mode of operation. */
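/* The adjustment relies on the identity X_unsigned == X_signed
   + 2^N * (X_signed < 0) for N-bit operands, so the unsigned and
   signed high parts of the product differ, modulo 2^N, by exactly
   (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0).  Each correction term
   is built below as an arithmetic right shift by N - 1 bits (an
   all-ones mask for a negative operand) ANDed with the other
   operand, and is added when UNSIGNEDP and subtracted otherwise.  */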
3425
3426 rtx
3427 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3428 rtx op1, rtx target, int unsignedp)
3429 {
3430 rtx tem;
3431 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3432
3433 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3434 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3435 tem = expand_and (mode, tem, op1, NULL_RTX);
3436 adj_operand
3437 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3438 adj_operand);
3439
3440 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3441 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3442 tem = expand_and (mode, tem, op0, NULL_RTX);
3443 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3444 target);
3445
3446 return target;
3447 }
3448
3449 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3450
3451 static rtx
3452 extract_high_half (enum machine_mode mode, rtx op)
3453 {
3454 enum machine_mode wider_mode;
3455
3456 if (mode == word_mode)
3457 return gen_highpart (mode, op);
3458
3459 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3460
3461 wider_mode = GET_MODE_WIDER_MODE (mode);
3462 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3463 GET_MODE_BITSIZE (mode), 0, 1);
3464 return convert_modes (mode, wider_mode, op, 0);
3465 }
3466
3467 /* Like expmed_mult_highpart, but only consider using a multiplication
3468 optab. OP1 is an rtx for the constant operand. */
3469
3470 static rtx
3471 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3472 rtx target, int unsignedp, int max_cost)
3473 {
3474 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3475 enum machine_mode wider_mode;
3476 optab moptab;
3477 rtx tem;
3478 int size;
3479 bool speed = optimize_insn_for_speed_p ();
3480
3481 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3482
3483 wider_mode = GET_MODE_WIDER_MODE (mode);
3484 size = GET_MODE_BITSIZE (mode);
3485
3486 /* Firstly, try using a multiplication insn that only generates the needed
3487 high part of the product, and in the sign flavor of unsignedp. */
3488 if (mul_highpart_cost (speed, mode) < max_cost)
3489 {
3490 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3491 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3492 unsignedp, OPTAB_DIRECT);
3493 if (tem)
3494 return tem;
3495 }
3496
3497 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3498 Need to adjust the result after the multiplication. */
3499 if (size - 1 < BITS_PER_WORD
3500 && (mul_highpart_cost (speed, mode)
3501 + 2 * shift_cost (speed, mode, size-1)
3502 + 4 * add_cost (speed, mode) < max_cost))
3503 {
3504 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3505 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3506 unsignedp, OPTAB_DIRECT);
3507 if (tem)
3508 /* We used the wrong signedness. Adjust the result. */
3509 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3510 tem, unsignedp);
3511 }
3512
3513 /* Try widening multiplication. */
3514 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3515 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3516 && mul_widen_cost (speed, wider_mode) < max_cost)
3517 {
3518 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3519 unsignedp, OPTAB_WIDEN);
3520 if (tem)
3521 return extract_high_half (mode, tem);
3522 }
3523
3524 /* Try widening the mode and performing a non-widening multiplication. */
3525 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3526 && size - 1 < BITS_PER_WORD
3527 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3528 < max_cost))
3529 {
3530 rtx insns, wop0, wop1;
3531
3532 /* We need to widen the operands, for example to ensure the
3533 constant multiplier is correctly sign or zero extended.
3534 Use a sequence to clean up any instructions emitted by
3535 the conversions if things don't work out. */
3536 start_sequence ();
3537 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3538 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3539 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3540 unsignedp, OPTAB_WIDEN);
3541 insns = get_insns ();
3542 end_sequence ();
3543
3544 if (tem)
3545 {
3546 emit_insn (insns);
3547 return extract_high_half (mode, tem);
3548 }
3549 }
3550
3551 /* Try widening multiplication of opposite signedness, and adjust. */
3552 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3553 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3554 && size - 1 < BITS_PER_WORD
3555 && (mul_widen_cost (speed, wider_mode)
3556 + 2 * shift_cost (speed, mode, size-1)
3557 + 4 * add_cost (speed, mode) < max_cost))
3558 {
3559 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3560 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3561 if (tem != 0)
3562 {
3563 tem = extract_high_half (mode, tem);
3564 /* We used the wrong signedness. Adjust the result. */
3565 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3566 target, unsignedp);
3567 }
3568 }
3569
3570 return 0;
3571 }
3572
3573 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3574 putting the high half of the result in TARGET if that is convenient,
3575 and return where the result is. If the operation can not be performed,
3576 0 is returned.
3577
3578 MODE is the mode of operation and result.
3579
3580 UNSIGNEDP nonzero means unsigned multiply.
3581
3582 MAX_COST is the total allowed cost for the expanded RTL. */
3583
3584 static rtx
3585 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3586 rtx target, int unsignedp, int max_cost)
3587 {
3588 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3589 unsigned HOST_WIDE_INT cnst1;
3590 int extra_cost;
3591 bool sign_adjust = false;
3592 enum mult_variant variant;
3593 struct algorithm alg;
3594 rtx tem;
3595 bool speed = optimize_insn_for_speed_p ();
3596
3597 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3598 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3599 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3600
3601 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3602
3603 /* We can't optimize modes wider than BITS_PER_WORD.
3604 ??? We might be able to perform double-word arithmetic if
3605 mode == word_mode; however, all the cost calculations in
3606 synth_mult etc. assume single-word operations. */
3607 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3608 return expmed_mult_highpart_optab (mode, op0, op1, target,
3609 unsignedp, max_cost);
3610
3611 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3612
3613 /* Check whether we try to multiply by a negative constant. */
3614 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3615 {
3616 sign_adjust = true;
3617 extra_cost += add_cost (speed, mode);
3618 }
3619
3620 /* See whether shift/add multiplication is cheap enough. */
3621 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3622 max_cost - extra_cost))
3623 {
3624 /* See whether the specialized multiplication optabs are
3625 cheaper than the shift/add version. */
3626 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3627 alg.cost.cost + extra_cost);
3628 if (tem)
3629 return tem;
3630
3631 tem = convert_to_mode (wider_mode, op0, unsignedp);
3632 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3633 tem = extract_high_half (mode, tem);
3634
3635 /* Adjust result for signedness. */
3636 if (sign_adjust)
3637 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3638
3639 return tem;
3640 }
3641 return expmed_mult_highpart_optab (mode, op0, op1, target,
3642 unsignedp, max_cost);
3643 }
3644
3645
3646 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3647
3648 static rtx
3649 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3650 {
3651 unsigned HOST_WIDE_INT masklow, maskhigh;
3652 rtx result, temp, shift, label;
3653 int logd;
3654
3655 logd = floor_log2 (d);
3656 result = gen_reg_rtx (mode);
3657
3658 /* Avoid conditional branches when they're expensive. */
3659 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3660 && optimize_insn_for_speed_p ())
3661 {
3662 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3663 mode, 0, -1);
3664 if (signmask)
3665 {
3666 signmask = force_reg (mode, signmask);
3667 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3668 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3669
3670 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3671 which instruction sequence to use. If logical right shifts
3672 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3673 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
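/* For instance, with d == 4 (logd == 2) in a 32-bit mode, where
   signmask is -1 for negative OP0 and 0 otherwise:
     shift form:  r = ((x + (signmask >> 30)) & 3) - (signmask >> 30)
                  (logical shift)
     xor form:    r = ((((x ^ signmask) - signmask) & 3) ^ signmask)
                      - signmask
   e.g. x == -5 gives r == -1 in both forms, the truncating C
   remainder.  */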
3674
3675 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3676 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3677 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3678 > COSTS_N_INSNS (2)))
3679 {
3680 temp = expand_binop (mode, xor_optab, op0, signmask,
3681 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3682 temp = expand_binop (mode, sub_optab, temp, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, xor_optab, temp, signmask,
3687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688 temp = expand_binop (mode, sub_optab, temp, signmask,
3689 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3690 }
3691 else
3692 {
3693 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3694 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3695 signmask = force_reg (mode, signmask);
3696
3697 temp = expand_binop (mode, add_optab, op0, signmask,
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3700 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701 temp = expand_binop (mode, sub_optab, temp, signmask,
3702 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3703 }
3704 return temp;
3705 }
3706 }
3707
3708 /* Mask contains the mode's signbit and the significant bits of the
3709 modulus. By including the signbit in the operation, many targets
3710 can avoid an explicit compare operation in the following comparison
3711 against zero. */
3712
3713 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3714 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3715 {
3716 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3717 maskhigh = -1;
3718 }
3719 else
3720 maskhigh = (HOST_WIDE_INT) -1
3721 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3722
3723 temp = expand_binop (mode, and_optab, op0,
3724 immed_double_const (masklow, maskhigh, mode),
3725 result, 1, OPTAB_LIB_WIDEN);
3726 if (temp != result)
3727 emit_move_insn (result, temp);
3728
3729 label = gen_label_rtx ();
3730 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3731
3732 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3733 0, OPTAB_LIB_WIDEN);
3734 masklow = (HOST_WIDE_INT) -1 << logd;
3735 maskhigh = -1;
3736 temp = expand_binop (mode, ior_optab, temp,
3737 immed_double_const (masklow, maskhigh, mode),
3738 result, 1, OPTAB_LIB_WIDEN);
3739 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3740 0, OPTAB_LIB_WIDEN);
3741 if (temp != result)
3742 emit_move_insn (result, temp);
3743 emit_label (label);
3744 return result;
3745 }
3746
3747 /* Expand signed division of OP0 by a power of two D in mode MODE.
3748 This routine is only called for positive values of D. */
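/* All of the expansions below compute the truncating quotient as
     (OP0 + (OP0 < 0 ? D - 1 : 0)) >> logd
   i.e. a negative dividend is biased upwards before the arithmetic
   shift; for instance (-7) / 4 becomes (-7 + 3) >> 2 == -1.  */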
3749
3750 static rtx
3751 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3752 {
3753 rtx temp, label;
3754 int logd;
3755
3756 logd = floor_log2 (d);
3757
3758 if (d == 2
3759 && BRANCH_COST (optimize_insn_for_speed_p (),
3760 false) >= 1)
3761 {
3762 temp = gen_reg_rtx (mode);
3763 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3764 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3765 0, OPTAB_LIB_WIDEN);
3766 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3767 }
3768
3769 #ifdef HAVE_conditional_move
3770 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3771 >= 2)
3772 {
3773 rtx temp2;
3774
3775 /* ??? emit_conditional_move forces a stack adjustment via
3776 compare_from_rtx so, if the sequence is discarded, it will
3777 be lost. Do it now instead. */
3778 do_pending_stack_adjust ();
3779
3780 start_sequence ();
3781 temp2 = copy_to_mode_reg (mode, op0);
3782 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3783 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3784 temp = force_reg (mode, temp);
3785
3786 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3787 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3788 mode, temp, temp2, mode, 0);
3789 if (temp2)
3790 {
3791 rtx seq = get_insns ();
3792 end_sequence ();
3793 emit_insn (seq);
3794 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3795 }
3796 end_sequence ();
3797 }
3798 #endif
3799
3800 if (BRANCH_COST (optimize_insn_for_speed_p (),
3801 false) >= 2)
3802 {
3803 int ushift = GET_MODE_BITSIZE (mode) - logd;
3804
3805 temp = gen_reg_rtx (mode);
3806 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3808 > COSTS_N_INSNS (1))
3809 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3810 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3811 else
3812 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3813 ushift, NULL_RTX, 1);
3814 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3815 0, OPTAB_LIB_WIDEN);
3816 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3817 }
3818
3819 label = gen_label_rtx ();
3820 temp = copy_to_mode_reg (mode, op0);
3821 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3822 expand_inc (temp, GEN_INT (d - 1));
3823 emit_label (label);
3824 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3825 }
3826 \f
3827 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3828 if that is convenient, and returning where the result is.
3829 You may request either the quotient or the remainder as the result;
3830 specify REM_FLAG nonzero to get the remainder.
3831
3832 CODE is the expression code for which kind of division this is;
3833 it controls how rounding is done. MODE is the machine mode to use.
3834 UNSIGNEDP nonzero means do unsigned division. */
3835
3836 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3837 and then correct it by or'ing in missing high bits
3838 if result of ANDI is nonzero.
3839 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3840 This could optimize to a bfexts instruction.
3841 But C doesn't use these operations, so their optimizations are
3842 left for later. */
3843 /* ??? For modulo, we don't actually need the highpart of the first product,
3844 the low part will do nicely. And for small divisors, the second multiply
3845 can also be a low-part only multiply or even be completely left out.
3846 E.g. to calculate the remainder of a division by 3 with a 32 bit
3847 multiply, multiply with 0x55555556 and extract the upper two bits;
3848 the result is exact for inputs up to 0x1fffffff.
3849 The input range can be reduced by using cross-sum rules.
3850 For odd divisors >= 3, the following table gives right shift counts
3851 so that if a number is shifted by an integer multiple of the given
3852 amount, the remainder stays the same:
3853 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3854 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3855 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3856 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3857 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3858
3859 Cross-sum rules for even numbers can be derived by leaving as many bits
3860 to the right alone as the divisor has zeros to the right.
3861 E.g. if x is an unsigned 32 bit number:
3862 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3863 */
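/* Checking the division-by-3 example above with x == 5: the low 32
   bits of 5 * 0x55555556 are 0xaaaaaaae, and their top two bits are
   2, which is indeed 5 mod 3.  */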
3864
3865 rtx
3866 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3867 rtx op0, rtx op1, rtx target, int unsignedp)
3868 {
3869 enum machine_mode compute_mode;
3870 rtx tquotient;
3871 rtx quotient = 0, remainder = 0;
3872 rtx last;
3873 int size;
3874 rtx insn;
3875 optab optab1, optab2;
3876 int op1_is_constant, op1_is_pow2 = 0;
3877 int max_cost, extra_cost;
3878 static HOST_WIDE_INT last_div_const = 0;
3879 static HOST_WIDE_INT ext_op1;
3880 bool speed = optimize_insn_for_speed_p ();
3881
3882 op1_is_constant = CONST_INT_P (op1);
3883 if (op1_is_constant)
3884 {
3885 ext_op1 = INTVAL (op1);
3886 if (unsignedp)
3887 ext_op1 &= GET_MODE_MASK (mode);
3888 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3890 }
3891
3892 /*
3893 This is the structure of expand_divmod:
3894
3895 First comes code to fix up the operands so we can perform the operations
3896 correctly and efficiently.
3897
3898 Second comes a switch statement with code specific for each rounding mode.
3899 For some special operands this code emits all RTL for the desired
3900 operation, for other cases, it generates only a quotient and stores it in
3901 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3902 to indicate that it has not done anything.
3903
3904 Last comes code that finishes the operation. If QUOTIENT is set and
3905 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3906 QUOTIENT is not set, it is computed using trunc rounding.
3907
3908 We try to generate special code for division and remainder when OP1 is a
3909 constant. If |OP1| = 2**n we can use shifts and some other fast
3910 operations. For other values of OP1, we compute a carefully selected
3911 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912 by m.
3913
3914 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915 half of the product. Different strategies for generating the product are
3916 implemented in expmed_mult_highpart.
3917
3918 If what we actually want is the remainder, we generate that by another
3919 by-constant multiplication and a subtraction. */
3920
3921 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922 code below will malfunction if we are, so check here and handle
3923 the special case if so. */
3924 if (op1 == const1_rtx)
3925 return rem_flag ? const0_rtx : op0;
3926
3927 /* When dividing by -1, we could get an overflow.
3928 negv_optab can handle overflows. */
3929 if (! unsignedp && op1 == constm1_rtx)
3930 {
3931 if (rem_flag)
3932 return const0_rtx;
3933 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3934 ? negv_optab : neg_optab, op0, target, 0);
3935 }
3936
3937 if (target
3938 /* Don't use the function value register as a target
3939 since we have to read it as well as write it,
3940 and function-inlining gets confused by this. */
3941 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942 /* Don't clobber an operand while doing a multi-step calculation. */
3943 || ((rem_flag || op1_is_constant)
3944 && (reg_mentioned_p (target, op0)
3945 || (MEM_P (op0) && MEM_P (target))))
3946 || reg_mentioned_p (target, op1)
3947 || (MEM_P (op1) && MEM_P (target))))
3948 target = 0;
3949
3950 /* Get the mode in which to perform this computation. Normally it will
3951 be MODE, but sometimes we can't do the desired operation in MODE.
3952 If so, pick a wider mode in which we can do the operation. Convert
3953 to that mode at the start to avoid repeated conversions.
3954
3955 First see what operations we need. These depend on the expression
3956 we are evaluating. (We assume that divxx3 insns exist under the
3957 same conditions as modxx3 insns do, and that these insns don't normally
3958 fail. If these assumptions are not correct, we may generate less
3959 efficient code in some cases.)
3960
3961 Then see if we find a mode in which we can open-code that operation
3962 (either a division, modulus, or shift). Finally, check for the smallest
3963 mode for which we can do the operation with a library call. */
3964
3965 /* We might want to refine this now that we have division-by-constant
3966 optimization. Since expmed_mult_highpart tries so many variants, it is
3967 not straightforward to generalize this. Maybe we should make an array
3968 of possible modes in init_expmed? Save this for GCC 2.7. */
3969
3970 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971 ? (unsignedp ? lshr_optab : ashr_optab)
3972 : (unsignedp ? udiv_optab : sdiv_optab));
3973 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974 ? optab1
3975 : (unsignedp ? udivmod_optab : sdivmod_optab));
3976
3977 for (compute_mode = mode; compute_mode != VOIDmode;
3978 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981 break;
3982
3983 if (compute_mode == VOIDmode)
3984 for (compute_mode = mode; compute_mode != VOIDmode;
3985 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986 if (optab_libfunc (optab1, compute_mode)
3987 || optab_libfunc (optab2, compute_mode))
3988 break;
3989
3990 /* If we still couldn't find a mode, use MODE, but expand_binop will
3991 probably die. */
3992 if (compute_mode == VOIDmode)
3993 compute_mode = mode;
3994
3995 if (target && GET_MODE (target) == compute_mode)
3996 tquotient = target;
3997 else
3998 tquotient = gen_reg_rtx (compute_mode);
3999
4000 size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003 (mode), and thereby get better code when OP1 is a constant. Do that
4004 later. It will require going over all usages of SIZE below. */
4005 size = GET_MODE_BITSIZE (mode);
4006 #endif
4007
4008 /* Only deduct something for a REM if the last divide done was
4009 for a different constant. Then set the constant of the last
4010 divide. */
4011 max_cost = (unsignedp
4012 ? udiv_cost (speed, compute_mode)
4013 : sdiv_cost (speed, compute_mode));
4014 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015 && INTVAL (op1) == last_div_const))
4016 max_cost -= (mul_cost (speed, compute_mode)
4017 + add_cost (speed, compute_mode));
4018
4019 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4020
4021 /* Now convert to the best mode to use. */
4022 if (compute_mode != mode)
4023 {
4024 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4026
4027 /* convert_modes may have placed op1 into a register, so we
4028 must recompute the following. */
4029 op1_is_constant = CONST_INT_P (op1);
4030 op1_is_pow2 = (op1_is_constant
4031 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032 || (! unsignedp
4033 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4034 }
4035
4036 /* If one of the operands is a volatile MEM, copy it into a register. */
4037
4038 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039 op0 = force_reg (compute_mode, op0);
4040 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041 op1 = force_reg (compute_mode, op1);
4042
4043 /* If we need the remainder or if OP1 is constant, we need to
4044 put OP0 in a register in case it has any queued subexpressions. */
4045 if (rem_flag || op1_is_constant)
4046 op0 = force_reg (compute_mode, op0);
4047
4048 last = get_last_insn ();
4049
4050 /* Promote floor rounding to trunc rounding for unsigned operations. */
4051 if (unsignedp)
4052 {
4053 if (code == FLOOR_DIV_EXPR)
4054 code = TRUNC_DIV_EXPR;
4055 if (code == FLOOR_MOD_EXPR)
4056 code = TRUNC_MOD_EXPR;
4057 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058 code = TRUNC_DIV_EXPR;
4059 }
4060
4061 if (op1 != const0_rtx)
4062 switch (code)
4063 {
4064 case TRUNC_MOD_EXPR:
4065 case TRUNC_DIV_EXPR:
4066 if (op1_is_constant)
4067 {
4068 if (unsignedp)
4069 {
4070 unsigned HOST_WIDE_INT mh, ml;
4071 int pre_shift, post_shift;
4072 int dummy;
4073 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074 & GET_MODE_MASK (compute_mode));
4075
4076 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4077 {
4078 pre_shift = floor_log2 (d);
4079 if (rem_flag)
4080 {
4081 remainder
4082 = expand_binop (compute_mode, and_optab, op0,
4083 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4084 remainder, 1,
4085 OPTAB_LIB_WIDEN);
4086 if (remainder)
4087 return gen_lowpart (mode, remainder);
4088 }
4089 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4090 pre_shift, tquotient, 1);
4091 }
4092 else if (size <= HOST_BITS_PER_WIDE_INT)
4093 {
4094 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4095 {
4096 /* Most significant bit of divisor is set; emit an scc
4097 insn. */
4098 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4099 compute_mode, 1, 1);
4100 }
4101 else
4102 {
4103 /* Find a suitable multiplier and right shift count
4104 instead of multiplying with D. */
4105
4106 mh = choose_multiplier (d, size, size,
4107 &ml, &post_shift, &dummy);
4108
4109 /* If the suggested multiplier is more than SIZE bits,
4110 we can do better for even divisors, using an
4111 initial right shift. */
4112 if (mh != 0 && (d & 1) == 0)
4113 {
4114 pre_shift = floor_log2 (d & -d);
4115 mh = choose_multiplier (d >> pre_shift, size,
4116 size - pre_shift,
4117 &ml, &post_shift, &dummy);
4118 gcc_assert (!mh);
4119 }
4120 else
4121 pre_shift = 0;
4122
4123 if (mh != 0)
4124 {
4125 rtx t1, t2, t3, t4;
4126
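/* For a 32-bit unsigned divide by 7, ml == 0x24924925 with the
   extra high bit set (mh != 0) and post_shift == 3, so this emits
   t1 = high part of op0 * ml, t2 = op0 - t1, t3 = t2 >> 1,
   t4 = t1 + t3 and quotient = t4 >> (post_shift - 1); e.g.
   op0 == 100 gives t1 == 14, t2 == 86, t3 == 43, t4 == 57 and
   quotient 14.  */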
4127 if (post_shift - 1 >= BITS_PER_WORD)
4128 goto fail1;
4129
4130 extra_cost
4131 = (shift_cost (speed, compute_mode, post_shift - 1)
4132 + shift_cost (speed, compute_mode, 1)
4133 + 2 * add_cost (speed, compute_mode));
4134 t1 = expmed_mult_highpart (compute_mode, op0,
4135 GEN_INT (ml),
4136 NULL_RTX, 1,
4137 max_cost - extra_cost);
4138 if (t1 == 0)
4139 goto fail1;
4140 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4141 op0, t1),
4142 NULL_RTX);
4143 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4144 t2, 1, NULL_RTX, 1);
4145 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4146 t1, t3),
4147 NULL_RTX);
4148 quotient = expand_shift
4149 (RSHIFT_EXPR, compute_mode, t4,
4150 post_shift - 1, tquotient, 1);
4151 }
4152 else
4153 {
4154 rtx t1, t2;
4155
4156 if (pre_shift >= BITS_PER_WORD
4157 || post_shift >= BITS_PER_WORD)
4158 goto fail1;
4159
4160 t1 = expand_shift
4161 (RSHIFT_EXPR, compute_mode, op0,
4162 pre_shift, NULL_RTX, 1);
4163 extra_cost
4164 = (shift_cost (speed, compute_mode, pre_shift)
4165 + shift_cost (speed, compute_mode, post_shift));
4166 t2 = expmed_mult_highpart (compute_mode, t1,
4167 GEN_INT (ml),
4168 NULL_RTX, 1,
4169 max_cost - extra_cost);
4170 if (t2 == 0)
4171 goto fail1;
4172 quotient = expand_shift
4173 (RSHIFT_EXPR, compute_mode, t2,
4174 post_shift, tquotient, 1);
4175 }
4176 }
4177 }
4178 else /* Mode too wide for the tricky code */
4179 break;
4180
4181 insn = get_last_insn ();
4182 if (insn != last)
4183 set_dst_reg_note (insn, REG_EQUAL,
4184 gen_rtx_UDIV (compute_mode, op0, op1),
4185 quotient);
4186 }
4187 else /* TRUNC_DIV, signed */
4188 {
4189 unsigned HOST_WIDE_INT ml;
4190 int lgup, post_shift;
4191 rtx mlr;
4192 HOST_WIDE_INT d = INTVAL (op1);
4193 unsigned HOST_WIDE_INT abs_d;
4194
4195 /* Since d might be INT_MIN, we have to cast to
4196 unsigned HOST_WIDE_INT before negating to avoid
4197 undefined signed overflow. */
4198 abs_d = (d >= 0
4199 ? (unsigned HOST_WIDE_INT) d
4200 : - (unsigned HOST_WIDE_INT) d);
4201
4202 /* n rem d = n rem -d */
4203 if (rem_flag && d < 0)
4204 {
4205 d = abs_d;
4206 op1 = gen_int_mode (abs_d, compute_mode);
4207 }
4208
4209 if (d == 1)
4210 quotient = op0;
4211 else if (d == -1)
4212 quotient = expand_unop (compute_mode, neg_optab, op0,
4213 tquotient, 0);
4214 else if (HOST_BITS_PER_WIDE_INT >= size
4215 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4216 {
4217 /* This case is not handled correctly below. */
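/* ABS_D is 2^(SIZE-1) here, i.e. OP1 is the most negative value of
   the mode.  The truncating quotient is 1 exactly when OP0 == OP1
   and 0 for every other OP0 (whose magnitude is smaller), so a
   single EQ store-flag computes it directly.  */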
4218 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4219 compute_mode, 1, 1);
4220 if (quotient == 0)
4221 goto fail1;
4222 }
4223 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4224 && (rem_flag
4225 ? smod_pow2_cheap (speed, compute_mode)
4226 : sdiv_pow2_cheap (speed, compute_mode))
4227 /* We assume that the cheap metric is true if the
4228 optab has an expander for this mode. */
4229 && ((optab_handler ((rem_flag ? smod_optab
4230 : sdiv_optab),
4231 compute_mode)
4232 != CODE_FOR_nothing)
4233 || (optab_handler (sdivmod_optab,
4234 compute_mode)
4235 != CODE_FOR_nothing)))
4236 ;
4237 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4238 {
4239 if (rem_flag)
4240 {
4241 remainder = expand_smod_pow2 (compute_mode, op0, d);
4242 if (remainder)
4243 return gen_lowpart (mode, remainder);
4244 }
4245
4246 if (sdiv_pow2_cheap (speed, compute_mode)
4247 && ((optab_handler (sdiv_optab, compute_mode)
4248 != CODE_FOR_nothing)
4249 || (optab_handler (sdivmod_optab, compute_mode)
4250 != CODE_FOR_nothing)))
4251 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4252 compute_mode, op0,
4253 gen_int_mode (abs_d,
4254 compute_mode),
4255 NULL_RTX, 0);
4256 else
4257 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4258
4259 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4260 negate the quotient. */
4261 if (d < 0)
4262 {
4263 insn = get_last_insn ();
4264 if (insn != last
4265 && abs_d < ((unsigned HOST_WIDE_INT) 1
4266 << (HOST_BITS_PER_WIDE_INT - 1)))
4267 set_dst_reg_note (insn, REG_EQUAL,
4268 gen_rtx_DIV (compute_mode, op0,
4269 gen_int_mode
4270 (abs_d,
4271 compute_mode)),
4272 quotient);
4273
4274 quotient = expand_unop (compute_mode, neg_optab,
4275 quotient, quotient, 0);
4276 }
4277 }
4278 else if (size <= HOST_BITS_PER_WIDE_INT)
4279 {
4280 choose_multiplier (abs_d, size, size - 1,
4281 &ml, &post_shift, &lgup);
4282 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4283 {
4284 rtx t1, t2, t3;
4285
4286 if (post_shift >= BITS_PER_WORD
4287 || size - 1 >= BITS_PER_WORD)
4288 goto fail1;
4289
4290 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4291 + shift_cost (speed, compute_mode, size - 1)
4292 + add_cost (speed, compute_mode));
4293 t1 = expmed_mult_highpart (compute_mode, op0,
4294 GEN_INT (ml), NULL_RTX, 0,
4295 max_cost - extra_cost);
4296 if (t1 == 0)
4297 goto fail1;
4298 t2 = expand_shift
4299 (RSHIFT_EXPR, compute_mode, t1,
4300 post_shift, NULL_RTX, 0);
4301 t3 = expand_shift
4302 (RSHIFT_EXPR, compute_mode, op0,
4303 size - 1, NULL_RTX, 0);
4304 if (d < 0)
4305 quotient
4306 = force_operand (gen_rtx_MINUS (compute_mode,
4307 t3, t2),
4308 tquotient);
4309 else
4310 quotient
4311 = force_operand (gen_rtx_MINUS (compute_mode,
4312 t2, t3),
4313 tquotient);
4314 }
4315 else
4316 {
4317 rtx t1, t2, t3, t4;
4318
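/* Here the multiplier needs all SIZE bits, so it is used as a
   negative value and OP0 is added back to the high product.  For a
   32-bit signed divide by 7, ml becomes 0x92492493 with
   post_shift == 2, giving t2 = high (op0 * ml) + op0, t3 = t2 >> 2,
   t4 = op0 >> 31 and, for positive D, quotient = t3 - t4;
   op0 == -100 yields t2 == -58, t3 == -15, t4 == -1 and
   quotient -14.  */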
4319 if (post_shift >= BITS_PER_WORD
4320 || size - 1 >= BITS_PER_WORD)
4321 goto fail1;
4322
4323 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4324 mlr = gen_int_mode (ml, compute_mode);
4325 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4326 + shift_cost (speed, compute_mode, size - 1)
4327 + 2 * add_cost (speed, compute_mode));
4328 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4329 NULL_RTX, 0,
4330 max_cost - extra_cost);
4331 if (t1 == 0)
4332 goto fail1;
4333 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4334 t1, op0),
4335 NULL_RTX);
4336 t3 = expand_shift
4337 (RSHIFT_EXPR, compute_mode, t2,
4338 post_shift, NULL_RTX, 0);
4339 t4 = expand_shift
4340 (RSHIFT_EXPR, compute_mode, op0,
4341 size - 1, NULL_RTX, 0);
4342 if (d < 0)
4343 quotient
4344 = force_operand (gen_rtx_MINUS (compute_mode,
4345 t4, t3),
4346 tquotient);
4347 else
4348 quotient
4349 = force_operand (gen_rtx_MINUS (compute_mode,
4350 t3, t4),
4351 tquotient);
4352 }
4353 }
4354 else /* Mode too wide for the tricky code */
4355 break;
4356
4357 insn = get_last_insn ();
4358 if (insn != last)
4359 set_dst_reg_note (insn, REG_EQUAL,
4360 gen_rtx_DIV (compute_mode, op0, op1),
4361 quotient);
4362 }
4363 break;
4364 }
4365 fail1:
4366 delete_insns_since (last);
4367 break;
4368
4369 case FLOOR_DIV_EXPR:
4370 case FLOOR_MOD_EXPR:
4371 /* We will come here only for signed operations. */
4372 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4373 {
4374 unsigned HOST_WIDE_INT mh, ml;
4375 int pre_shift, lgup, post_shift;
4376 HOST_WIDE_INT d = INTVAL (op1);
4377
4378 if (d > 0)
4379 {
4380 /* We could just as easily deal with negative constants here,
4381 but it does not seem worth the trouble for GCC 2.6. */
4382 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4383 {
4384 pre_shift = floor_log2 (d);
4385 if (rem_flag)
4386 {
4387 remainder = expand_binop (compute_mode, and_optab, op0,
4388 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4389 remainder, 0, OPTAB_LIB_WIDEN);
4390 if (remainder)
4391 return gen_lowpart (mode, remainder);
4392 }
4393 quotient = expand_shift
4394 (RSHIFT_EXPR, compute_mode, op0,
4395 pre_shift, tquotient, 0);
4396 }
4397 else
4398 {
4399 rtx t1, t2, t3, t4;
4400
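/* Floor division by a positive D uses a one's complement trick:
   t1 = op0 >> (size - 1) is the sign mask, t2 = op0 ^ t1, and after
   an unsigned high multiply by ml and a right shift the result is
   XORed with t1 again.  For D == 7, ml == 0x92492493 and
   post_shift == 2; op0 == -100 gives t2 == 99, t4 == 14 and
   quotient == 14 ^ -1 == -15 == floor (-100 / 7).  */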
4401 mh = choose_multiplier (d, size, size - 1,
4402 &ml, &post_shift, &lgup);
4403 gcc_assert (!mh);
4404
4405 if (post_shift < BITS_PER_WORD
4406 && size - 1 < BITS_PER_WORD)
4407 {
4408 t1 = expand_shift
4409 (RSHIFT_EXPR, compute_mode, op0,
4410 size - 1, NULL_RTX, 0);
4411 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4412 NULL_RTX, 0, OPTAB_WIDEN);
4413 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4414 + shift_cost (speed, compute_mode, size - 1)
4415 + 2 * add_cost (speed, compute_mode));
4416 t3 = expmed_mult_highpart (compute_mode, t2,
4417 GEN_INT (ml), NULL_RTX, 1,
4418 max_cost - extra_cost);
4419 if (t3 != 0)
4420 {
4421 t4 = expand_shift
4422 (RSHIFT_EXPR, compute_mode, t3,
4423 post_shift, NULL_RTX, 1);
4424 quotient = expand_binop (compute_mode, xor_optab,
4425 t4, t1, tquotient, 0,
4426 OPTAB_WIDEN);
4427 }
4428 }
4429 }
4430 }
4431 else
4432 {
4433 rtx nsign, t1, t2, t3, t4;
4434 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4435 op0, constm1_rtx), NULL_RTX);
4436 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4437 0, OPTAB_WIDEN);
4438 nsign = expand_shift
4439 (RSHIFT_EXPR, compute_mode, t2,
4440 size - 1, NULL_RTX, 0);
4441 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4442 NULL_RTX);
4443 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4444 NULL_RTX, 0);
4445 if (t4)
4446 {
4447 rtx t5;
4448 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4449 NULL_RTX, 0);
4450 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4451 t4, t5),
4452 tquotient);
4453 }
4454 }
4455 }
4456
4457 if (quotient != 0)
4458 break;
4459 delete_insns_since (last);
4460
4461 /* Try using an instruction that produces both the quotient and
4462 remainder, using truncation. We can easily compensate the quotient
4463 or remainder to get floor rounding, once we have the remainder.
4464 Notice that we also compute the final remainder value here,
4465 and return the result right away. */
4466 if (target == 0 || GET_MODE (target) != compute_mode)
4467 target = gen_reg_rtx (compute_mode);
4468
4469 if (rem_flag)
4470 {
4471 remainder
4472 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4473 quotient = gen_reg_rtx (compute_mode);
4474 }
4475 else
4476 {
4477 quotient
4478 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4479 remainder = gen_reg_rtx (compute_mode);
4480 }
4481
4482 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4483 quotient, remainder, 0))
4484 {
4485 /* This could be computed with a branch-less sequence.
4486 Save that for later. */
4487 rtx tem;
4488 rtx label = gen_label_rtx ();
4489 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4490 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4491 NULL_RTX, 0, OPTAB_WIDEN);
4492 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4493 expand_dec (quotient, const1_rtx);
4494 expand_inc (remainder, op1);
4495 emit_label (label);
4496 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4497 }
4498
4499 /* No luck with division elimination or divmod. Have to do it
4500 by conditionally adjusting op0 *and* the result. */
4501 {
4502 rtx label1, label2, label3, label4, label5;
4503 rtx adjusted_op0;
4504 rtx tem;
4505
4506 quotient = gen_reg_rtx (compute_mode);
4507 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4508 label1 = gen_label_rtx ();
4509 label2 = gen_label_rtx ();
4510 label3 = gen_label_rtx ();
4511 label4 = gen_label_rtx ();
4512 label5 = gen_label_rtx ();
4513 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4514 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4515 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4516 quotient, 0, OPTAB_LIB_WIDEN);
4517 if (tem != quotient)
4518 emit_move_insn (quotient, tem);
4519 emit_jump_insn (gen_jump (label5));
4520 emit_barrier ();
4521 emit_label (label1);
4522 expand_inc (adjusted_op0, const1_rtx);
4523 emit_jump_insn (gen_jump (label4));
4524 emit_barrier ();
4525 emit_label (label2);
4526 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4527 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4528 quotient, 0, OPTAB_LIB_WIDEN);
4529 if (tem != quotient)
4530 emit_move_insn (quotient, tem);
4531 emit_jump_insn (gen_jump (label5));
4532 emit_barrier ();
4533 emit_label (label3);
4534 expand_dec (adjusted_op0, const1_rtx);
4535 emit_label (label4);
4536 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4537 quotient, 0, OPTAB_LIB_WIDEN);
4538 if (tem != quotient)
4539 emit_move_insn (quotient, tem);
4540 expand_dec (quotient, const1_rtx);
4541 emit_label (label5);
4542 }
4543 break;
4544
4545 case CEIL_DIV_EXPR:
4546 case CEIL_MOD_EXPR:
4547 if (unsignedp)
4548 {
4549 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4550 {
4551 rtx t1, t2, t3;
4552 unsigned HOST_WIDE_INT d = INTVAL (op1);
4553 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4554 floor_log2 (d), tquotient, 1);
4555 t2 = expand_binop (compute_mode, and_optab, op0,
4556 GEN_INT (d - 1),
4557 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4558 t3 = gen_reg_rtx (compute_mode);
4559 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4560 compute_mode, 1, 1);
4561 if (t3 == 0)
4562 {
4563 rtx lab;
4564 lab = gen_label_rtx ();
4565 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4566 expand_inc (t1, const1_rtx);
4567 emit_label (lab);
4568 quotient = t1;
4569 }
4570 else
4571 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4572 t1, t3),
4573 tquotient);
4574 break;
4575 }
4576
4577 /* Try using an instruction that produces both the quotient and
4578 remainder, using truncation. We can easily compensate the
4579 quotient or remainder to get ceiling rounding, once we have the
4580 remainder. Notice that we also compute the final remainder
4581 value here, and return the result right away. */
4582 if (target == 0 || GET_MODE (target) != compute_mode)
4583 target = gen_reg_rtx (compute_mode);
4584
4585 if (rem_flag)
4586 {
4587 remainder = (REG_P (target)
4588 ? target : gen_reg_rtx (compute_mode));
4589 quotient = gen_reg_rtx (compute_mode);
4590 }
4591 else
4592 {
4593 quotient = (REG_P (target)
4594 ? target : gen_reg_rtx (compute_mode));
4595 remainder = gen_reg_rtx (compute_mode);
4596 }
4597
4598 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4599 remainder, 1))
4600 {
4601 /* This could be computed with a branch-less sequence.
4602 Save that for later. */
4603 rtx label = gen_label_rtx ();
4604 do_cmp_and_jump (remainder, const0_rtx, EQ,
4605 compute_mode, label);
4606 expand_inc (quotient, const1_rtx);
4607 expand_dec (remainder, op1);
4608 emit_label (label);
4609 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4610 }
4611
4612 /* No luck with division elimination or divmod. Have to do it
4613 by conditionally adjusting op0 *and* the result. */
4614 {
4615 rtx label1, label2;
4616 rtx adjusted_op0, tem;
4617
4618 quotient = gen_reg_rtx (compute_mode);
4619 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4620 label1 = gen_label_rtx ();
4621 label2 = gen_label_rtx ();
4622 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4623 compute_mode, label1);
4624 emit_move_insn (quotient, const0_rtx);
4625 emit_jump_insn (gen_jump (label2));
4626 emit_barrier ();
4627 emit_label (label1);
4628 expand_dec (adjusted_op0, const1_rtx);
4629 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4630 quotient, 1, OPTAB_LIB_WIDEN);
4631 if (tem != quotient)
4632 emit_move_insn (quotient, tem);
4633 expand_inc (quotient, const1_rtx);
4634 emit_label (label2);
4635 }
4636 }
4637 else /* signed */
4638 {
4639 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4640 && INTVAL (op1) >= 0)
4641 {
4642 /* This is extremely similar to the code for the unsigned case
4643 above. For 2.7 we should merge these variants, but for
4644 2.6.1 I don't want to touch the code for unsigned since that
4645 gets used in C. The signed case will only be used by other
4646 languages (Ada). */
4647
4648 rtx t1, t2, t3;
4649 unsigned HOST_WIDE_INT d = INTVAL (op1);
4650 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4651 floor_log2 (d), tquotient, 0);
4652 t2 = expand_binop (compute_mode, and_optab, op0,
4653 GEN_INT (d - 1),
4654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4655 t3 = gen_reg_rtx (compute_mode);
4656 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4657 compute_mode, 1, 1);
4658 if (t3 == 0)
4659 {
4660 rtx lab;
4661 lab = gen_label_rtx ();
4662 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4663 expand_inc (t1, const1_rtx);
4664 emit_label (lab);
4665 quotient = t1;
4666 }
4667 else
4668 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4669 t1, t3),
4670 tquotient);
4671 break;
4672 }
4673
4674 /* Try using an instruction that produces both the quotient and
4675 remainder, using truncation. We can easily compensate the
4676 quotient or remainder to get ceiling rounding, once we have the
4677 remainder. Notice that we compute also the final remainder
4678 value here, and return the result right away. */
4679 if (target == 0 || GET_MODE (target) != compute_mode)
4680 target = gen_reg_rtx (compute_mode);
4681 if (rem_flag)
4682 {
4683 remainder = (REG_P (target)
4684 ? target : gen_reg_rtx (compute_mode));
4685 quotient = gen_reg_rtx (compute_mode);
4686 }
4687 else
4688 {
4689 quotient = (REG_P (target)
4690 ? target : gen_reg_rtx (compute_mode));
4691 remainder = gen_reg_rtx (compute_mode);
4692 }
4693
4694 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4695 remainder, 0))
4696 {
4697 /* This could be computed with a branch-less sequence.
4698 Save that for later. */
4699 rtx tem;
4700 rtx label = gen_label_rtx ();
4701 do_cmp_and_jump (remainder, const0_rtx, EQ,
4702 compute_mode, label);
4703 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4704 NULL_RTX, 0, OPTAB_WIDEN);
4705 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4706 expand_inc (quotient, const1_rtx);
4707 expand_dec (remainder, op1);
4708 emit_label (label);
4709 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4710 }
4711
4712 /* No luck with division elimination or divmod. Have to do it
4713 by conditionally adjusting op0 *and* the result. */
4714 {
4715 rtx label1, label2, label3, label4, label5;
4716 rtx adjusted_op0;
4717 rtx tem;
4718
4719 quotient = gen_reg_rtx (compute_mode);
4720 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4721 label1 = gen_label_rtx ();
4722 label2 = gen_label_rtx ();
4723 label3 = gen_label_rtx ();
4724 label4 = gen_label_rtx ();
4725 label5 = gen_label_rtx ();
4726 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4727 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4728 compute_mode, label1);
4729 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4730 quotient, 0, OPTAB_LIB_WIDEN);
4731 if (tem != quotient)
4732 emit_move_insn (quotient, tem);
4733 emit_jump_insn (gen_jump (label5));
4734 emit_barrier ();
4735 emit_label (label1);
4736 expand_dec (adjusted_op0, const1_rtx);
4737 emit_jump_insn (gen_jump (label4));
4738 emit_barrier ();
4739 emit_label (label2);
4740 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4741 compute_mode, label3);
4742 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4743 quotient, 0, OPTAB_LIB_WIDEN);
4744 if (tem != quotient)
4745 emit_move_insn (quotient, tem);
4746 emit_jump_insn (gen_jump (label5));
4747 emit_barrier ();
4748 emit_label (label3);
4749 expand_inc (adjusted_op0, const1_rtx);
4750 emit_label (label4);
4751 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4752 quotient, 0, OPTAB_LIB_WIDEN);
4753 if (tem != quotient)
4754 emit_move_insn (quotient, tem);
4755 expand_inc (quotient, const1_rtx);
4756 emit_label (label5);
4757 }
4758 }
4759 break;
4760
4761 case EXACT_DIV_EXPR:
4762 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4763 {
4764 HOST_WIDE_INT d = INTVAL (op1);
4765 unsigned HOST_WIDE_INT ml;
4766 int pre_shift;
4767 rtx t1;
4768
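/* Exact division: shift out the power-of-two factor of D, then
   multiply by the multiplicative inverse of its odd part modulo
   2^SIZE.  E.g. for D == 10 in 32-bit arithmetic, pre_shift == 1 and
   ml == 0xcccccccd (the inverse of 5), so 70 becomes
   (70 >> 1) * 0xcccccccd == 7 modulo 2^32.  */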
4769 pre_shift = floor_log2 (d & -d);
4770 ml = invert_mod2n (d >> pre_shift, size);
4771 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4772 pre_shift, NULL_RTX, unsignedp);
4773 quotient = expand_mult (compute_mode, t1,
4774 gen_int_mode (ml, compute_mode),
4775 NULL_RTX, 1);
4776
4777 insn = get_last_insn ();
4778 set_dst_reg_note (insn, REG_EQUAL,
4779 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4780 compute_mode, op0, op1),
4781 quotient);
4782 }
4783 break;
4784
4785 case ROUND_DIV_EXPR:
4786 case ROUND_MOD_EXPR:
4787 if (unsignedp)
4788 {
4789 rtx tem;
4790 rtx label;
4791 label = gen_label_rtx ();
4792 quotient = gen_reg_rtx (compute_mode);
4793 remainder = gen_reg_rtx (compute_mode);
4794 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4795 {
4796 rtx tem;
4797 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4798 quotient, 1, OPTAB_LIB_WIDEN);
4799 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4800 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4801 remainder, 1, OPTAB_LIB_WIDEN);
4802 }
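/* Round to nearest: bump the truncating quotient (and reduce the
   remainder by OP1) whenever remainder > (OP1 - 1) / 2, so e.g.
   18 / 5 rounds to 4 while 17 / 5 stays at 3.  */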
4803 tem = plus_constant (compute_mode, op1, -1);
4804 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4805 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4806 expand_inc (quotient, const1_rtx);
4807 expand_dec (remainder, op1);
4808 emit_label (label);
4809 }
4810 else
4811 {
4812 rtx abs_rem, abs_op1, tem, mask;
4813 rtx label;
4814 label = gen_label_rtx ();
4815 quotient = gen_reg_rtx (compute_mode);
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4818 {
4819 rtx tem;
4820 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4821 quotient, 0, OPTAB_LIB_WIDEN);
4822 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4823 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824 remainder, 0, OPTAB_LIB_WIDEN);
4825 }
4826 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4827 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4828 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4829 1, NULL_RTX, 1);
4830 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4831 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4832 NULL_RTX, 0, OPTAB_WIDEN);
4833 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4834 size - 1, NULL_RTX, 0);
4835 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4836 NULL_RTX, 0, OPTAB_WIDEN);
4837 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4838 NULL_RTX, 0, OPTAB_WIDEN);
4839 expand_inc (quotient, tem);
4840 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_dec (remainder, tem);
4845 emit_label (label);
4846 }
4847 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4848
4849 default:
4850 gcc_unreachable ();
4851 }
4852
4853 if (quotient == 0)
4854 {
4855 if (target && GET_MODE (target) != compute_mode)
4856 target = 0;
4857
4858 if (rem_flag)
4859 {
4860 /* Try to produce the remainder without producing the quotient.
4861 If we seem to have a divmod pattern that does not require widening,
4862 don't try widening here. We should really have a WIDEN argument
4863 to expand_twoval_binop, since what we'd really like to do here is
4864 1) try a mod insn in compute_mode
4865 2) try a divmod insn in compute_mode
4866 3) try a div insn in compute_mode and multiply-subtract to get
4867 remainder
4868 4) try the same things with widening allowed. */
4869 remainder
4870 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4871 op0, op1, target,
4872 unsignedp,
4873 ((optab_handler (optab2, compute_mode)
4874 != CODE_FOR_nothing)
4875 ? OPTAB_DIRECT : OPTAB_WIDEN));
4876 if (remainder == 0)
4877 {
4878 /* No luck there. Can we do remainder and divide at once
4879 without a library call? */
4880 remainder = gen_reg_rtx (compute_mode);
4881 if (! expand_twoval_binop ((unsignedp
4882 ? udivmod_optab
4883 : sdivmod_optab),
4884 op0, op1,
4885 NULL_RTX, remainder, unsignedp))
4886 remainder = 0;
4887 }
4888
4889 if (remainder)
4890 return gen_lowpart (mode, remainder);
4891 }
4892
4893 /* Produce the quotient. Try a quotient insn, but not a library call.
4894 If we have a divmod in this mode, use it in preference to widening
4895 the div (for this test we assume it will not fail). Note that optab2
4896 is set to the one of the two optabs that the call below will use. */
4897 quotient
4898 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4899 op0, op1, rem_flag ? NULL_RTX : target,
4900 unsignedp,
4901 ((optab_handler (optab2, compute_mode)
4902 != CODE_FOR_nothing)
4903 ? OPTAB_DIRECT : OPTAB_WIDEN));
4904
4905 if (quotient == 0)
4906 {
4907 /* No luck there. Try a quotient-and-remainder insn,
4908 keeping the quotient alone. */
4909 quotient = gen_reg_rtx (compute_mode);
4910 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4911 op0, op1,
4912 quotient, NULL_RTX, unsignedp))
4913 {
4914 quotient = 0;
4915 if (! rem_flag)
4916 /* Still no luck. If we are not computing the remainder,
4917 use a library call for the quotient. */
4918 quotient = sign_expand_binop (compute_mode,
4919 udiv_optab, sdiv_optab,
4920 op0, op1, target,
4921 unsignedp, OPTAB_LIB_WIDEN);
4922 }
4923 }
4924 }
4925
4926 if (rem_flag)
4927 {
4928 if (target && GET_MODE (target) != compute_mode)
4929 target = 0;
4930
4931 if (quotient == 0)
4932 {
4933 /* No divide instruction either. Use library for remainder. */
4934 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4935 op0, op1, target,
4936 unsignedp, OPTAB_LIB_WIDEN);
4937 /* No remainder function. Try a quotient-and-remainder
4938 function, keeping the remainder. */
4939 if (!remainder)
4940 {
4941 remainder = gen_reg_rtx (compute_mode);
4942 if (!expand_twoval_binop_libfunc
4943 (unsignedp ? udivmod_optab : sdivmod_optab,
4944 op0, op1,
4945 NULL_RTX, remainder,
4946 unsignedp ? UMOD : MOD))
4947 remainder = NULL_RTX;
4948 }
4949 }
4950 else
4951 {
4952 /* We divided. Now finish doing X - Y * (X / Y). */
4953 remainder = expand_mult (compute_mode, quotient, op1,
4954 NULL_RTX, unsignedp);
4955 remainder = expand_binop (compute_mode, sub_optab, op0,
4956 remainder, target, unsignedp,
4957 OPTAB_LIB_WIDEN);
4958 }
4959 }
4960
4961 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4962 }
4963 \f
4964 /* Return a tree node with data type TYPE, describing the value of X.
4965 Usually this is a VAR_DECL, if there is no obvious better choice.
4966 X may be an expression; however, we only support those expressions
4967 generated by loop.c. */
4968
4969 tree
4970 make_tree (tree type, rtx x)
4971 {
4972 tree t;
4973
4974 switch (GET_CODE (x))
4975 {
4976 case CONST_INT:
4977 {
4978 HOST_WIDE_INT hi = 0;
4979
4980 if (INTVAL (x) < 0
4981 && !(TYPE_UNSIGNED (type)
4982 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4983 < HOST_BITS_PER_WIDE_INT)))
4984 hi = -1;
4985
4986 t = build_int_cst_wide (type, INTVAL (x), hi);
4987
4988 return t;
4989 }
4990
4991 case CONST_DOUBLE:
4992 if (GET_MODE (x) == VOIDmode)
4993 t = build_int_cst_wide (type,
4994 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4995 else
4996 {
4997 REAL_VALUE_TYPE d;
4998
4999 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5000 t = build_real (type, d);
5001 }
5002
5003 return t;
5004
5005 case CONST_VECTOR:
5006 {
5007 int units = CONST_VECTOR_NUNITS (x);
5008 tree itype = TREE_TYPE (type);
5009 tree *elts;
5010 int i;
5011
5012 /* Build a tree with vector elements. */
5013 elts = XALLOCAVEC (tree, units);
5014 for (i = units - 1; i >= 0; --i)
5015 {
5016 rtx elt = CONST_VECTOR_ELT (x, i);
5017 elts[i] = make_tree (itype, elt);
5018 }
5019
5020 return build_vector (type, elts);
5021 }
5022
5023 case PLUS:
5024 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 make_tree (type, XEXP (x, 1)));
5026
5027 case MINUS:
5028 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5029 make_tree (type, XEXP (x, 1)));
5030
5031 case NEG:
5032 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5033
5034 case MULT:
5035 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5036 make_tree (type, XEXP (x, 1)));
5037
5038 case ASHIFT:
5039 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5040 make_tree (type, XEXP (x, 1)));
5041
5042 case LSHIFTRT:
5043 t = unsigned_type_for (type);
5044 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5045 make_tree (t, XEXP (x, 0)),
5046 make_tree (type, XEXP (x, 1))));
5047
5048 case ASHIFTRT:
5049 t = signed_type_for (type);
5050 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5051 make_tree (t, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1))));
5053
5054 case DIV:
5055 if (TREE_CODE (type) != REAL_TYPE)
5056 t = signed_type_for (type);
5057 else
5058 t = type;
5059
5060 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5061 make_tree (t, XEXP (x, 0)),
5062 make_tree (t, XEXP (x, 1))));
5063 case UDIV:
5064 t = unsigned_type_for (type);
5065 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5066 make_tree (t, XEXP (x, 0)),
5067 make_tree (t, XEXP (x, 1))));
5068
5069 case SIGN_EXTEND:
5070 case ZERO_EXTEND:
5071 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5072 GET_CODE (x) == ZERO_EXTEND);
5073 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5074
5075 case CONST:
5076 return make_tree (type, XEXP (x, 0));
5077
5078 case SYMBOL_REF:
5079 t = SYMBOL_REF_DECL (x);
5080 if (t)
5081 return fold_convert (type, build_fold_addr_expr (t));
5082 /* else fall through. */
5083
5084 default:
5085 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5086
5087 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5088 address mode to pointer mode. */
5089 if (POINTER_TYPE_P (type))
5090 x = convert_memory_address_addr_space
5091 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5092
5093 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5094 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5095 t->decl_with_rtl.rtl = x;
5096
5097 return t;
5098 }
5099 }
5100 \f
5101 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5102 and returning TARGET.
5103
5104 If TARGET is 0, a pseudo-register or constant is returned. */
5105
5106 rtx
5107 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5108 {
5109 rtx tem = 0;
5110
5111 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5112 tem = simplify_binary_operation (AND, mode, op0, op1);
5113 if (tem == 0)
5114 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5115
5116 if (target == 0)
5117 target = tem;
5118 else if (tem != target)
5119 emit_move_insn (target, tem);
5120 return target;
5121 }
5122
5123 /* Helper function for emit_store_flag. */
5124 static rtx
5125 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5126 enum machine_mode mode, enum machine_mode compare_mode,
5127 int unsignedp, rtx x, rtx y, int normalizep,
5128 enum machine_mode target_mode)
5129 {
5130 struct expand_operand ops[4];
5131 rtx op0, last, comparison, subtarget;
5132 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5133
5134 last = get_last_insn ();
5135 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5136 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5137 if (!x || !y)
5138 {
5139 delete_insns_since (last);
5140 return NULL_RTX;
5141 }
5142
5143 if (target_mode == VOIDmode)
5144 target_mode = result_mode;
5145 if (!target)
5146 target = gen_reg_rtx (target_mode);
5147
5148 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5149
5150 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5151 create_fixed_operand (&ops[1], comparison);
5152 create_fixed_operand (&ops[2], x);
5153 create_fixed_operand (&ops[3], y);
5154 if (!maybe_expand_insn (icode, 4, ops))
5155 {
5156 delete_insns_since (last);
5157 return NULL_RTX;
5158 }
5159 subtarget = ops[0].value;
5160
5161 /* If we are converting to a wider mode, first convert to
5162 TARGET_MODE, then normalize. This produces better combining
5163 opportunities on machines that have a SIGN_EXTRACT when we are
5164 testing a single bit. This mostly benefits the 68k.
5165
5166 If STORE_FLAG_VALUE does not have the sign bit set when
5167 interpreted in MODE, we can do this conversion as unsigned, which
5168 is usually more efficient. */
5169 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5170 {
5171 convert_move (target, subtarget,
5172 val_signbit_known_clear_p (result_mode,
5173 STORE_FLAG_VALUE));
5174 op0 = target;
5175 result_mode = target_mode;
5176 }
5177 else
5178 op0 = subtarget;
5179
5180 /* If we want to keep subexpressions around, don't reuse our last
5181 target. */
5182 if (optimize)
5183 subtarget = 0;
5184
5185 /* Now normalize to the proper value in MODE. Sometimes we don't
5186 have to do anything. */
5187 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5188 ;
5189 /* STORE_FLAG_VALUE might be the most negative number, so write
5190 the comparison this way to avoid a compile-time warning. */
5191 else if (- normalizep == STORE_FLAG_VALUE)
5192 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5193
5194 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5195 it hard to use a value of just the sign bit due to ANSI integer
5196 constant typing rules. */
5197 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5198 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5199 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5200 normalizep == 1);
5201 else
5202 {
5203 gcc_assert (STORE_FLAG_VALUE & 1);
5204
5205 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5206 if (normalizep == -1)
5207 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5208 }
5209
5210 /* If we were converting to a smaller mode, do the conversion now. */
5211 if (target_mode != result_mode)
5212 {
5213 convert_move (target, op0, 0);
5214 return target;
5215 }
5216 else
5217 return op0;
5218 }
5219
5220
5221 /* A subroutine of emit_store_flag only including "tricks" that do not
5222 need a recursive call. These are kept separate to avoid infinite
5223 loops. */
5224
5225 static rtx
5226 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5227 enum machine_mode mode, int unsignedp, int normalizep,
5228 enum machine_mode target_mode)
5229 {
5230 rtx subtarget;
5231 enum insn_code icode;
5232 enum machine_mode compare_mode;
5233 enum mode_class mclass;
5234 enum rtx_code scode;
5235 rtx tem;
5236
5237 if (unsignedp)
5238 code = unsigned_condition (code);
5239 scode = swap_condition (code);
5240
5241 /* If one operand is constant, make it the second one. Only do this
5242 if the other operand is not constant as well. */
5243
5244 if (swap_commutative_operands_p (op0, op1))
5245 {
5246 tem = op0;
5247 op0 = op1;
5248 op1 = tem;
5249 code = swap_condition (code);
5250 }
5251
5252 if (mode == VOIDmode)
5253 mode = GET_MODE (op0);
5254
5255 /* For some comparisons with 1 and -1, we can convert them to
5256 comparisons with zero. This will often produce more opportunities for
5257 store-flag insns. */
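   /* For instance, with signed operands "op0 < 1" is the same test as
      "op0 <= 0" and "op0 >= 1" the same as "op0 > 0", while the unsigned
      tests "op0 >= 1" and "op0 < 1" are just "op0 != 0" and "op0 == 0".  */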
5258
5259 switch (code)
5260 {
5261 case LT:
5262 if (op1 == const1_rtx)
5263 op1 = const0_rtx, code = LE;
5264 break;
5265 case LE:
5266 if (op1 == constm1_rtx)
5267 op1 = const0_rtx, code = LT;
5268 break;
5269 case GE:
5270 if (op1 == const1_rtx)
5271 op1 = const0_rtx, code = GT;
5272 break;
5273 case GT:
5274 if (op1 == constm1_rtx)
5275 op1 = const0_rtx, code = GE;
5276 break;
5277 case GEU:
5278 if (op1 == const1_rtx)
5279 op1 = const0_rtx, code = NE;
5280 break;
5281 case LTU:
5282 if (op1 == const1_rtx)
5283 op1 = const0_rtx, code = EQ;
5284 break;
5285 default:
5286 break;
5287 }
5288
5289 /* If we are comparing a double-word integer with zero or -1, we can
5290 convert the comparison into one involving a single word. */
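   /* Concretely, assuming 32-bit words and a 64-bit operand split into
      low and high words LO and HI: the operand is zero iff (LO | HI) == 0,
      it is -1 iff (LO & HI) == -1, and for a test against the sign bit
      only HI needs to be examined.  */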
5291 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5292 && GET_MODE_CLASS (mode) == MODE_INT
5293 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5294 {
5295 if ((code == EQ || code == NE)
5296 && (op1 == const0_rtx || op1 == constm1_rtx))
5297 {
5298 rtx op00, op01;
5299
5300 /* Do a logical OR or AND of the two words and compare the
5301 result. */
5302 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5303 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5304 tem = expand_binop (word_mode,
5305 op1 == const0_rtx ? ior_optab : and_optab,
5306 op00, op01, NULL_RTX, unsignedp,
5307 OPTAB_DIRECT);
5308
5309 if (tem != 0)
5310 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5311 unsignedp, normalizep);
5312 }
5313 else if ((code == LT || code == GE) && op1 == const0_rtx)
5314 {
5315 rtx op0h;
5316
5317 /* If testing the sign bit, we can just test the high word. */
5318 op0h = simplify_gen_subreg (word_mode, op0, mode,
5319 subreg_highpart_offset (word_mode,
5320 mode));
5321 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5322 unsignedp, normalizep);
5323 }
5324 else
5325 tem = NULL_RTX;
5326
5327 if (tem)
5328 {
5329 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5330 return tem;
5331 if (!target)
5332 target = gen_reg_rtx (target_mode);
5333
5334 convert_move (target, tem,
5335 !val_signbit_known_set_p (word_mode,
5336 (normalizep ? normalizep
5337 : STORE_FLAG_VALUE)));
5338 return target;
5339 }
5340 }
5341
5342 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5343 complement of A (for GE) and shifting the sign bit to the low bit. */
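   /* As an illustration, assuming 8-bit values: A < 0 is just the sign
      bit of A, so a logical shift right by 7 yields the desired 0/1
      value; for A >= 0 we complement A first so that its sign bit is
      inverted.  When a 0/-1 result is wanted, an arithmetic shift is
      used instead, smearing the sign bit across the whole word.  */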
5344 if (op1 == const0_rtx && (code == LT || code == GE)
5345 && GET_MODE_CLASS (mode) == MODE_INT
5346 && (normalizep || STORE_FLAG_VALUE == 1
5347 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5348 {
5349 subtarget = target;
5350
5351 if (!target)
5352 target_mode = mode;
5353
5354 /* If the result is to be wider than OP0, it is best to convert it
5355 first. If it is to be narrower, it is *incorrect* to convert it
5356 first. */
5357 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5358 {
5359 op0 = convert_modes (target_mode, mode, op0, 0);
5360 mode = target_mode;
5361 }
5362
5363 if (target_mode != mode)
5364 subtarget = 0;
5365
5366 if (code == GE)
5367 op0 = expand_unop (mode, one_cmpl_optab, op0,
5368 ((STORE_FLAG_VALUE == 1 || normalizep)
5369 ? 0 : subtarget), 0);
5370
5371 if (STORE_FLAG_VALUE == 1 || normalizep)
5372 /* If we are supposed to produce a 0/1 value, we want to do
5373 a logical shift from the sign bit to the low-order bit; for
5374 a -1/0 value, we do an arithmetic shift. */
5375 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5376 GET_MODE_BITSIZE (mode) - 1,
5377 subtarget, normalizep != -1);
5378
5379 if (mode != target_mode)
5380 op0 = convert_modes (target_mode, mode, op0, 0);
5381
5382 return op0;
5383 }
5384
5385 mclass = GET_MODE_CLASS (mode);
5386 for (compare_mode = mode; compare_mode != VOIDmode;
5387 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5388 {
5389 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5390 icode = optab_handler (cstore_optab, optab_mode);
5391 if (icode != CODE_FOR_nothing)
5392 {
5393 do_pending_stack_adjust ();
5394 tem = emit_cstore (target, icode, code, mode, compare_mode,
5395 unsignedp, op0, op1, normalizep, target_mode);
5396 if (tem)
5397 return tem;
5398
5399 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5400 {
5401 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5402 unsignedp, op1, op0, normalizep, target_mode);
5403 if (tem)
5404 return tem;
5405 }
5406 break;
5407 }
5408 }
5409
5410 return 0;
5411 }
5412
5413 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5414 and store the result in TARGET. Normally return TARGET.
5415 Return 0 if that cannot be done.
5416
5417 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5418 it is VOIDmode, they cannot both be CONST_INT.
5419
5420 UNSIGNEDP is for the case where we have to widen the operands
5421 to perform the operation. It says to use zero-extension.
5422
5423 NORMALIZEP is 1 if we should convert the result to be either zero
5424 or one. NORMALIZEP is -1 if we should convert the result to be
5425 either zero or -1. If NORMALIZEP is zero, the result will be left
5426 "raw" out of the scc insn. */
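/* For example, emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1)
   tries to leave 1 in TARGET when X is zero and 0 otherwise, while
   passing NORMALIZEP as -1 would request a 0/-1 result instead; either
   call returns 0 if no suitable sequence can be emitted.  */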
5427
5428 rtx
5429 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5430 enum machine_mode mode, int unsignedp, int normalizep)
5431 {
5432 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5433 enum rtx_code rcode;
5434 rtx subtarget;
5435 rtx tem, last, trueval;
5436
5437 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5438 target_mode);
5439 if (tem)
5440 return tem;
5441
5442 /* If we reached here, we can't do this with a scc insn; however, there
5443 are some comparisons that can be done in other ways. Don't do any
5444 of these cases if branches are very cheap. */
5445 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5446 return 0;
5447
5448 /* See what we need to return. We can only return a 1, -1, or the
5449 sign bit. */
5450
5451 if (normalizep == 0)
5452 {
5453 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5454 normalizep = STORE_FLAG_VALUE;
5455
5456 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5457 ;
5458 else
5459 return 0;
5460 }
5461
5462 last = get_last_insn ();
5463
5464 /* If optimizing, use different pseudo registers for each insn, instead
5465 of reusing the same pseudo. This leads to better CSE, but slows
5466 down the compiler, since there are more pseudos. */
5467 subtarget = (!optimize
5468 && (target_mode == mode)) ? target : NULL_RTX;
5469 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5470
5471 /* For floating-point comparisons, try the reverse comparison or try
5472 changing the "orderedness" of the comparison. */
5473 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5474 {
5475 enum rtx_code first_code;
5476 bool and_them;
5477
5478 rcode = reverse_condition_maybe_unordered (code);
5479 if (can_compare_p (rcode, mode, ccp_store_flag)
5480 && (code == ORDERED || code == UNORDERED
5481 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5482 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5483 {
5484 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5485 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5486
5487 /* For the reverse comparison, use either an addition or a XOR. */
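	  /* The trick: compute the reverse condition RCODE normalized to
	     STORE_FLAG_VALUE and add NORMALIZEP (e.g. 1 + -1 = 0 when
	     RCODE holds, 0 + -1 = -1 when it does not), or compute RCODE
	     normalized as requested and XOR it with the true value,
	     which swaps the two possible results.  */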
5488 if (want_add
5489 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5490 optimize_insn_for_speed_p ()) == 0)
5491 {
5492 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5493 STORE_FLAG_VALUE, target_mode);
5494 if (tem)
5495 return expand_binop (target_mode, add_optab, tem,
5496 GEN_INT (normalizep),
5497 target, 0, OPTAB_WIDEN);
5498 }
5499 else if (!want_add
5500 && rtx_cost (trueval, XOR, 1,
5501 optimize_insn_for_speed_p ()) == 0)
5502 {
5503 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5504 normalizep, target_mode);
5505 if (tem)
5506 return expand_binop (target_mode, xor_optab, tem, trueval,
5507 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5508 }
5509 }
5510
5511 delete_insns_since (last);
5512
5513 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5514 if (code == ORDERED || code == UNORDERED)
5515 return 0;
5516
5517 and_them = split_comparison (code, mode, &first_code, &code);
5518
5519 /* If there are no NaNs, the first comparison should always fall through.
5520 Effectively change the comparison to the other one. */
5521 if (!HONOR_NANS (mode))
5522 {
5523 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5524 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5525 target_mode);
5526 }
5527
5528 #ifdef HAVE_conditional_move
5529 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5530 conditional move. */
5531 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5532 normalizep, target_mode);
5533 if (tem == 0)
5534 return 0;
5535
5536 if (and_them)
5537 tem = emit_conditional_move (target, code, op0, op1, mode,
5538 tem, const0_rtx, GET_MODE (tem), 0);
5539 else
5540 tem = emit_conditional_move (target, code, op0, op1, mode,
5541 trueval, tem, GET_MODE (tem), 0);
5542
5543 if (tem == 0)
5544 delete_insns_since (last);
5545 return tem;
5546 #else
5547 return 0;
5548 #endif
5549 }
5550
5551 /* The remaining tricks only apply to integer comparisons. */
5552
5553 if (GET_MODE_CLASS (mode) != MODE_INT)
5554 return 0;
5555
5556 /* If this is an equality comparison of integers, we can try to exclusive-or
5557 (or subtract) the two operands and use a recursive call to try the
5558 comparison with zero. Don't do any of these cases if branches are
5559 very cheap. */
5560
5561 if ((code == EQ || code == NE) && op1 != const0_rtx)
5562 {
5563 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5564 OPTAB_WIDEN);
5565
5566 if (tem == 0)
5567 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5568 OPTAB_WIDEN);
5569 if (tem != 0)
5570 tem = emit_store_flag (target, code, tem, const0_rtx,
5571 mode, unsignedp, normalizep);
5572 if (tem != 0)
5573 return tem;
5574
5575 delete_insns_since (last);
5576 }
5577
5578 /* For integer comparisons, try the reverse comparison. However, for
5579 small X, if we would have to extend it anyway, implementing "X != 0"
5580 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
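   /* Assuming 32-bit int and X zero-extended from a narrower mode:
      -(int) X is negative exactly when X is nonzero, so the arithmetic
      shift "-(int) X >> 31" yields -1 for any nonzero X and 0 for zero,
      i.e. a normalized "X != 0" without a store-flag instruction.  */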
5581 rcode = reverse_condition (code);
5582 if (can_compare_p (rcode, mode, ccp_store_flag)
5583 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5584 && code == NE
5585 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5586 && op1 == const0_rtx))
5587 {
5588 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5589 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5590
5591 /* Again, for the reverse comparison, use either an addition or a XOR. */
5592 if (want_add
5593 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5594 optimize_insn_for_speed_p ()) == 0)
5595 {
5596 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5597 STORE_FLAG_VALUE, target_mode);
5598 if (tem != 0)
5599 tem = expand_binop (target_mode, add_optab, tem,
5600 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5601 }
5602 else if (!want_add
5603 && rtx_cost (trueval, XOR, 1,
5604 optimize_insn_for_speed_p ()) == 0)
5605 {
5606 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5607 normalizep, target_mode);
5608 if (tem != 0)
5609 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5610 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5611 }
5612
5613 if (tem != 0)
5614 return tem;
5615 delete_insns_since (last);
5616 }
5617
5618 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5619 the constant zero. Reject all other comparisons at this point. Only
5620 do LE and GT if branches are expensive since they are expensive on
5621 2-operand machines. */
5622
5623 if (op1 != const0_rtx
5624 || (code != EQ && code != NE
5625 && (BRANCH_COST (optimize_insn_for_speed_p (),
5626 false) <= 1 || (code != LE && code != GT))))
5627 return 0;
5628
5629 /* Try to put the result of the comparison in the sign bit. Assume we can't
5630 do the necessary operation below. */
5631
5632 tem = 0;
5633
5634 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5635 the sign bit set. */
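   /* Worked example in 8 bits: A = 0 gives 0x00 | 0xff = 0xff (sign set);
      A = 5 gives 0x05 | 0x04 = 0x05 (sign clear); A = -3 gives
      0xfd | 0xfc = 0xfd (sign set).  For A > 0 neither A nor A - 1 can
      have the sign bit, so the OR has it set exactly when A <= 0.  */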
5636
5637 if (code == LE)
5638 {
5639 /* This is destructive, so SUBTARGET can't be OP0. */
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5642
5643 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5644 OPTAB_WIDEN);
5645 if (tem)
5646 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5647 OPTAB_WIDEN);
5648 }
5649
5650 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5651 number of bits in the mode of OP0, minus one. */
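   /* In 8 bits BITS is 7: for A = 5 the arithmetic shift yields 0 and
      0 - 5 = -5 has the sign bit set; for A = 0 the result is 0; for
      A = -3 the shift yields -1 and -1 - (-3) = 2 is non-negative.  So
      the sign bit of the result is set exactly when A > 0.  */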
5652
5653 if (code == GT)
5654 {
5655 if (rtx_equal_p (subtarget, op0))
5656 subtarget = 0;
5657
5658 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5659 GET_MODE_BITSIZE (mode) - 1,
5660 subtarget, 0);
5661 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5662 OPTAB_WIDEN);
5663 }
5664
5665 if (code == EQ || code == NE)
5666 {
5667 /* For EQ or NE, one way to do the comparison is to apply an operation
5668 that converts the operand into a positive number if it is nonzero
5669 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5670 for NE we negate. This puts the result in the sign bit. Then we
5671 normalize with a shift, if needed.
5672
5673 Two operations that can do the above actions are ABS and FFS, so try
5674 them. If that doesn't work, and MODE is smaller than a full word,
5675 we can use zero-extension to the wider mode (an unsigned conversion)
5676 as the operation. */
5677
5678 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5679 that is compensated by the subsequent overflow when subtracting
5680 one / negating. */
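      /* As an example with ABS for an EQ test, in 8 bits: A = 0 gives
	 abs 0, and subtracting one leaves -1 (sign set, so A == 0 holds);
	 A = 3 gives abs 3, then 2 (sign clear); A = -128 gives abs -128,
	 and subtracting one wraps around to 127 (sign clear), which is
	 the overflow compensation mentioned above.  */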
5681
5682 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5683 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5684 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5685 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5686 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5687 {
5688 tem = convert_modes (word_mode, mode, op0, 1);
5689 mode = word_mode;
5690 }
5691
5692 if (tem != 0)
5693 {
5694 if (code == EQ)
5695 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5696 0, OPTAB_WIDEN);
5697 else
5698 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5699 }
5700
5701 /* If we couldn't do it that way, for NE we can "or" the two's complement
5702 of the value with itself. For EQ, we take the one's complement of
5703 that "or", which is an extra insn, so we only handle EQ if branches
5704 are expensive. */
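      /* This works because (-A) | A has the sign bit set exactly when
	 A is nonzero: for A > 0 the negation is negative, for A < 0
	 the value itself is, and for A == 0 both terms are zero.  */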
5705
5706 if (tem == 0
5707 && (code == NE
5708 || BRANCH_COST (optimize_insn_for_speed_p (),
5709 false) > 1))
5710 {
5711 if (rtx_equal_p (subtarget, op0))
5712 subtarget = 0;
5713
5714 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5715 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5716 OPTAB_WIDEN);
5717
5718 if (tem && code == EQ)
5719 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5720 }
5721 }
5722
5723 if (tem && normalizep)
5724 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5725 GET_MODE_BITSIZE (mode) - 1,
5726 subtarget, normalizep == 1);
5727
5728 if (tem)
5729 {
5730 if (!target)
5731 ;
5732 else if (GET_MODE (tem) != target_mode)
5733 {
5734 convert_move (target, tem, 0);
5735 tem = target;
5736 }
5737 else if (!subtarget)
5738 {
5739 emit_move_insn (target, tem);
5740 tem = target;
5741 }
5742 }
5743 else
5744 delete_insns_since (last);
5745
5746 return tem;
5747 }
5748
5749 /* Like emit_store_flag, but always succeeds. */
5750
5751 rtx
5752 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5753 enum machine_mode mode, int unsignedp, int normalizep)
5754 {
5755 rtx tem, label;
5756 rtx trueval, falseval;
5757
5758 /* First see if emit_store_flag can do the job. */
5759 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5760 if (tem != 0)
5761 return tem;
5762
5763 if (!target)
5764 target = gen_reg_rtx (word_mode);
5765
5766 /* If this failed, we have to do this with set/compare/jump/set code.
5767 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5768 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5769 if (code == NE
5770 && GET_MODE_CLASS (mode) == MODE_INT
5771 && REG_P (target)
5772 && op0 == target
5773 && op1 == const0_rtx)
5774 {
5775 label = gen_label_rtx ();
5776 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5777 mode, NULL_RTX, NULL_RTX, label, -1);
5778 emit_move_insn (target, trueval);
5779 emit_label (label);
5780 return target;
5781 }
5782
5783 if (!REG_P (target)
5784 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5785 target = gen_reg_rtx (GET_MODE (target));
5786
5787 /* Jump in the right direction if the target cannot implement CODE
5788 but can jump on its reverse condition. */
5789 falseval = const0_rtx;
5790 if (! can_compare_p (code, mode, ccp_jump)
5791 && (! FLOAT_MODE_P (mode)
5792 || code == ORDERED || code == UNORDERED
5793 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5794 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5795 {
5796 enum rtx_code rcode;
5797 if (FLOAT_MODE_P (mode))
5798 rcode = reverse_condition_maybe_unordered (code);
5799 else
5800 rcode = reverse_condition (code);
5801
5802 /* Canonicalize to UNORDERED for the libcall. */
5803 if (can_compare_p (rcode, mode, ccp_jump)
5804 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5805 {
5806 falseval = trueval;
5807 trueval = const0_rtx;
5808 code = rcode;
5809 }
5810 }
5811
5812 emit_move_insn (target, trueval);
5813 label = gen_label_rtx ();
5814 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5815 NULL_RTX, label, -1);
5816
5817 emit_move_insn (target, falseval);
5818 emit_label (label);
5819
5820 return target;
5821 }
5822 \f
5823 /* Perform possibly multi-word comparison and conditional jump to LABEL
5824 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5825 now a thin wrapper around do_compare_rtx_and_jump. */
5826
5827 static void
5828 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5829 rtx label)
5830 {
5831 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5832 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5833 NULL_RTX, NULL_RTX, label, -1);
5834 }