expmed.c (store_bit_field_using_insv): New function, split out from...
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
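/* For example, 8 is 1000 in binary and 8 & 7 is 0, so 8 passes; 6 is 0110
   and 6 & 5 is 0100, so 6 fails.  Zero also passes the test, hence the
   _OR_ZERO in the name.  */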
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
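/* Dummy rtl expressions used when computing the cost tables in init_expmed.
   Each field is given its rtx code once and is then retargeted to each mode
   of interest with PUT_MODE before its cost is queried.  */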
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
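/* Record the cost of converting from FROM_MODE to TO_MODE, optimizing for
   speed if SPEED is true and for size otherwise.  */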
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
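/* Fill in the cost tables for operations in MODE, optimizing for speed if
   SPEED is nonzero and for size otherwise.  */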
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
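  /* Record the cost of shifting by each count M, together with the
     shift-and-add and shift-and-subtract combinations used when
     synthesizing multiplications.  */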
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
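/* Initialize the expmed cost tables for every integer, partial-integer and
   vector-integer mode, for both size- and speed-optimized code.  */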
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
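/* For example, on a 32-bit little-endian target, a field starting at bit 32
   of a DImode value is the lowpart of its second word, so this returns true
   and the subreg offset is 32 / BITS_PER_UNIT = 4.  */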
393
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
398 {
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
405 }
406 \f
407 /* Try to use an insv pattern to store VALUE into a field of OP0.
408 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
409 as for store_bit_field. */
410
411 static bool
412 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
413 unsigned HOST_WIDE_INT bitnum, rtx value,
414 enum machine_mode op_mode)
415 {
416 struct expand_operand ops[4];
417 rtx value1;
418 rtx xop0 = op0;
419 rtx last = get_last_insn ();
420 bool copy_back = false;
421
422 unsigned int unit = GET_MODE_BITSIZE (op_mode);
423 if (bitsize == 0 || bitsize > unit)
424 return false;
425
426 if (MEM_P (xop0))
427 {
428 /* Get a reference to the first byte of the field. */
429 xop0 = adjust_bitfield_address (xop0, byte_mode, bitnum / BITS_PER_UNIT);
430 bitnum %= BITS_PER_UNIT;
431 }
432 else
433 {
434 /* Convert from counting within OP0 to counting in OP_MODE. */
435 if (BYTES_BIG_ENDIAN)
436 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
437
438 /* If xop0 is a register, we need it in OP_MODE
439 to make it acceptable to the format of insv. */
440 if (GET_CODE (xop0) == SUBREG)
441 /* We can't just change the mode, because this might clobber op0,
442 and we will need the original value of op0 if insv fails. */
443 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
444 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
445 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
446 }
447
448 /* If the destination is a paradoxical subreg such that we need a
449 truncate to the inner mode, perform the insertion on a temporary and
450 truncate the result to the original destination. Note that we can't
451 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
452 X) 0)) is (reg:N X). */
453 if (GET_CODE (xop0) == SUBREG
454 && REG_P (SUBREG_REG (xop0))
455 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
456 op_mode))
457 {
458 rtx tem = gen_reg_rtx (op_mode);
459 emit_move_insn (tem, xop0);
460 xop0 = tem;
461 copy_back = true;
462 }
463
464 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
465 "backwards" from the size of the unit we are inserting into.
466 Otherwise, we count bits from the most significant on a
467 BYTES/BITS_BIG_ENDIAN machine. */
468
469 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
470 bitnum = unit - bitsize - bitnum;
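    /* For example, with a 32-bit insertion unit, a 4-bit field at
       lsb-relative position 8 becomes position 32 - 4 - 8 = 20 when the
       insv pattern counts bits from the most significant end.  */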
471
472 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
473 value1 = value;
474 if (GET_MODE (value) != op_mode)
475 {
476 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
477 {
478 /* Optimization: Don't bother really extending VALUE
479 if it has all the bits we will actually use. However,
480 if we must narrow it, be sure we do it correctly. */
481
482 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
483 {
484 rtx tmp;
485
486 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
487 if (! tmp)
488 tmp = simplify_gen_subreg (op_mode,
489 force_reg (GET_MODE (value),
490 value1),
491 GET_MODE (value), 0);
492 value1 = tmp;
493 }
494 else
495 value1 = gen_lowpart (op_mode, value1);
496 }
497 else if (CONST_INT_P (value))
498 value1 = gen_int_mode (INTVAL (value), op_mode);
499 else
500 /* Parse phase is supposed to make VALUE's data type
501 match that of the component reference, which is a type
502 at least as wide as the field; so VALUE should have
503 a mode that corresponds to that type. */
504 gcc_assert (CONSTANT_P (value));
505 }
506
507 create_fixed_operand (&ops[0], xop0);
508 create_integer_operand (&ops[1], bitsize);
509 create_integer_operand (&ops[2], bitnum);
510 create_input_operand (&ops[3], value1, op_mode);
511 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
512 {
513 if (copy_back)
514 convert_move (op0, xop0, true);
515 return true;
516 }
517 delete_insns_since (last);
518 return false;
519 }
520
521 /* A subroutine of store_bit_field, with the same arguments. Return true
522 if the operation could be implemented.
523
524 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
525 no other way of implementing the operation. If FALLBACK_P is false,
526 return false instead. */
527
528 static bool
529 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum,
531 unsigned HOST_WIDE_INT bitregion_start,
532 unsigned HOST_WIDE_INT bitregion_end,
533 enum machine_mode fieldmode,
534 rtx value, bool fallback_p)
535 {
536 rtx op0 = str_rtx;
537 rtx orig_value;
538
539 while (GET_CODE (op0) == SUBREG)
540 {
541 /* The following line once was done only if WORDS_BIG_ENDIAN,
542 but I think that is a mistake. WORDS_BIG_ENDIAN is
543 meaningful at a much higher level; when structures are copied
544 between memory and regs, the higher-numbered regs
545 always get higher addresses. */
546 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
547 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
548 int byte_offset = 0;
549
550 /* Paradoxical subregs need special handling on big endian machines. */
551 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
552 {
553 int difference = inner_mode_size - outer_mode_size;
554
555 if (WORDS_BIG_ENDIAN)
556 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
557 if (BYTES_BIG_ENDIAN)
558 byte_offset += difference % UNITS_PER_WORD;
559 }
560 else
561 byte_offset = SUBREG_BYTE (op0);
562
563 bitnum += byte_offset * BITS_PER_UNIT;
564 op0 = SUBREG_REG (op0);
565 }
566
567 /* No action is needed if the target is a register and if the field
568 lies completely outside that register. This can occur if the source
569 code contains an out-of-bounds access to a small array. */
570 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
571 return true;
572
573 /* Use vec_set patterns for inserting parts of vectors whenever
574 available. */
575 if (VECTOR_MODE_P (GET_MODE (op0))
576 && !MEM_P (op0)
577 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
578 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
579 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
580 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
581 {
582 struct expand_operand ops[3];
583 enum machine_mode outermode = GET_MODE (op0);
584 enum machine_mode innermode = GET_MODE_INNER (outermode);
585 enum insn_code icode = optab_handler (vec_set_optab, outermode);
586 int pos = bitnum / GET_MODE_BITSIZE (innermode);
587
588 create_fixed_operand (&ops[0], op0);
589 create_input_operand (&ops[1], value, innermode);
590 create_integer_operand (&ops[2], pos);
591 if (maybe_expand_insn (icode, 3, ops))
592 return true;
593 }
594
595 /* If the target is a register, then overwriting the entire object or
596 storing a full-word or multi-word field can be done with just a SUBREG. */
597 if (!MEM_P (op0)
598 && bitsize == GET_MODE_BITSIZE (fieldmode)
599 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
600 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
601 {
602 /* Use the subreg machinery either to narrow OP0 to the required
603 words or to cope with mode punning between equal-sized modes. */
604 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
605 bitnum / BITS_PER_UNIT);
606 if (sub)
607 {
608 emit_move_insn (sub, value);
609 return true;
610 }
611 }
612
613 /* If the target is memory, storing any naturally aligned field can be
614 done with a simple store. For targets that support fast unaligned
615 memory accesses, any naturally sized, unit-aligned field can be done directly. */
616 if (MEM_P (op0)
617 && bitnum % BITS_PER_UNIT == 0
618 && bitsize == GET_MODE_BITSIZE (fieldmode)
619 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
620 || (bitnum % bitsize == 0
621 && MEM_ALIGN (op0) % bitsize == 0)))
622 {
623 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
624 emit_move_insn (op0, value);
625 return true;
626 }
627
628 /* Make sure we are playing with integral modes. Pun with subregs
629 if we aren't. This must come after the entire register case above,
630 since that case is valid for any mode. The following cases are only
631 valid for integral modes. */
632 {
633 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
634 if (imode != GET_MODE (op0))
635 {
636 if (MEM_P (op0))
637 op0 = adjust_bitfield_address (op0, imode, 0);
638 else
639 {
640 gcc_assert (imode != BLKmode);
641 op0 = gen_lowpart (imode, op0);
642 }
643 }
644 }
645
646 /* Storing an lsb-aligned field in a register
647 can be done with a movstrict instruction. */
648
649 if (!MEM_P (op0)
650 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
651 && bitsize == GET_MODE_BITSIZE (fieldmode)
652 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
653 {
654 struct expand_operand ops[2];
655 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
656 rtx arg0 = op0;
657 unsigned HOST_WIDE_INT subreg_off;
658
659 if (GET_CODE (arg0) == SUBREG)
660 {
661 /* Else we've got some float mode source being extracted into
662 a different float mode destination -- this combination of
663 subregs results in Severe Tire Damage. */
664 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
665 || GET_MODE_CLASS (fieldmode) == MODE_INT
666 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
667 arg0 = SUBREG_REG (arg0);
668 }
669
670 subreg_off = bitnum / BITS_PER_UNIT;
671 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
672 {
673 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
674
675 create_fixed_operand (&ops[0], arg0);
676 /* Shrink the source operand to FIELDMODE. */
677 create_convert_operand_to (&ops[1], value, fieldmode, false);
678 if (maybe_expand_insn (icode, 2, ops))
679 return true;
680 }
681 }
682
683 /* Handle fields bigger than a word. */
684
685 if (bitsize > BITS_PER_WORD)
686 {
687 /* Here we transfer the words of the field
688 in the order least significant first.
689 This is because the most significant word is the one which may
690 be less than full.
691 However, only do that if the value is not BLKmode. */
692
693 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
694 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
695 unsigned int i;
696 rtx last;
697
698 /* This is the mode we must force value to, so that there will be enough
699 subwords to extract. Note that fieldmode will often (always?) be
700 VOIDmode, because that is what store_field uses to indicate that this
701 is a bit field, but passing VOIDmode to operand_subword_force
702 is not allowed. */
703 fieldmode = GET_MODE (value);
704 if (fieldmode == VOIDmode)
705 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
706
707 last = get_last_insn ();
708 for (i = 0; i < nwords; i++)
709 {
710 /* If I is 0, use the low-order word in both field and target;
711 if I is 1, use the next to lowest word; and so on. */
712 unsigned int wordnum = (backwards
713 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
714 - i - 1
715 : i);
716 unsigned int bit_offset = (backwards
717 ? MAX ((int) bitsize - ((int) i + 1)
718 * BITS_PER_WORD,
719 0)
720 : (int) i * BITS_PER_WORD);
721 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
722 unsigned HOST_WIDE_INT new_bitsize =
723 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
724
725 /* If the remaining chunk doesn't have full wordsize we have
726 to make sure that for big endian machines the higher order
727 bits are used. */
728 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
729 value_word = simplify_expand_binop (word_mode, lshr_optab,
730 value_word,
731 GEN_INT (BITS_PER_WORD
732 - new_bitsize),
733 NULL_RTX, true,
734 OPTAB_LIB_WIDEN);
735
736 if (!store_bit_field_1 (op0, new_bitsize,
737 bitnum + bit_offset,
738 bitregion_start, bitregion_end,
739 word_mode,
740 value_word, fallback_p))
741 {
742 delete_insns_since (last);
743 return false;
744 }
745 }
746 return true;
747 }
748
749 /* If VALUE has a floating-point or complex mode, access it as an
750 integer of the corresponding size. This can occur on a machine
751 with 64 bit registers that uses SFmode for float. It can also
752 occur for unaligned float or complex fields. */
753 orig_value = value;
754 if (GET_MODE (value) != VOIDmode
755 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
756 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
757 {
758 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
759 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
760 }
761
762 /* If OP0 is a multi-word register, narrow it to the affected word.
763 If the region spans two words, defer to store_split_bit_field. */
764 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
765 {
766 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
767 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
768 gcc_assert (op0);
769 bitnum %= BITS_PER_WORD;
770 if (bitnum + bitsize > BITS_PER_WORD)
771 {
772 if (!fallback_p)
773 return false;
774
775 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
776 bitregion_end, value);
777 return true;
778 }
779 }
780
781 /* From here on we can assume that the field to be stored in fits
782 within a word. If the destination is a register, it too fits
783 in a word. */
784
785 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
786 if (op_mode != MAX_MACHINE_MODE
787 /* Do not use insv for volatile bitfields when
788 -fstrict-volatile-bitfields is in effect. */
789 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
790 && flag_strict_volatile_bitfields > 0)
791 /* Do not use insv if the bit region is restricted and
792 op_mode integer at offset doesn't fit into the
793 restricted region. */
794 && !(MEM_P (op0) && bitregion_end
795 && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode)
796 > bitregion_end + 1)
797 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
798 return true;
799
800 /* If OP0 is a memory, try copying it to a register and seeing if a
801 cheap register alternative is available. */
802 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
803 {
804 enum machine_mode bestmode;
805 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
806
807 if (bitregion_end)
808 maxbits = bitregion_end - bitregion_start + 1;
809
810 /* Get the mode to use for inserting into this field. If OP0 is
811 BLKmode, get the smallest mode consistent with the alignment. If
812 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
813 mode. Otherwise, use the smallest mode containing the field. */
814
815 if (GET_MODE (op0) == BLKmode
816 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
817 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
818 bestmode = get_best_mode (bitsize, bitnum,
819 bitregion_start, bitregion_end,
820 MEM_ALIGN (op0), op_mode,
821 MEM_VOLATILE_P (op0));
822 else
823 bestmode = GET_MODE (op0);
824
825 if (bestmode != VOIDmode
826 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
827 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
828 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
829 {
830 rtx last, tempreg, xop0;
831 unsigned int unit;
832 unsigned HOST_WIDE_INT offset, bitpos;
833
834 last = get_last_insn ();
835
836 /* Adjust address to point to the containing unit of
837 that mode. Compute the offset as a multiple of this unit,
838 counting in bytes. */
839 unit = GET_MODE_BITSIZE (bestmode);
840 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
841 bitpos = bitnum % unit;
842 xop0 = adjust_bitfield_address (op0, bestmode, offset);
843
844 /* Fetch that unit, store the bitfield in it, then store
845 the unit. */
846 tempreg = copy_to_reg (xop0);
847 if (store_bit_field_1 (tempreg, bitsize, bitpos,
848 bitregion_start, bitregion_end,
849 fieldmode, orig_value, false))
850 {
851 emit_move_insn (xop0, tempreg);
852 return true;
853 }
854 delete_insns_since (last);
855 }
856 }
857
858 if (!fallback_p)
859 return false;
860
861 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
862 bitregion_end, value);
863 return true;
864 }
865
866 /* Generate code to store value from rtx VALUE
867 into a bit-field within structure STR_RTX
868 containing BITSIZE bits starting at bit BITNUM.
869
870 BITREGION_START is bitpos of the first bitfield in this region.
871 BITREGION_END is the bitpos of the ending bitfield in this region.
872 These two fields are 0, if the C++ memory model does not apply,
873 or we are not interested in keeping track of bitfield regions.
874
875 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
876
877 void
878 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
879 unsigned HOST_WIDE_INT bitnum,
880 unsigned HOST_WIDE_INT bitregion_start,
881 unsigned HOST_WIDE_INT bitregion_end,
882 enum machine_mode fieldmode,
883 rtx value)
884 {
885 /* Under the C++0x memory model, we must not touch bits outside the
886 bit region. Adjust the address to start at the beginning of the
887 bit region. */
888 if (MEM_P (str_rtx) && bitregion_start > 0)
889 {
890 enum machine_mode bestmode;
891 enum machine_mode op_mode;
892 unsigned HOST_WIDE_INT offset;
893
894 op_mode = mode_for_extraction (EP_insv, 3);
895 if (op_mode == MAX_MACHINE_MODE)
896 op_mode = VOIDmode;
897
898 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
899
900 offset = bitregion_start / BITS_PER_UNIT;
901 bitnum -= bitregion_start;
902 bitregion_end -= bitregion_start;
903 bitregion_start = 0;
904 bestmode = get_best_mode (bitsize, bitnum,
905 bitregion_start, bitregion_end,
906 MEM_ALIGN (str_rtx),
907 op_mode,
908 MEM_VOLATILE_P (str_rtx));
909 str_rtx = adjust_address (str_rtx, bestmode, offset);
910 }
911
912 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
913 bitregion_start, bitregion_end,
914 fieldmode, value, true))
915 gcc_unreachable ();
916 }
917 \f
918 /* Use shifts and boolean operations to store VALUE into a bit field of
919 width BITSIZE in OP0, starting at bit BITNUM. */
920
921 static void
922 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
923 unsigned HOST_WIDE_INT bitnum,
924 unsigned HOST_WIDE_INT bitregion_start,
925 unsigned HOST_WIDE_INT bitregion_end,
926 rtx value)
927 {
928 enum machine_mode mode;
929 rtx temp;
930 int all_zero = 0;
931 int all_one = 0;
932
933 /* There is a case not handled here:
934 a structure with a known alignment of just a halfword
935 and a field split across two aligned halfwords within the structure.
936 Or likewise a structure with a known alignment of just a byte
937 and a field split across two bytes.
938 Such cases are not supposed to be able to occur. */
939
940 if (MEM_P (op0))
941 {
942 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
943
944 if (bitregion_end)
945 maxbits = bitregion_end - bitregion_start + 1;
946
947 /* Get the proper mode to use for this field. We want a mode that
948 includes the entire field. If such a mode would be larger than
949 a word, we won't be doing the store the normal way.
950 We don't want a mode bigger than the destination. */
951
952 mode = GET_MODE (op0);
953 if (GET_MODE_BITSIZE (mode) == 0
954 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
955 mode = word_mode;
956
957 if (MEM_VOLATILE_P (op0)
958 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
959 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
960 && flag_strict_volatile_bitfields > 0)
961 mode = GET_MODE (op0);
962 else
963 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
964 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
965
966 if (mode == VOIDmode)
967 {
968 /* The only way this should occur is if the field spans word
969 boundaries. */
970 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
971 bitregion_end, value);
972 return;
973 }
974
975 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
976 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
977 bitnum -= bit_offset;
978 }
979
980 mode = GET_MODE (op0);
981 gcc_assert (SCALAR_INT_MODE_P (mode));
982
983 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
984 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
985
986 if (BYTES_BIG_ENDIAN)
987 /* BITNUM is the distance between our msb
988 and that of the containing datum.
989 Convert it to the distance from the lsb. */
990 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
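    /* For example, in a 32-bit mode, a 5-bit field whose msb lies 3 bits
       below the datum's msb has its lsb 32 - 5 - 3 = 24 bits above the
       datum's lsb.  */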
991
992 /* Now BITNUM is always the distance between our lsb
993 and that of OP0. */
994
995 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
996 we must first convert its mode to MODE. */
997
998 if (CONST_INT_P (value))
999 {
1000 HOST_WIDE_INT v = INTVAL (value);
1001
1002 if (bitsize < HOST_BITS_PER_WIDE_INT)
1003 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1004
1005 if (v == 0)
1006 all_zero = 1;
1007 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1008 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1009 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1010 all_one = 1;
1011
1012 value = lshift_value (mode, value, bitnum, bitsize);
1013 }
1014 else
1015 {
1016 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1017 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1018
1019 if (GET_MODE (value) != mode)
1020 value = convert_to_mode (mode, value, 1);
1021
1022 if (must_and)
1023 value = expand_binop (mode, and_optab, value,
1024 mask_rtx (mode, 0, bitsize, 0),
1025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1026 if (bitnum > 0)
1027 value = expand_shift (LSHIFT_EXPR, mode, value,
1028 bitnum, NULL_RTX, 1);
1029 }
1030
1031 /* Now clear the chosen bits in OP0,
1032 except that if VALUE is -1 we need not bother. */
1033 /* We keep the intermediates in registers to allow CSE to combine
1034 consecutive bitfield assignments. */
1035
1036 temp = force_reg (mode, op0);
1037
1038 if (! all_one)
1039 {
1040 temp = expand_binop (mode, and_optab, temp,
1041 mask_rtx (mode, bitnum, bitsize, 1),
1042 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1043 temp = force_reg (mode, temp);
1044 }
1045
1046 /* Now logical-or VALUE into OP0, unless it is zero. */
1047
1048 if (! all_zero)
1049 {
1050 temp = expand_binop (mode, ior_optab, temp, value,
1051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1052 temp = force_reg (mode, temp);
1053 }
1054
1055 if (op0 != temp)
1056 {
1057 op0 = copy_rtx (op0);
1058 emit_move_insn (op0, temp);
1059 }
1060 }
1061 \f
1062 /* Store a bit field that is split across multiple accessible memory objects.
1063
1064 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1065 BITSIZE is the field width; BITPOS the position of its first bit
1066 (within the word).
1067 VALUE is the value to store.
1068
1069 This does not yet handle fields wider than BITS_PER_WORD. */
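/* For example, on a 32-bit target, a 20-bit field starting at bit 28 of a
   word-aligned MEM spans two words and is stored as two pieces, of 4 bits
   and 16 bits, one in each word.  */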
1070
1071 static void
1072 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1073 unsigned HOST_WIDE_INT bitpos,
1074 unsigned HOST_WIDE_INT bitregion_start,
1075 unsigned HOST_WIDE_INT bitregion_end,
1076 rtx value)
1077 {
1078 unsigned int unit;
1079 unsigned int bitsdone = 0;
1080
1081 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1082 much at a time. */
1083 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1084 unit = BITS_PER_WORD;
1085 else
1086 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1087
1088 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1089 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1090 that VALUE might be a floating-point constant. */
1091 if (CONSTANT_P (value) && !CONST_INT_P (value))
1092 {
1093 rtx word = gen_lowpart_common (word_mode, value);
1094
1095 if (word && (value != word))
1096 value = word;
1097 else
1098 value = gen_lowpart_common (word_mode,
1099 force_reg (GET_MODE (value) != VOIDmode
1100 ? GET_MODE (value)
1101 : word_mode, value));
1102 }
1103
1104 while (bitsdone < bitsize)
1105 {
1106 unsigned HOST_WIDE_INT thissize;
1107 rtx part, word;
1108 unsigned HOST_WIDE_INT thispos;
1109 unsigned HOST_WIDE_INT offset;
1110
1111 offset = (bitpos + bitsdone) / unit;
1112 thispos = (bitpos + bitsdone) % unit;
1113
1114 /* When the region of bytes we can touch is restricted, shrink UNIT
1115 as needed when we approach the end of the region. */
1116 if (bitregion_end
1117 && unit > BITS_PER_UNIT
1118 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1119 {
1120 unit = unit / 2;
1121 continue;
1122 }
1123
1124 /* THISSIZE must not overrun a word boundary. Otherwise,
1125 store_fixed_bit_field will call us again, and we will mutually
1126 recurse forever. */
1127 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1128 thissize = MIN (thissize, unit - thispos);
1129
1130 if (BYTES_BIG_ENDIAN)
1131 {
1132 /* Fetch successively less significant portions. */
1133 if (CONST_INT_P (value))
1134 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1135 >> (bitsize - bitsdone - thissize))
1136 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1137 else
1138 {
1139 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1140 /* The args are chosen so that the last part includes the
1141 lsb. Give extract_bit_field the value it needs (with
1142 endianness compensation) to fetch the piece we want. */
1143 part = extract_fixed_bit_field (word_mode, value, thissize,
1144 total_bits - bitsize + bitsdone,
1145 NULL_RTX, 1, false);
1146 }
1147 }
1148 else
1149 {
1150 /* Fetch successively more significant portions. */
1151 if (CONST_INT_P (value))
1152 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1153 >> bitsdone)
1154 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1155 else
1156 part = extract_fixed_bit_field (word_mode, value, thissize,
1157 bitsdone, NULL_RTX, 1, false);
1158 }
1159
1160 /* If OP0 is a register, then handle OFFSET here.
1161
1162 When handling multiword bitfields, extract_bit_field may pass
1163 down a word_mode SUBREG of a larger REG for a bitfield that actually
1164 crosses a word boundary. Thus, for a SUBREG, we must find
1165 the current word starting from the base register. */
1166 if (GET_CODE (op0) == SUBREG)
1167 {
1168 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1169 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1170 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1171 word = word_offset ? const0_rtx : op0;
1172 else
1173 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1174 GET_MODE (SUBREG_REG (op0)));
1175 offset = 0;
1176 }
1177 else if (REG_P (op0))
1178 {
1179 enum machine_mode op0_mode = GET_MODE (op0);
1180 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1181 word = offset ? const0_rtx : op0;
1182 else
1183 word = operand_subword_force (op0, offset, GET_MODE (op0));
1184 offset = 0;
1185 }
1186 else
1187 word = op0;
1188
1189 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1190 it is just an out-of-bounds access. Ignore it. */
1191 if (word != const0_rtx)
1192 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1193 bitregion_start, bitregion_end, part);
1194 bitsdone += thissize;
1195 }
1196 }
1197 \f
1198 /* A subroutine of extract_bit_field_1 that converts return value X
1199 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1200 to extract_bit_field. */
1201
1202 static rtx
1203 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1204 enum machine_mode tmode, bool unsignedp)
1205 {
1206 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1207 return x;
1208
1209 /* If TMODE is not a scalar integral mode, first convert X to an
1210 integer mode of the same size and then access the result as a
1211 floating-point value via a SUBREG. */
1212 if (!SCALAR_INT_MODE_P (tmode))
1213 {
1214 enum machine_mode smode;
1215
1216 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1217 x = convert_to_mode (smode, x, unsignedp);
1218 x = force_reg (smode, x);
1219 return gen_lowpart (tmode, x);
1220 }
1221
1222 return convert_to_mode (tmode, x, unsignedp);
1223 }
1224
1225 /* Try to use an ext(z)v pattern to extract a field from OP0.
1226 Return the extracted value on success, otherwise return null.
1227 EXT_MODE is the mode of the extraction and the other arguments
1228 are as for extract_bit_field. */
1229
1230 static rtx
1231 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1232 unsigned HOST_WIDE_INT bitnum,
1233 int unsignedp, rtx target,
1234 enum machine_mode mode, enum machine_mode tmode,
1235 enum machine_mode ext_mode)
1236 {
1237 struct expand_operand ops[4];
1238 rtx spec_target = target;
1239 rtx spec_target_subreg = 0;
1240 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1241
1242 if (bitsize == 0 || unit < bitsize)
1243 return NULL_RTX;
1244
1245 if (MEM_P (op0))
1246 {
1247 /* Get a reference to the first byte of the field. */
1248 op0 = adjust_bitfield_address (op0, byte_mode, bitnum / BITS_PER_UNIT);
1249 bitnum %= BITS_PER_UNIT;
1250 }
1251 else
1252 {
1253 /* Convert from counting within OP0 to counting in EXT_MODE. */
1254 if (BYTES_BIG_ENDIAN)
1255 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1256
1257 /* If op0 is a register, we need it in EXT_MODE to make it
1258 acceptable to the format of ext(z)v. */
1259 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1260 return NULL_RTX;
1261 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1262 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1263 }
1264
1265 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1266 "backwards" from the size of the unit we are extracting from.
1267 Otherwise, we count bits from the most significant on a
1268 BYTES/BITS_BIG_ENDIAN machine. */
1269
1270 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1271 bitnum = unit - bitsize - bitnum;
1272
1273 if (target == 0)
1274 target = spec_target = gen_reg_rtx (tmode);
1275
1276 if (GET_MODE (target) != ext_mode)
1277 {
1278 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1279 between the mode of the extraction (word_mode) and the target
1280 mode. Instead, create a temporary and use convert_move to set
1281 the target. */
1282 if (REG_P (target)
1283 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1284 {
1285 target = gen_lowpart (ext_mode, target);
1286 if (GET_MODE_PRECISION (ext_mode)
1287 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1288 spec_target_subreg = target;
1289 }
1290 else
1291 target = gen_reg_rtx (ext_mode);
1292 }
1293
1294 create_output_operand (&ops[0], target, ext_mode);
1295 create_fixed_operand (&ops[1], op0);
1296 create_integer_operand (&ops[2], bitsize);
1297 create_integer_operand (&ops[3], bitnum);
1298 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1299 {
1300 target = ops[0].value;
1301 if (target == spec_target)
1302 return target;
1303 if (target == spec_target_subreg)
1304 return spec_target;
1305 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1306 }
1307 return NULL_RTX;
1308 }
1309
1310 /* A subroutine of extract_bit_field, with the same arguments.
1311 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1312 if we can find no other means of implementing the operation.
1313 If FALLBACK_P is false, return NULL instead. */
1314
1315 static rtx
1316 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1317 unsigned HOST_WIDE_INT bitnum,
1318 int unsignedp, bool packedp, rtx target,
1319 enum machine_mode mode, enum machine_mode tmode,
1320 bool fallback_p)
1321 {
1322 rtx op0 = str_rtx;
1323 enum machine_mode int_mode;
1324 enum machine_mode ext_mode;
1325 enum machine_mode mode1;
1326
1327 if (tmode == VOIDmode)
1328 tmode = mode;
1329
1330 while (GET_CODE (op0) == SUBREG)
1331 {
1332 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1333 op0 = SUBREG_REG (op0);
1334 }
1335
1336 /* If we have an out-of-bounds access to a register, just return an
1337 uninitialized register of the required mode. This can occur if the
1338 source code contains an out-of-bounds access to a small array. */
1339 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1340 return gen_reg_rtx (tmode);
1341
1342 if (REG_P (op0)
1343 && mode == GET_MODE (op0)
1344 && bitnum == 0
1345 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1346 {
1347 /* We're trying to extract a full register from itself. */
1348 return op0;
1349 }
1350
1351 /* See if we can get a better vector mode before extracting. */
1352 if (VECTOR_MODE_P (GET_MODE (op0))
1353 && !MEM_P (op0)
1354 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1355 {
1356 enum machine_mode new_mode;
1357
1358 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1359 new_mode = MIN_MODE_VECTOR_FLOAT;
1360 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1361 new_mode = MIN_MODE_VECTOR_FRACT;
1362 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1363 new_mode = MIN_MODE_VECTOR_UFRACT;
1364 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1365 new_mode = MIN_MODE_VECTOR_ACCUM;
1366 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1367 new_mode = MIN_MODE_VECTOR_UACCUM;
1368 else
1369 new_mode = MIN_MODE_VECTOR_INT;
1370
1371 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1372 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1373 && targetm.vector_mode_supported_p (new_mode))
1374 break;
1375 if (new_mode != VOIDmode)
1376 op0 = gen_lowpart (new_mode, op0);
1377 }
1378
1379 /* Use vec_extract patterns for extracting parts of vectors whenever
1380 available. */
1381 if (VECTOR_MODE_P (GET_MODE (op0))
1382 && !MEM_P (op0)
1383 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1384 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1385 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1386 {
1387 struct expand_operand ops[3];
1388 enum machine_mode outermode = GET_MODE (op0);
1389 enum machine_mode innermode = GET_MODE_INNER (outermode);
1390 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1391 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1392
1393 create_output_operand (&ops[0], target, innermode);
1394 create_input_operand (&ops[1], op0, outermode);
1395 create_integer_operand (&ops[2], pos);
1396 if (maybe_expand_insn (icode, 3, ops))
1397 {
1398 target = ops[0].value;
1399 if (GET_MODE (target) != mode)
1400 return gen_lowpart (tmode, target);
1401 return target;
1402 }
1403 }
1404
1405 /* Make sure we are playing with integral modes. Pun with subregs
1406 if we aren't. */
1407 {
1408 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1409 if (imode != GET_MODE (op0))
1410 {
1411 if (MEM_P (op0))
1412 op0 = adjust_bitfield_address (op0, imode, 0);
1413 else if (imode != BLKmode)
1414 {
1415 op0 = gen_lowpart (imode, op0);
1416
1417 /* If we got a SUBREG, force it into a register since we
1418 aren't going to be able to do another SUBREG on it. */
1419 if (GET_CODE (op0) == SUBREG)
1420 op0 = force_reg (imode, op0);
1421 }
1422 else if (REG_P (op0))
1423 {
1424 rtx reg, subreg;
1425 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1426 MODE_INT);
1427 reg = gen_reg_rtx (imode);
1428 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1429 emit_move_insn (subreg, op0);
1430 op0 = reg;
1431 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1432 }
1433 else
1434 {
1435 rtx mem = assign_stack_temp (GET_MODE (op0),
1436 GET_MODE_SIZE (GET_MODE (op0)));
1437 emit_move_insn (mem, op0);
1438 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1439 }
1440 }
1441 }
1442
1443 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1444 If that's wrong, the solution is to test for it and set TARGET to 0
1445 if needed. */
1446
1447 /* If the bitfield is volatile, we need to make sure the access
1448 remains on a type-aligned boundary. */
1449 if (GET_CODE (op0) == MEM
1450 && MEM_VOLATILE_P (op0)
1451 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1452 && flag_strict_volatile_bitfields > 0)
1453 goto no_subreg_mode_swap;
1454
1455 /* Only scalar integer modes can be converted via subregs. There is an
1456 additional problem for FP modes here in that they can have a precision
1457 which is different from the size. mode_for_size uses precision, but
1458 we want a mode based on the size, so we must avoid calling it for FP
1459 modes. */
1460 mode1 = mode;
1461 if (SCALAR_INT_MODE_P (tmode))
1462 {
1463 enum machine_mode try_mode = mode_for_size (bitsize,
1464 GET_MODE_CLASS (tmode), 0);
1465 if (try_mode != BLKmode)
1466 mode1 = try_mode;
1467 }
1468 gcc_assert (mode1 != BLKmode);
1469
1470 /* Extraction of a full MODE1 value can be done with a subreg as long
1471 as the least significant bit of the value is the least significant
1472 bit of either OP0 or a word of OP0. */
1473 if (!MEM_P (op0)
1474 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1475 && bitsize == GET_MODE_BITSIZE (mode1)
1476 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1477 {
1478 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1479 bitnum / BITS_PER_UNIT);
1480 if (sub)
1481 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1482 }
1483
1484 /* Extraction of a full MODE1 value can be done with a load as long as
1485 the field is on a byte boundary and is sufficiently aligned. */
1486 if (MEM_P (op0)
1487 && bitnum % BITS_PER_UNIT == 0
1488 && bitsize == GET_MODE_BITSIZE (mode1)
1489 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1490 || (bitnum % bitsize == 0
1491 && MEM_ALIGN (op0) % bitsize == 0)))
1492 {
1493 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1494 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1495 }
1496
1497 no_subreg_mode_swap:
1498
1499 /* Handle fields bigger than a word. */
1500
1501 if (bitsize > BITS_PER_WORD)
1502 {
1503 /* Here we transfer the words of the field
1504 in the order least significant first.
1505 This is because the most significant word is the one which may
1506 be less than full. */
1507
1508 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1509 unsigned int i;
1510 rtx last;
1511
1512 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1513 target = gen_reg_rtx (mode);
1514
1515 /* Indicate for flow that the entire target reg is being set. */
1516 emit_clobber (target);
1517
1518 last = get_last_insn ();
1519 for (i = 0; i < nwords; i++)
1520 {
1521 /* If I is 0, use the low-order word in both field and target;
1522 if I is 1, use the next to lowest word; and so on. */
1523 /* Word number in TARGET to use. */
1524 unsigned int wordnum
1525 = (WORDS_BIG_ENDIAN
1526 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1527 : i);
1528 /* Offset from start of field in OP0. */
1529 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1530 ? MAX (0, ((int) bitsize - ((int) i + 1)
1531 * (int) BITS_PER_WORD))
1532 : (int) i * BITS_PER_WORD);
1533 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1534 rtx result_part
1535 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1536 bitsize - i * BITS_PER_WORD),
1537 bitnum + bit_offset, 1, false, target_part,
1538 mode, word_mode, fallback_p);
1539
1540 gcc_assert (target_part);
1541 if (!result_part)
1542 {
1543 delete_insns_since (last);
1544 return NULL;
1545 }
1546
1547 if (result_part != target_part)
1548 emit_move_insn (target_part, result_part);
1549 }
1550
1551 if (unsignedp)
1552 {
1553 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1554 need to be zero'd out. */
1555 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1556 {
1557 unsigned int i, total_words;
1558
1559 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1560 for (i = nwords; i < total_words; i++)
1561 emit_move_insn
1562 (operand_subword (target,
1563 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1564 1, VOIDmode),
1565 const0_rtx);
1566 }
1567 return target;
1568 }
1569
1570 /* Signed bit field: sign-extend with two arithmetic shifts. */
1571 target = expand_shift (LSHIFT_EXPR, mode, target,
1572 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1573 return expand_shift (RSHIFT_EXPR, mode, target,
1574 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1575 }
1576
1577 /* If OP0 is a multi-word register, narrow it to the affected word.
1578 If the region spans two words, defer to extract_split_bit_field. */
1579 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1580 {
1581 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1582 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1583 bitnum %= BITS_PER_WORD;
1584 if (bitnum + bitsize > BITS_PER_WORD)
1585 {
1586 if (!fallback_p)
1587 return NULL_RTX;
1588 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1589 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1590 }
1591 }
1592
1593 /* From here on we know the desired field is smaller than a word.
1594 If OP0 is a register, it too fits within a word. */
1595
1596 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1597 if (ext_mode != MAX_MACHINE_MODE
1598 /* Do not use extv/extzv for volatile bitfields when
1599 -fstrict-volatile-bitfields is in effect. */
1600 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1601 && flag_strict_volatile_bitfields > 0))
1602 {
1603 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1604 unsignedp, target, mode,
1605 tmode, ext_mode);
1606 if (result)
1607 return result;
1608 }
1609
1610 /* If OP0 is a memory, try copying it to a register and seeing if a
1611 cheap register alternative is available. */
1612 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1613 {
1614 enum machine_mode bestmode;
1615
1616 /* Get the mode to use for inserting into this field. If
1617 OP0 is BLKmode, get the smallest mode consistent with the
1618 alignment. If OP0 is a non-BLKmode object that is no
1619 wider than EXT_MODE, use its mode. Otherwise, use the
1620 smallest mode containing the field. */
1621
1622 if (GET_MODE (op0) == BLKmode
1623 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1624 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1625 ext_mode, MEM_VOLATILE_P (op0));
1626 else
1627 bestmode = GET_MODE (op0);
1628
1629 if (bestmode != VOIDmode
1630 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1631 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1632 {
1633 unsigned HOST_WIDE_INT offset, bitpos;
1634
1635 /* Compute the offset as a multiple of this unit,
1636 counting in bytes. */
1637 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1638 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1639 bitpos = bitnum % unit;
1640
1641 /* Make sure the register is big enough for the whole field. */
1642 if (bitpos + bitsize <= unit)
1643 {
1644 rtx last, result, xop0;
1645
1646 last = get_last_insn ();
1647
1648 /* Fetch it to a register in that size. */
1649 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1650 xop0 = force_reg (bestmode, xop0);
1651 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1652 unsignedp, packedp, target,
1653 mode, tmode, false);
1654 if (result)
1655 return result;
1656
1657 delete_insns_since (last);
1658 }
1659 }
1660 }
1661
1662 if (!fallback_p)
1663 return NULL;
1664
1665 /* Find a correspondingly-sized integer field, so we can apply
1666 shifts and masks to it. */
1667 int_mode = int_mode_for_mode (tmode);
1668 if (int_mode == BLKmode)
1669 int_mode = int_mode_for_mode (mode);
1670 /* Should probably push op0 out to memory and then do a load. */
1671 gcc_assert (int_mode != BLKmode);
1672
1673 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1674 target, unsignedp, packedp);
1675 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1676 }
1677
1678 /* Generate code to extract a bit-field from STR_RTX
1679 containing BITSIZE bits, starting at BITNUM,
1680 and put it in TARGET if possible (if TARGET is nonzero).
1681 Regardless of TARGET, we return the rtx for where the value is placed.
1682
1683 STR_RTX is the structure containing the field (a REG or MEM).
1684 UNSIGNEDP is nonzero if this is an unsigned bit field.
1685 PACKEDP is nonzero if the field has the packed attribute.
1686 MODE is the natural mode of the field value once extracted.
1687 TMODE is the mode the caller would like the value to have;
1688 but the value may be returned with type MODE instead.
1689
1690 If a TARGET is specified and we can store in it at no extra cost,
1691 we do so, and return TARGET.
1692 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1693 if they are equally easy. */
1694
1695 rtx
1696 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1697 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1698 rtx target, enum machine_mode mode, enum machine_mode tmode)
1699 {
1700 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1701 target, mode, tmode, true);
1702 }
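/* As an illustrative (hypothetical) call, extracting the unsigned 16-bit
   field at bit 8 of a SImode value X, with no preferred TARGET, might look
   like
     extract_bit_field (x, 16, 8, 1, false, NULL_RTX, HImode, HImode);
   which returns an rtx, normally a HImode REG, holding the field.  */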
1703 \f
1704 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1705 from bit BITNUM of OP0.
1706
1707 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1708 PACKEDP is true if the field has the packed attribute.
1709
1710 If TARGET is nonzero, attempts to store the value there
1711 and return TARGET, but this is not guaranteed.
1712 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
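/* As a concrete illustration (assuming a 32-bit word, with BITNUM counted
   from the lsb), an unsigned 8-bit field at bit 20 of a SImode OP0 comes
   out as (op0 >> 20) & 0xff, while the signed case shifts the field's msb
   up to the word's msb and arithmetic-shifts it back down,
   ((int) (op0 << 4)) >> 24, so that the sign bit is replicated.  */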
1713
1714 static rtx
1715 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1716 unsigned HOST_WIDE_INT bitsize,
1717 unsigned HOST_WIDE_INT bitnum, rtx target,
1718 int unsignedp, bool packedp)
1719 {
1720 enum machine_mode mode;
1721
1722 if (MEM_P (op0))
1723 {
1724 /* Get the proper mode to use for this field. We want a mode that
1725 includes the entire field. If such a mode would be larger than
1726 a word, we won't be doing the extraction the normal way. */
1727
1728 if (MEM_VOLATILE_P (op0)
1729 && flag_strict_volatile_bitfields > 0)
1730 {
1731 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1732 mode = GET_MODE (op0);
1733 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1734 mode = GET_MODE (target);
1735 else
1736 mode = tmode;
1737 }
1738 else
1739 mode = get_best_mode (bitsize, bitnum, 0, 0,
1740 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1741
1742 if (mode == VOIDmode)
1743 /* The only way this should occur is if the field spans word
1744 boundaries. */
1745 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1746
1747 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1748 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1749
1750 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1751 if it results in a multi-word access where we otherwise wouldn't
1752 have one. So, check for that case here. */
1753 if (MEM_P (op0)
1754 && MEM_VOLATILE_P (op0)
1755 && flag_strict_volatile_bitfields > 0
1756 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1757 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1758 {
1759 if (STRICT_ALIGNMENT)
1760 {
1761 static bool informed_about_misalignment = false;
1762
1763 if (packedp)
1764 {
1765 if (bitsize == total_bits)
1766 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1767 "multiple accesses to volatile structure"
1768 " member because of packed attribute");
1769 else
1770 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1771 "multiple accesses to volatile structure"
1772 " bitfield because of packed attribute");
1773
1774 return extract_split_bit_field (op0, bitsize, bitnum,
1775 unsignedp);
1776 }
1777
1778 if (bitsize == total_bits)
1779 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1780 "mis-aligned access used for structure member");
1781 else
1782 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1783 "mis-aligned access used for structure bitfield");
1784
1785 if (! informed_about_misalignment)
1786 {
1787 informed_about_misalignment = true;
1788 inform (input_location,
1789 "when a volatile object spans multiple type-sized"
1790 " locations, the compiler must choose between using"
1791 " a single mis-aligned access to preserve the"
1792 " volatility, or using multiple aligned accesses"
1793 " to avoid runtime faults; this code may fail at"
1794 " runtime if the hardware does not allow this"
1795 " access");
1796 }
1797 }
1798 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1799 }
1800 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1801 bitnum -= bit_offset;
1802 }
1803
1804 mode = GET_MODE (op0);
1805 gcc_assert (SCALAR_INT_MODE_P (mode));
1806
1807 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1808 for invalid input, such as extract equivalent of f5 from
1809 gcc.dg/pr48335-2.c. */
1810
1811 if (BYTES_BIG_ENDIAN)
1812 /* BITNUM is the distance between our msb and that of OP0.
1813 Convert it to the distance from the lsb. */
1814 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1815
1816 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1817 We have reduced the big-endian case to the little-endian case. */
1818
1819 if (unsignedp)
1820 {
1821 if (bitnum)
1822 {
1823 /* If the field does not already start at the lsb,
1824 shift it so it does. */
1825 /* Maybe propagate the target for the shift. */
1826 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1827 if (tmode != mode)
1828 subtarget = 0;
1829 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1830 }
1831 /* Convert the value to the desired mode. */
1832 if (mode != tmode)
1833 op0 = convert_to_mode (tmode, op0, 1);
1834
1835 /* Unless the msb of the field used to be the msb when we shifted,
1836 mask out the upper bits. */
1837
1838 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1839 return expand_binop (GET_MODE (op0), and_optab, op0,
1840 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1841 target, 1, OPTAB_LIB_WIDEN);
1842 return op0;
1843 }
1844
1845 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1846 then arithmetic-shift its lsb to the lsb of the word. */
1847 op0 = force_reg (mode, op0);
1848
1849 /* Find the narrowest integer mode that contains the field. */
1850
1851 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1852 mode = GET_MODE_WIDER_MODE (mode))
1853 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1854 {
1855 op0 = convert_to_mode (mode, op0, 0);
1856 break;
1857 }
1858
1859 if (mode != tmode)
1860 target = 0;
1861
1862 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1863 {
1864 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1865 /* Maybe propagate the target for the shift. */
1866 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1867 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1868 }
1869
1870 return expand_shift (RSHIFT_EXPR, mode, op0,
1871 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1872 }
1873 \f
1874 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1875 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1876 complement of that if COMPLEMENT. The mask is truncated if
1877 necessary to the width of mode MODE. The mask is zero-extended if
1878 BITSIZE+BITPOS is too small for MODE. */
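/* For example, with these semantics mask_rtx (SImode, 4, 8, 0) is the
   constant 0x00000ff0, and mask_rtx (SImode, 4, 8, 1) its complement
   0xfffff00f.  */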
1879
1880 static rtx
1881 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1882 {
1883 double_int mask;
1884
1885 mask = double_int::mask (bitsize);
1886 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1887
1888 if (complement)
1889 mask = ~mask;
1890
1891 return immed_double_int_const (mask, mode);
1892 }
1893
1894 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1895 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1896
1897 static rtx
1898 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1899 {
1900 double_int val;
1901
1902 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1903 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1904
1905 return immed_double_int_const (val, mode);
1906 }
1907 \f
1908 /* Extract a bit field that is split across two words
1909 and return an RTX for the result.
1910
1911 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1912 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1913 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
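/* As an illustration, with 32-bit words a 20-bit field starting at bit 28
   is assembled from the 4 bits remaining in the first word and 16 bits of
   the second; each piece is fetched as an unsigned field and the pieces
   are combined with IOR below.  */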
1914
1915 static rtx
1916 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1917 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1918 {
1919 unsigned int unit;
1920 unsigned int bitsdone = 0;
1921 rtx result = NULL_RTX;
1922 int first = 1;
1923
1924 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1925 much at a time. */
1926 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1927 unit = BITS_PER_WORD;
1928 else
1929 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1930
1931 while (bitsdone < bitsize)
1932 {
1933 unsigned HOST_WIDE_INT thissize;
1934 rtx part, word;
1935 unsigned HOST_WIDE_INT thispos;
1936 unsigned HOST_WIDE_INT offset;
1937
1938 offset = (bitpos + bitsdone) / unit;
1939 thispos = (bitpos + bitsdone) % unit;
1940
1941 /* THISSIZE must not overrun a word boundary. Otherwise,
1942 extract_fixed_bit_field will call us again, and we will mutually
1943 recurse forever. */
1944 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1945 thissize = MIN (thissize, unit - thispos);
1946
1947 /* If OP0 is a register, then handle OFFSET here.
1948
1949 When handling multiword bitfields, extract_bit_field may pass
1950 down a word_mode SUBREG of a larger REG for a bitfield that actually
1951 crosses a word boundary. Thus, for a SUBREG, we must find
1952 the current word starting from the base register. */
1953 if (GET_CODE (op0) == SUBREG)
1954 {
1955 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1956 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1957 GET_MODE (SUBREG_REG (op0)));
1958 offset = 0;
1959 }
1960 else if (REG_P (op0))
1961 {
1962 word = operand_subword_force (op0, offset, GET_MODE (op0));
1963 offset = 0;
1964 }
1965 else
1966 word = op0;
1967
1968 /* Extract the parts in bit-counting order,
1969 whose meaning is determined by BYTES_PER_UNIT.
1970 OFFSET is in UNITs, and UNIT is in bits. */
1971 part = extract_fixed_bit_field (word_mode, word, thissize,
1972 offset * unit + thispos, 0, 1, false);
1973 bitsdone += thissize;
1974
1975 /* Shift this part into place for the result. */
1976 if (BYTES_BIG_ENDIAN)
1977 {
1978 if (bitsize != bitsdone)
1979 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1980 bitsize - bitsdone, 0, 1);
1981 }
1982 else
1983 {
1984 if (bitsdone != thissize)
1985 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1986 bitsdone - thissize, 0, 1);
1987 }
1988
1989 if (first)
1990 result = part;
1991 else
1992 /* Combine the parts with bitwise or. This works
1993 because we extracted each part as an unsigned bit field. */
1994 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1995 OPTAB_LIB_WIDEN);
1996
1997 first = 0;
1998 }
1999
2000 /* Unsigned bit field: we are done. */
2001 if (unsignedp)
2002 return result;
2003 /* Signed bit field: sign-extend with two arithmetic shifts. */
2004 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2005 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2006 return expand_shift (RSHIFT_EXPR, word_mode, result,
2007 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2008 }
2009 \f
2010 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2011 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2012 MODE, fill the upper bits with zeros. Fail if the layout of either
2013 mode is unknown (as for CC modes) or if the extraction would involve
2014 unprofitable mode punning. Return the value on success, otherwise
2015 return null.
2016
2017 This is different from gen_lowpart* in these respects:
2018
2019 - the returned value must always be considered an rvalue
2020
2021 - when MODE is wider than SRC_MODE, the extraction involves
2022 a zero extension
2023
2024 - when MODE is smaller than SRC_MODE, the extraction involves
2025 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2026
2027 In other words, this routine performs a computation, whereas the
2028 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2029 operations. */
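/* For instance, extract_low_bits (SImode, SFmode, x) reinterprets the bit
   pattern of a single-precision float as a 32-bit integer, provided the
   target allows the two modes to be tied (MODES_TIEABLE_P).  */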
2030
2031 rtx
2032 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2033 {
2034 enum machine_mode int_mode, src_int_mode;
2035
2036 if (mode == src_mode)
2037 return src;
2038
2039 if (CONSTANT_P (src))
2040 {
2041 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2042 fails, it will happily create (subreg (symbol_ref)) or similar
2043 invalid SUBREGs. */
2044 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2045 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2046 if (ret)
2047 return ret;
2048
2049 if (GET_MODE (src) == VOIDmode
2050 || !validate_subreg (mode, src_mode, src, byte))
2051 return NULL_RTX;
2052
2053 src = force_reg (GET_MODE (src), src);
2054 return gen_rtx_SUBREG (mode, src, byte);
2055 }
2056
2057 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2058 return NULL_RTX;
2059
2060 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2061 && MODES_TIEABLE_P (mode, src_mode))
2062 {
2063 rtx x = gen_lowpart_common (mode, src);
2064 if (x)
2065 return x;
2066 }
2067
2068 src_int_mode = int_mode_for_mode (src_mode);
2069 int_mode = int_mode_for_mode (mode);
2070 if (src_int_mode == BLKmode || int_mode == BLKmode)
2071 return NULL_RTX;
2072
2073 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2074 return NULL_RTX;
2075 if (!MODES_TIEABLE_P (int_mode, mode))
2076 return NULL_RTX;
2077
2078 src = gen_lowpart (src_int_mode, src);
2079 src = convert_modes (int_mode, src_int_mode, src, true);
2080 src = gen_lowpart (mode, src);
2081 return src;
2082 }
2083 \f
2084 /* Add INC into TARGET. */
2085
2086 void
2087 expand_inc (rtx target, rtx inc)
2088 {
2089 rtx value = expand_binop (GET_MODE (target), add_optab,
2090 target, inc,
2091 target, 0, OPTAB_LIB_WIDEN);
2092 if (value != target)
2093 emit_move_insn (target, value);
2094 }
2095
2096 /* Subtract DEC from TARGET. */
2097
2098 void
2099 expand_dec (rtx target, rtx dec)
2100 {
2101 rtx value = expand_binop (GET_MODE (target), sub_optab,
2102 target, dec,
2103 target, 0, OPTAB_LIB_WIDEN);
2104 if (value != target)
2105 emit_move_insn (target, value);
2106 }
2107 \f
2108 /* Output a shift instruction for expression code CODE,
2109 with SHIFTED being the rtx for the value to shift,
2110 and AMOUNT the rtx for the amount to shift by.
2111 Store the result in the rtx TARGET, if that is convenient.
2112 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2113 Return the rtx for where the value is. */
2114
2115 static rtx
2116 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2117 rtx amount, rtx target, int unsignedp)
2118 {
2119 rtx op1, temp = 0;
2120 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2121 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2122 optab lshift_optab = ashl_optab;
2123 optab rshift_arith_optab = ashr_optab;
2124 optab rshift_uns_optab = lshr_optab;
2125 optab lrotate_optab = rotl_optab;
2126 optab rrotate_optab = rotr_optab;
2127 enum machine_mode op1_mode;
2128 int attempt;
2129 bool speed = optimize_insn_for_speed_p ();
2130
2131 op1 = amount;
2132 op1_mode = GET_MODE (op1);
2133
2134 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2135 shift amount is a vector, use the vector/vector shift patterns. */
2136 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2137 {
2138 lshift_optab = vashl_optab;
2139 rshift_arith_optab = vashr_optab;
2140 rshift_uns_optab = vlshr_optab;
2141 lrotate_optab = vrotl_optab;
2142 rrotate_optab = vrotr_optab;
2143 }
2144
2145 /* We previously detected shift counts computed by NEGATE_EXPR
2146 and shifted in the other direction, but that does not work
2147 on all machines. */
2148
2149 if (SHIFT_COUNT_TRUNCATED)
2150 {
2151 if (CONST_INT_P (op1)
2152 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2153 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2154 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2155 % GET_MODE_BITSIZE (mode));
2156 else if (GET_CODE (op1) == SUBREG
2157 && subreg_lowpart_p (op1)
2158 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2159 op1 = SUBREG_REG (op1);
2160 }
2161
2162 if (op1 == const0_rtx)
2163 return shifted;
2164
2165 /* Check whether it's cheaper to implement a left shift by a constant
2166 bit count by a sequence of additions. */
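/* For instance, a left shift by 2 can be done as two self-additions,
   t = x + x; t = t + t;  which is what the loop below emits when the
   additions are cheaper than the shift.  */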
2167 if (code == LSHIFT_EXPR
2168 && CONST_INT_P (op1)
2169 && INTVAL (op1) > 0
2170 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2171 && INTVAL (op1) < MAX_BITS_PER_WORD
2172 && (shift_cost (speed, mode, INTVAL (op1))
2173 > INTVAL (op1) * add_cost (speed, mode))
2174 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2175 {
2176 int i;
2177 for (i = 0; i < INTVAL (op1); i++)
2178 {
2179 temp = force_reg (mode, shifted);
2180 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2181 unsignedp, OPTAB_LIB_WIDEN);
2182 }
2183 return shifted;
2184 }
2185
2186 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2187 {
2188 enum optab_methods methods;
2189
2190 if (attempt == 0)
2191 methods = OPTAB_DIRECT;
2192 else if (attempt == 1)
2193 methods = OPTAB_WIDEN;
2194 else
2195 methods = OPTAB_LIB_WIDEN;
2196
2197 if (rotate)
2198 {
2199 /* Widening does not work for rotation. */
2200 if (methods == OPTAB_WIDEN)
2201 continue;
2202 else if (methods == OPTAB_LIB_WIDEN)
2203 {
2204 /* If we have been unable to open-code this by a rotation,
2205 do it as the IOR of two shifts. I.e., to rotate A
2206 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2207 where C is the bitsize of A.
2208
2209 It is theoretically possible that the target machine might
2210 not be able to perform either shift and hence we would
2211 be making two libcalls rather than just the one for the
2212 shift (similarly if IOR could not be done). We will allow
2213 this extremely unlikely lossage to avoid complicating the
2214 code below. */
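/* For example, rotating a 32-bit value left by 8 becomes
   (A << 8) | ((unsigned) A >> 24).  */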
2215
2216 rtx subtarget = target == shifted ? 0 : target;
2217 rtx new_amount, other_amount;
2218 rtx temp1;
2219
2220 new_amount = op1;
2221 if (CONST_INT_P (op1))
2222 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2223 - INTVAL (op1));
2224 else
2225 other_amount
2226 = simplify_gen_binary (MINUS, GET_MODE (op1),
2227 GEN_INT (GET_MODE_PRECISION (mode)),
2228 op1);
2229
2230 shifted = force_reg (mode, shifted);
2231
2232 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2233 mode, shifted, new_amount, 0, 1);
2234 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2235 mode, shifted, other_amount,
2236 subtarget, 1);
2237 return expand_binop (mode, ior_optab, temp, temp1, target,
2238 unsignedp, methods);
2239 }
2240
2241 temp = expand_binop (mode,
2242 left ? lrotate_optab : rrotate_optab,
2243 shifted, op1, target, unsignedp, methods);
2244 }
2245 else if (unsignedp)
2246 temp = expand_binop (mode,
2247 left ? lshift_optab : rshift_uns_optab,
2248 shifted, op1, target, unsignedp, methods);
2249
2250 /* Do arithmetic shifts.
2251 Also, if we are going to widen the operand, we can just as well
2252 use an arithmetic right-shift instead of a logical one. */
2253 if (temp == 0 && ! rotate
2254 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2255 {
2256 enum optab_methods methods1 = methods;
2257
2258 /* If trying to widen a log shift to an arithmetic shift,
2259 don't accept an arithmetic shift of the same size. */
2260 if (unsignedp)
2261 methods1 = OPTAB_MUST_WIDEN;
2262
2263 /* Arithmetic shift */
2264
2265 temp = expand_binop (mode,
2266 left ? lshift_optab : rshift_arith_optab,
2267 shifted, op1, target, unsignedp, methods1);
2268 }
2269
2270 /* We used to try extzv here for logical right shifts, but that was
2271 only useful for one machine, the VAX, and caused poor code
2272 generation there for lshrdi3, so the code was deleted and a
2273 define_expand for lshrsi3 was added to vax.md. */
2274 }
2275
2276 gcc_assert (temp);
2277 return temp;
2278 }
2279
2280 /* Output a shift instruction for expression code CODE,
2281 with SHIFTED being the rtx for the value to shift,
2282 and AMOUNT the amount to shift by.
2283 Store the result in the rtx TARGET, if that is convenient.
2284 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2285 Return the rtx for where the value is. */
2286
2287 rtx
2288 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2289 int amount, rtx target, int unsignedp)
2290 {
2291 return expand_shift_1 (code, mode,
2292 shifted, GEN_INT (amount), target, unsignedp);
2293 }
2294
2295 /* Output a shift instruction for expression code CODE,
2296 with SHIFTED being the rtx for the value to shift,
2297 and AMOUNT the tree for the amount to shift by.
2298 Store the result in the rtx TARGET, if that is convenient.
2299 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2300 Return the rtx for where the value is. */
2301
2302 rtx
2303 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2304 tree amount, rtx target, int unsignedp)
2305 {
2306 return expand_shift_1 (code, mode,
2307 shifted, expand_normal (amount), target, unsignedp);
2308 }
2309
2310 \f
2311 /* Indicates the type of fixup needed after a constant multiplication.
2312 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2313 the result should be negated, and ADD_VARIANT means that the
2314 multiplicand should be added to the result. */
2315 enum mult_variant {basic_variant, negate_variant, add_variant};
2316
2317 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2318 const struct mult_cost *, enum machine_mode mode);
2319 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2320 struct algorithm *, enum mult_variant *, int);
2321 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2322 const struct algorithm *, enum mult_variant);
2323 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2324 static rtx extract_high_half (enum machine_mode, rtx);
2325 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2326 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2327 int, int);
2328 /* Compute and return the best algorithm for multiplying by T.
2329 The algorithm must cost less than COST_LIMIT.
2330 If retval.cost >= COST_LIMIT, no algorithm was found and all
2331 other fields of the returned struct are undefined.
2332 MODE is the machine mode of the multiplication. */
2333
2334 static void
2335 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2336 const struct mult_cost *cost_limit, enum machine_mode mode)
2337 {
2338 int m;
2339 struct algorithm *alg_in, *best_alg;
2340 struct mult_cost best_cost;
2341 struct mult_cost new_limit;
2342 int op_cost, op_latency;
2343 unsigned HOST_WIDE_INT orig_t = t;
2344 unsigned HOST_WIDE_INT q;
2345 int maxm, hash_index;
2346 bool cache_hit = false;
2347 enum alg_code cache_alg = alg_zero;
2348 bool speed = optimize_insn_for_speed_p ();
2349 enum machine_mode imode;
2350 struct alg_hash_entry *entry_ptr;
2351
2352 /* Indicate that no algorithm is yet found. If no algorithm
2353 is found, this value will be returned and indicate failure. */
2354 alg_out->cost.cost = cost_limit->cost + 1;
2355 alg_out->cost.latency = cost_limit->latency + 1;
2356
2357 if (cost_limit->cost < 0
2358 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2359 return;
2360
2361 /* Be prepared for vector modes. */
2362 imode = GET_MODE_INNER (mode);
2363 if (imode == VOIDmode)
2364 imode = mode;
2365
2366 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2367
2368 /* Restrict the bits of "t" to the multiplication's mode. */
2369 t &= GET_MODE_MASK (imode);
2370
2371 /* t == 1 can be done in zero cost. */
2372 if (t == 1)
2373 {
2374 alg_out->ops = 1;
2375 alg_out->cost.cost = 0;
2376 alg_out->cost.latency = 0;
2377 alg_out->op[0] = alg_m;
2378 return;
2379 }
2380
2381 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2382 fail now. */
2383 if (t == 0)
2384 {
2385 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2386 return;
2387 else
2388 {
2389 alg_out->ops = 1;
2390 alg_out->cost.cost = zero_cost (speed);
2391 alg_out->cost.latency = zero_cost (speed);
2392 alg_out->op[0] = alg_zero;
2393 return;
2394 }
2395 }
2396
2397 /* We'll be needing a couple extra algorithm structures now. */
2398
2399 alg_in = XALLOCA (struct algorithm);
2400 best_alg = XALLOCA (struct algorithm);
2401 best_cost = *cost_limit;
2402
2403 /* Compute the hash index. */
2404 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2405
2406 /* See if we already know what to do for T. */
2407 entry_ptr = alg_hash_entry_ptr (hash_index);
2408 if (entry_ptr->t == t
2409 && entry_ptr->mode == mode
2411 && entry_ptr->speed == speed
2412 && entry_ptr->alg != alg_unknown)
2413 {
2414 cache_alg = entry_ptr->alg;
2415
2416 if (cache_alg == alg_impossible)
2417 {
2418 /* The cache tells us that it's impossible to synthesize
2419 multiplication by T within entry_ptr->cost. */
2420 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2421 /* COST_LIMIT is at least as restrictive as the one
2422 recorded in the hash table, in which case we have no
2423 hope of synthesizing a multiplication. Just
2424 return. */
2425 return;
2426
2427 /* If we get here, COST_LIMIT is less restrictive than the
2428 one recorded in the hash table, so we may be able to
2429 synthesize a multiplication. Proceed as if we didn't
2430 have the cache entry. */
2431 }
2432 else
2433 {
2434 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2435 /* The cached algorithm shows that this multiplication
2436 requires more cost than COST_LIMIT. Just return. This
2437 way, we don't clobber this cache entry with
2438 alg_impossible but retain useful information. */
2439 return;
2440
2441 cache_hit = true;
2442
2443 switch (cache_alg)
2444 {
2445 case alg_shift:
2446 goto do_alg_shift;
2447
2448 case alg_add_t_m2:
2449 case alg_sub_t_m2:
2450 goto do_alg_addsub_t_m2;
2451
2452 case alg_add_factor:
2453 case alg_sub_factor:
2454 goto do_alg_addsub_factor;
2455
2456 case alg_add_t2_m:
2457 goto do_alg_add_t2_m;
2458
2459 case alg_sub_t2_m:
2460 goto do_alg_sub_t2_m;
2461
2462 default:
2463 gcc_unreachable ();
2464 }
2465 }
2466 }
2467
2468 /* If we have a group of zero bits at the low-order part of T, try
2469 multiplying by the remaining bits and then doing a shift. */
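/* For example, T = 20 has two low zero bits, so this case reduces the
   problem to multiplying by 5 and then shifting the result left by 2.  */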
2470
2471 if ((t & 1) == 0)
2472 {
2473 do_alg_shift:
2474 m = floor_log2 (t & -t); /* m = number of low zero bits */
2475 if (m < maxm)
2476 {
2477 q = t >> m;
2478 /* The function expand_shift will choose between a shift and
2479 a sequence of additions, so the observed cost is given as
2480 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2481 op_cost = m * add_cost (speed, mode);
2482 if (shift_cost (speed, mode, m) < op_cost)
2483 op_cost = shift_cost (speed, mode, m);
2484 new_limit.cost = best_cost.cost - op_cost;
2485 new_limit.latency = best_cost.latency - op_cost;
2486 synth_mult (alg_in, q, &new_limit, mode);
2487
2488 alg_in->cost.cost += op_cost;
2489 alg_in->cost.latency += op_cost;
2490 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2491 {
2492 struct algorithm *x;
2493 best_cost = alg_in->cost;
2494 x = alg_in, alg_in = best_alg, best_alg = x;
2495 best_alg->log[best_alg->ops] = m;
2496 best_alg->op[best_alg->ops] = alg_shift;
2497 }
2498
2499 /* See if treating ORIG_T as a signed number yields a better
2500 sequence. Try this sequence only for a negative ORIG_T
2501 as it would be useless for a non-negative ORIG_T. */
2502 if ((HOST_WIDE_INT) orig_t < 0)
2503 {
2504 /* Shift ORIG_T as follows because a right shift of a
2505 negative-valued signed type is implementation
2506 defined. */
2507 q = ~(~orig_t >> m);
2508 /* The function expand_shift will choose between a shift
2509 and a sequence of additions, so the observed cost is
2510 given as MIN (m * add_cost(speed, mode),
2511 shift_cost(speed, mode, m)). */
2512 op_cost = m * add_cost (speed, mode);
2513 if (shift_cost (speed, mode, m) < op_cost)
2514 op_cost = shift_cost (speed, mode, m);
2515 new_limit.cost = best_cost.cost - op_cost;
2516 new_limit.latency = best_cost.latency - op_cost;
2517 synth_mult (alg_in, q, &new_limit, mode);
2518
2519 alg_in->cost.cost += op_cost;
2520 alg_in->cost.latency += op_cost;
2521 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2522 {
2523 struct algorithm *x;
2524 best_cost = alg_in->cost;
2525 x = alg_in, alg_in = best_alg, best_alg = x;
2526 best_alg->log[best_alg->ops] = m;
2527 best_alg->op[best_alg->ops] = alg_shift;
2528 }
2529 }
2530 }
2531 if (cache_hit)
2532 goto done;
2533 }
2534
2535 /* If we have an odd number, add or subtract one. */
2536 if ((t & 1) != 0)
2537 {
2538 unsigned HOST_WIDE_INT w;
2539
2540 do_alg_addsub_t_m2:
2541 for (w = 1; (w & t) != 0; w <<= 1)
2542 ;
2543 /* If T was -1, then W will be zero after the loop. This is another
2544 case where T ends with ...111. Handling it by multiplying by (T + 1)
2545 and subtracting 1 produces slightly better code and makes algorithm
2546 selection much faster than treating it like the ...0111 case
2547 below. */
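/* For example, T = 7 ends in ...111, so this branch tries x * 8 - x,
   whereas T = 5 ends in ...01 and is tried as x * 4 + x.  */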
2548 if (w == 0
2549 || (w > 2
2550 /* Reject the case where t is 3.
2551 Thus we prefer addition in that case. */
2552 && t != 3))
2553 {
2554 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2555
2556 op_cost = add_cost (speed, mode);
2557 new_limit.cost = best_cost.cost - op_cost;
2558 new_limit.latency = best_cost.latency - op_cost;
2559 synth_mult (alg_in, t + 1, &new_limit, mode);
2560
2561 alg_in->cost.cost += op_cost;
2562 alg_in->cost.latency += op_cost;
2563 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564 {
2565 struct algorithm *x;
2566 best_cost = alg_in->cost;
2567 x = alg_in, alg_in = best_alg, best_alg = x;
2568 best_alg->log[best_alg->ops] = 0;
2569 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2570 }
2571 }
2572 else
2573 {
2574 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2575
2576 op_cost = add_cost (speed, mode);
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, t - 1, &new_limit, mode);
2580
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2584 {
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = 0;
2589 best_alg->op[best_alg->ops] = alg_add_t_m2;
2590 }
2591 }
2592
2593 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2594 quickly with a - a * n for some appropriate constant n. */
2595 m = exact_log2 (-orig_t + 1);
2596 if (m >= 0 && m < maxm)
2597 {
2598 op_cost = shiftsub1_cost (speed, mode, m);
2599 new_limit.cost = best_cost.cost - op_cost;
2600 new_limit.latency = best_cost.latency - op_cost;
2601 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2602 &new_limit, mode);
2603
2604 alg_in->cost.cost += op_cost;
2605 alg_in->cost.latency += op_cost;
2606 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2607 {
2608 struct algorithm *x;
2609 best_cost = alg_in->cost;
2610 x = alg_in, alg_in = best_alg, best_alg = x;
2611 best_alg->log[best_alg->ops] = m;
2612 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2613 }
2614 }
2615
2616 if (cache_hit)
2617 goto done;
2618 }
2619
2620 /* Look for factors of t of the form
2621 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2622 If we find such a factor, we can multiply by t using an algorithm that
2623 multiplies by q, shift the result by m and add/subtract it to itself.
2624
2625 We search for large factors first and loop down, even if large factors
2626 are less probable than small; if we find a large factor we will find a
2627 good sequence quickly, and therefore be able to prune (by decreasing
2628 COST_LIMIT) the search. */
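/* For example, T = 45 = 5 * (8 + 1): first synthesize the multiply by 5,
   then shift that result left by 3 and add it to itself.  */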
2629
2630 do_alg_addsub_factor:
2631 for (m = floor_log2 (t - 1); m >= 2; m--)
2632 {
2633 unsigned HOST_WIDE_INT d;
2634
2635 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2636 if (t % d == 0 && t > d && m < maxm
2637 && (!cache_hit || cache_alg == alg_add_factor))
2638 {
2639 /* If the target has a cheap shift-and-add instruction use
2640 that in preference to a shift insn followed by an add insn.
2641 Assume that the shift-and-add is "atomic" with a latency
2642 equal to its cost, otherwise assume that on superscalar
2643 hardware the shift may be executed concurrently with the
2644 earlier steps in the algorithm. */
2645 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2646 if (shiftadd_cost (speed, mode, m) < op_cost)
2647 {
2648 op_cost = shiftadd_cost (speed, mode, m);
2649 op_latency = op_cost;
2650 }
2651 else
2652 op_latency = add_cost (speed, mode);
2653
2654 new_limit.cost = best_cost.cost - op_cost;
2655 new_limit.latency = best_cost.latency - op_latency;
2656 synth_mult (alg_in, t / d, &new_limit, mode);
2657
2658 alg_in->cost.cost += op_cost;
2659 alg_in->cost.latency += op_latency;
2660 if (alg_in->cost.latency < op_cost)
2661 alg_in->cost.latency = op_cost;
2662 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2663 {
2664 struct algorithm *x;
2665 best_cost = alg_in->cost;
2666 x = alg_in, alg_in = best_alg, best_alg = x;
2667 best_alg->log[best_alg->ops] = m;
2668 best_alg->op[best_alg->ops] = alg_add_factor;
2669 }
2670 /* Other factors will have been taken care of in the recursion. */
2671 break;
2672 }
2673
2674 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2675 if (t % d == 0 && t > d && m < maxm
2676 && (!cache_hit || cache_alg == alg_sub_factor))
2677 {
2678 /* If the target has a cheap shift-and-subtract insn use
2679 that in preference to a shift insn followed by a sub insn.
2680 Assume that the shift-and-sub is "atomic" with a latency
2681 equal to its cost, otherwise assume that on superscalar
2682 hardware the shift may be executed concurrently with the
2683 earlier steps in the algorithm. */
2684 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2685 if (shiftsub0_cost (speed, mode, m) < op_cost)
2686 {
2687 op_cost = shiftsub0_cost (speed, mode, m);
2688 op_latency = op_cost;
2689 }
2690 else
2691 op_latency = add_cost (speed, mode);
2692
2693 new_limit.cost = best_cost.cost - op_cost;
2694 new_limit.latency = best_cost.latency - op_latency;
2695 synth_mult (alg_in, t / d, &new_limit, mode);
2696
2697 alg_in->cost.cost += op_cost;
2698 alg_in->cost.latency += op_latency;
2699 if (alg_in->cost.latency < op_cost)
2700 alg_in->cost.latency = op_cost;
2701 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702 {
2703 struct algorithm *x;
2704 best_cost = alg_in->cost;
2705 x = alg_in, alg_in = best_alg, best_alg = x;
2706 best_alg->log[best_alg->ops] = m;
2707 best_alg->op[best_alg->ops] = alg_sub_factor;
2708 }
2709 break;
2710 }
2711 }
2712 if (cache_hit)
2713 goto done;
2714
2715 /* Try shift-and-add (load effective address) instructions,
2716 i.e. do a*3, a*5, a*9. */
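/* E.g. x * 5 is (x << 2) + x and x * 9 is (x << 3) + x, each a single
   shift-and-add where the target provides one.  */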
2717 if ((t & 1) != 0)
2718 {
2719 do_alg_add_t2_m:
2720 q = t - 1;
2721 q = q & -q;
2722 m = exact_log2 (q);
2723 if (m >= 0 && m < maxm)
2724 {
2725 op_cost = shiftadd_cost (speed, mode, m);
2726 new_limit.cost = best_cost.cost - op_cost;
2727 new_limit.latency = best_cost.latency - op_cost;
2728 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2729
2730 alg_in->cost.cost += op_cost;
2731 alg_in->cost.latency += op_cost;
2732 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2733 {
2734 struct algorithm *x;
2735 best_cost = alg_in->cost;
2736 x = alg_in, alg_in = best_alg, best_alg = x;
2737 best_alg->log[best_alg->ops] = m;
2738 best_alg->op[best_alg->ops] = alg_add_t2_m;
2739 }
2740 }
2741 if (cache_hit)
2742 goto done;
2743
2744 do_alg_sub_t2_m:
2745 q = t + 1;
2746 q = q & -q;
2747 m = exact_log2 (q);
2748 if (m >= 0 && m < maxm)
2749 {
2750 op_cost = shiftsub0_cost (speed, mode, m);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2754
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758 {
2759 struct algorithm *x;
2760 best_cost = alg_in->cost;
2761 x = alg_in, alg_in = best_alg, best_alg = x;
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2764 }
2765 }
2766 if (cache_hit)
2767 goto done;
2768 }
2769
2770 done:
2771 /* If best_cost has not decreased, we have not found any algorithm. */
2772 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2773 {
2774 /* We failed to find an algorithm. Record alg_impossible for
2775 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2776 we are asked to find an algorithm for T within the same or
2777 lower COST_LIMIT, we can immediately return to the
2778 caller. */
2779 entry_ptr->t = t;
2780 entry_ptr->mode = mode;
2781 entry_ptr->speed = speed;
2782 entry_ptr->alg = alg_impossible;
2783 entry_ptr->cost = *cost_limit;
2784 return;
2785 }
2786
2787 /* Cache the result. */
2788 if (!cache_hit)
2789 {
2790 entry_ptr->t = t;
2791 entry_ptr->mode = mode;
2792 entry_ptr->speed = speed;
2793 entry_ptr->alg = best_alg->op[best_alg->ops];
2794 entry_ptr->cost.cost = best_cost.cost;
2795 entry_ptr->cost.latency = best_cost.latency;
2796 }
2797
2798 /* If the sequence is too long for `struct algorithm'
2799 to record, make this search fail. */
2800 if (best_alg->ops == MAX_BITS_PER_WORD)
2801 return;
2802
2803 /* Copy the algorithm from temporary space to the space at alg_out.
2804 We avoid using structure assignment because the majority of
2805 best_alg is normally undefined, and this is a critical function. */
2806 alg_out->ops = best_alg->ops + 1;
2807 alg_out->cost = best_cost;
2808 memcpy (alg_out->op, best_alg->op,
2809 alg_out->ops * sizeof *alg_out->op);
2810 memcpy (alg_out->log, best_alg->log,
2811 alg_out->ops * sizeof *alg_out->log);
2812 }
2813 \f
2814 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2815 Try three variations:
2816
2817 - a shift/add sequence based on VAL itself
2818 - a shift/add sequence based on -VAL, followed by a negation
2819 - a shift/add sequence based on VAL - 1, followed by an addition.
2820
2821 Return true if the cheapest of these cost less than MULT_COST,
2822 describing the algorithm in *ALG and final fixup in *VARIANT. */
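/* For instance, a multiplication by -5 may come out cheapest as -(x * 5)
   (negate_variant), and one by 7 as x * 6 + x (add_variant), when those
   beat a shift/add sequence for the value itself.  */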
2823
2824 static bool
2825 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2826 struct algorithm *alg, enum mult_variant *variant,
2827 int mult_cost)
2828 {
2829 struct algorithm alg2;
2830 struct mult_cost limit;
2831 int op_cost;
2832 bool speed = optimize_insn_for_speed_p ();
2833
2834 /* Fail quickly for impossible bounds. */
2835 if (mult_cost < 0)
2836 return false;
2837
2838 /* Ensure that mult_cost provides a reasonable upper bound.
2839 Any constant multiplication can be performed with less
2840 than 2 * bits additions. */
2841 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2842 if (mult_cost > op_cost)
2843 mult_cost = op_cost;
2844
2845 *variant = basic_variant;
2846 limit.cost = mult_cost;
2847 limit.latency = mult_cost;
2848 synth_mult (alg, val, &limit, mode);
2849
2850 /* This works only if the inverted value actually fits in an
2851 `unsigned int'. */
2852 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2853 {
2854 op_cost = neg_cost(speed, mode);
2855 if (MULT_COST_LESS (&alg->cost, mult_cost))
2856 {
2857 limit.cost = alg->cost.cost - op_cost;
2858 limit.latency = alg->cost.latency - op_cost;
2859 }
2860 else
2861 {
2862 limit.cost = mult_cost - op_cost;
2863 limit.latency = mult_cost - op_cost;
2864 }
2865
2866 synth_mult (&alg2, -val, &limit, mode);
2867 alg2.cost.cost += op_cost;
2868 alg2.cost.latency += op_cost;
2869 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2870 *alg = alg2, *variant = negate_variant;
2871 }
2872
2873 /* This proves very useful for division-by-constant. */
2874 op_cost = add_cost (speed, mode);
2875 if (MULT_COST_LESS (&alg->cost, mult_cost))
2876 {
2877 limit.cost = alg->cost.cost - op_cost;
2878 limit.latency = alg->cost.latency - op_cost;
2879 }
2880 else
2881 {
2882 limit.cost = mult_cost - op_cost;
2883 limit.latency = mult_cost - op_cost;
2884 }
2885
2886 synth_mult (&alg2, val - 1, &limit, mode);
2887 alg2.cost.cost += op_cost;
2888 alg2.cost.latency += op_cost;
2889 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2890 *alg = alg2, *variant = add_variant;
2891
2892 return MULT_COST_LESS (&alg->cost, mult_cost);
2893 }
2894
2895 /* A subroutine of expand_mult, used for constant multiplications.
2896 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2897 convenient. Use the shift/add sequence described by ALG and apply
2898 the final fixup specified by VARIANT. */
2899
2900 static rtx
2901 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2902 rtx target, const struct algorithm *alg,
2903 enum mult_variant variant)
2904 {
2905 HOST_WIDE_INT val_so_far;
2906 rtx insn, accum, tem;
2907 int opno;
2908 enum machine_mode nmode;
2909
2910 /* Avoid referencing memory over and over and invalid sharing
2911 on SUBREGs. */
2912 op0 = force_reg (mode, op0);
2913
2914 /* ACCUM starts out either as OP0 or as a zero, depending on
2915 the first operation. */
2916
2917 if (alg->op[0] == alg_zero)
2918 {
2919 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2920 val_so_far = 0;
2921 }
2922 else if (alg->op[0] == alg_m)
2923 {
2924 accum = copy_to_mode_reg (mode, op0);
2925 val_so_far = 1;
2926 }
2927 else
2928 gcc_unreachable ();
2929
2930 for (opno = 1; opno < alg->ops; opno++)
2931 {
2932 int log = alg->log[opno];
2933 rtx shift_subtarget = optimize ? 0 : accum;
2934 rtx add_target
2935 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2936 && !optimize)
2937 ? target : 0;
2938 rtx accum_target = optimize ? 0 : accum;
2939 rtx accum_inner;
2940
2941 switch (alg->op[opno])
2942 {
2943 case alg_shift:
2944 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2945 /* REG_EQUAL note will be attached to the following insn. */
2946 emit_move_insn (accum, tem);
2947 val_so_far <<= log;
2948 break;
2949
2950 case alg_add_t_m2:
2951 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2952 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2953 add_target ? add_target : accum_target);
2954 val_so_far += (HOST_WIDE_INT) 1 << log;
2955 break;
2956
2957 case alg_sub_t_m2:
2958 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2959 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2960 add_target ? add_target : accum_target);
2961 val_so_far -= (HOST_WIDE_INT) 1 << log;
2962 break;
2963
2964 case alg_add_t2_m:
2965 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2966 log, shift_subtarget, 0);
2967 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2968 add_target ? add_target : accum_target);
2969 val_so_far = (val_so_far << log) + 1;
2970 break;
2971
2972 case alg_sub_t2_m:
2973 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2974 log, shift_subtarget, 0);
2975 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2976 add_target ? add_target : accum_target);
2977 val_so_far = (val_so_far << log) - 1;
2978 break;
2979
2980 case alg_add_factor:
2981 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2982 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2983 add_target ? add_target : accum_target);
2984 val_so_far += val_so_far << log;
2985 break;
2986
2987 case alg_sub_factor:
2988 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2989 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2990 (add_target
2991 ? add_target : (optimize ? 0 : tem)));
2992 val_so_far = (val_so_far << log) - val_so_far;
2993 break;
2994
2995 default:
2996 gcc_unreachable ();
2997 }
2998
2999 if (SCALAR_INT_MODE_P (mode))
3000 {
3001 /* Write a REG_EQUAL note on the last insn so that we can cse
3002 multiplication sequences. Note that if ACCUM is a SUBREG,
3003 we've set the inner register and must properly indicate that. */
3004 tem = op0, nmode = mode;
3005 accum_inner = accum;
3006 if (GET_CODE (accum) == SUBREG)
3007 {
3008 accum_inner = SUBREG_REG (accum);
3009 nmode = GET_MODE (accum_inner);
3010 tem = gen_lowpart (nmode, op0);
3011 }
3012
3013 insn = get_last_insn ();
3014 set_dst_reg_note (insn, REG_EQUAL,
3015 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3016 accum_inner);
3017 }
3018 }
3019
3020 if (variant == negate_variant)
3021 {
3022 val_so_far = -val_so_far;
3023 accum = expand_unop (mode, neg_optab, accum, target, 0);
3024 }
3025 else if (variant == add_variant)
3026 {
3027 val_so_far = val_so_far + 1;
3028 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3029 }
3030
3031 /* Compare only the bits of val and val_so_far that are significant
3032 in the result mode, to avoid sign-/zero-extension confusion. */
3033 nmode = GET_MODE_INNER (mode);
3034 if (nmode == VOIDmode)
3035 nmode = mode;
3036 val &= GET_MODE_MASK (nmode);
3037 val_so_far &= GET_MODE_MASK (nmode);
3038 gcc_assert (val == val_so_far);
3039
3040 return accum;
3041 }
3042
3043 /* Perform a multiplication and return an rtx for the result.
3044 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3045 TARGET is a suggestion for where to store the result (an rtx).
3046
3047 We check specially for a constant integer as OP1.
3048 If you want this check for OP0 as well, then before calling
3049 you should swap the two operands if OP0 would be constant. */
3050
3051 rtx
3052 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3053 int unsignedp)
3054 {
3055 enum mult_variant variant;
3056 struct algorithm algorithm;
3057 rtx scalar_op1;
3058 int max_cost;
3059 bool speed = optimize_insn_for_speed_p ();
3060 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3061
3062 if (CONSTANT_P (op0))
3063 {
3064 rtx temp = op0;
3065 op0 = op1;
3066 op1 = temp;
3067 }
3068
3069 /* For vectors, there are several simplifications that can be made if
3070 all elements of the vector constant are identical. */
3071 scalar_op1 = op1;
3072 if (GET_CODE (op1) == CONST_VECTOR)
3073 {
3074 int i, n = CONST_VECTOR_NUNITS (op1);
3075 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3076 for (i = 1; i < n; ++i)
3077 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3078 goto skip_scalar;
3079 }
3080
3081 if (INTEGRAL_MODE_P (mode))
3082 {
3083 rtx fake_reg;
3084 HOST_WIDE_INT coeff;
3085 bool is_neg;
3086 int mode_bitsize;
3087
3088 if (op1 == CONST0_RTX (mode))
3089 return op1;
3090 if (op1 == CONST1_RTX (mode))
3091 return op0;
3092 if (op1 == CONSTM1_RTX (mode))
3093 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3094 op0, target, 0);
3095
3096 if (do_trapv)
3097 goto skip_synth;
3098
3099 /* These are the operations that are potentially turned into
3100 a sequence of shifts and additions. */
3101 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3102
3103 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3104 less than or equal in size to `unsigned int' this doesn't matter.
3105 If the mode is larger than `unsigned int', then synth_mult works
3106 only if the constant value exactly fits in an `unsigned int' without
3107 any truncation. This means that multiplying by negative values does
3108 not work; results are off by 2^32 on a 32-bit machine. */
3109
3110 if (CONST_INT_P (scalar_op1))
3111 {
3112 coeff = INTVAL (scalar_op1);
3113 is_neg = coeff < 0;
3114 }
3115 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3116 {
3117 /* If we are multiplying in DImode, it may still be a win
3118 to try to work with shifts and adds. */
3119 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3120 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3121 {
3122 coeff = CONST_DOUBLE_LOW (scalar_op1);
3123 is_neg = false;
3124 }
3125 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3126 {
3127 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3128 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3129 {
3130 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3131 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3132 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3133 return expand_shift (LSHIFT_EXPR, mode, op0,
3134 shift, target, unsignedp);
3135 }
3136 goto skip_synth;
3137 }
3138 else
3139 goto skip_synth;
3140 }
3141 else
3142 goto skip_synth;
3143
3144 /* We used to test optimize here, on the grounds that it's better to
3145 produce a smaller program when -O is not used. But this causes
3146 such a terrible slowdown sometimes that it seems better to always
3147 use synth_mult. */
3148
3149 /* Special case powers of two. */
3150 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3151 return expand_shift (LSHIFT_EXPR, mode, op0,
3152 floor_log2 (coeff), target, unsignedp);
3153
3154 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3155
3156 /* Attempt to handle multiplication of DImode values by negative
3157 coefficients, by performing the multiplication by a positive
3158 multiplier and then inverting the result. */
3159 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3160 {
3161 /* It's safe to use -coeff even for INT_MIN, as the
3162 result is interpreted as an unsigned coefficient.
3163 Exclude cost of op0 from max_cost to match the cost
3164 calculation of the synth_mult. */
3165 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3166 - neg_cost(speed, mode));
3167 if (max_cost > 0
3168 && choose_mult_variant (mode, -coeff, &algorithm,
3169 &variant, max_cost))
3170 {
3171 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3172 &algorithm, variant);
3173 return expand_unop (mode, neg_optab, temp, target, 0);
3174 }
3175 goto skip_synth;
3176 }
3177
3178 /* Exclude cost of op0 from max_cost to match the cost
3179 calculation of the synth_mult. */
3180 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3181 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3182 return expand_mult_const (mode, op0, coeff, target,
3183 &algorithm, variant);
3184 }
3185 skip_synth:
3186
3187 /* Expand x*2.0 as x+x. */
3188 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3189 {
3190 REAL_VALUE_TYPE d;
3191 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3192
3193 if (REAL_VALUES_EQUAL (d, dconst2))
3194 {
3195 op0 = force_reg (GET_MODE (op0), op0);
3196 return expand_binop (mode, add_optab, op0, op0,
3197 target, unsignedp, OPTAB_LIB_WIDEN);
3198 }
3199 }
3200 skip_scalar:
3201
3202 /* This used to use umul_optab if unsigned, but for non-widening multiply
3203 there is no difference between signed and unsigned. */
3204 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3205 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3206 gcc_assert (op0);
3207 return op0;
3208 }
3209
3210 /* Return a cost estimate for multiplying a register by the given
3211 COEFFicient in the given MODE and SPEED. */
3212
3213 int
3214 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3215 {
3216 int max_cost;
3217 struct algorithm algorithm;
3218 enum mult_variant variant;
3219
3220 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3221 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3222 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3223 return algorithm.cost.cost;
3224 else
3225 return max_cost;
3226 }
3227
3228 /* Perform a widening multiplication and return an rtx for the result.
3229 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3230 TARGET is a suggestion for where to store the result (an rtx).
3231 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3232 or smul_widen_optab.
3233
3234 We check specially for a constant integer as OP1, comparing the
3235 cost of a widening multiply against the cost of a sequence of shifts
3236 and adds. */
3237
3238 rtx
3239 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3240 int unsignedp, optab this_optab)
3241 {
3242 bool speed = optimize_insn_for_speed_p ();
3243 rtx cop1;
3244
3245 if (CONST_INT_P (op1)
3246 && GET_MODE (op0) != VOIDmode
3247 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3248 this_optab == umul_widen_optab))
3249 && CONST_INT_P (cop1)
3250 && (INTVAL (cop1) >= 0
3251 || HWI_COMPUTABLE_MODE_P (mode)))
3252 {
3253 HOST_WIDE_INT coeff = INTVAL (cop1);
3254 int max_cost;
3255 enum mult_variant variant;
3256 struct algorithm algorithm;
3257
3258 /* Special case powers of two. */
3259 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3260 {
3261 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3262 return expand_shift (LSHIFT_EXPR, mode, op0,
3263 floor_log2 (coeff), target, unsignedp);
3264 }
3265
3266 /* Exclude cost of op0 from max_cost to match the cost
3267 calculation of the synth_mult. */
3268 max_cost = mul_widen_cost (speed, mode);
3269 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3270 max_cost))
3271 {
3272 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3273 return expand_mult_const (mode, op0, coeff, target,
3274 &algorithm, variant);
3275 }
3276 }
3277 return expand_binop (mode, this_optab, op0, op1, target,
3278 unsignedp, OPTAB_LIB_WIDEN);
3279 }
3280 \f
3281 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3282 replace division by D, and put the least significant N bits of the result
3283 in *MULTIPLIER_PTR and return the most significant bit.
3284
3285 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3286 needed precision is in PRECISION (should be <= N).
3287
3288 PRECISION should be as small as possible so this function can choose
3289 multiplier more freely.
3290
3291 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3292 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3293
3294 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3295 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
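/* For instance, with N = PRECISION = 32 and D = 3 this computes the familiar
   multiplier 0xaaaaaaab with a post shift of 1: for unsigned 32-bit X,
   X / 3 == (X * 0xaaaaaaab) >> 33 when the product is formed in 64 bits.  */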
3296
3297 unsigned HOST_WIDE_INT
3298 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3299 unsigned HOST_WIDE_INT *multiplier_ptr,
3300 int *post_shift_ptr, int *lgup_ptr)
3301 {
3302 double_int mhigh, mlow;
3303 int lgup, post_shift;
3304 int pow, pow2;
3305
3306 /* lgup = ceil(log2(divisor)); */
3307 lgup = ceil_log2 (d);
3308
3309 gcc_assert (lgup <= n);
3310
3311 pow = n + lgup;
3312 pow2 = n + lgup - precision;
3313
3314 /* We could handle this with some effort, but this case is much
3315 better handled directly with a scc insn, so rely on caller using
3316 that. */
3317 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3318
3319 /* mlow = 2^(N + lgup)/d */
3320 double_int val = double_int_zero.set_bit (pow);
3321 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3322
3323 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3324 val |= double_int_zero.set_bit (pow2);
3325 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3326
3327 gcc_assert (!mhigh.high || val.high - d < d);
3328 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3329 /* Assert that mlow < mhigh. */
3330 gcc_assert (mlow.ult (mhigh));
3331
3332 /* If precision == N, then mlow, mhigh exceed 2^N
3333 (but they do not exceed 2^(N+1)). */
3334
3335 /* Reduce to lowest terms. */
3336 for (post_shift = lgup; post_shift > 0; post_shift--)
3337 {
3338 int shft = HOST_BITS_PER_WIDE_INT - 1;
3339 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3340 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3341 if (ml_lo >= mh_lo)
3342 break;
3343
3344 mlow = double_int::from_uhwi (ml_lo);
3345 mhigh = double_int::from_uhwi (mh_lo);
3346 }
3347
3348 *post_shift_ptr = post_shift;
3349 *lgup_ptr = lgup;
3350 if (n < HOST_BITS_PER_WIDE_INT)
3351 {
3352 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3353 *multiplier_ptr = mhigh.low & mask;
3354 return mhigh.low >= mask;
3355 }
3356 else
3357 {
3358 *multiplier_ptr = mhigh.low;
3359 return mhigh.high;
3360 }
3361 }
3362
3363 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3364 congruent to 1 (mod 2**N). */
3365
3366 static unsigned HOST_WIDE_INT
3367 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3368 {
3369 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3370
3371 /* The algorithm notes that the choice y = x satisfies
3372 x*y == 1 mod 2^3, since x is assumed odd.
3373 Each iteration doubles the number of bits of significance in y. */
3374
3375 unsigned HOST_WIDE_INT mask;
3376 unsigned HOST_WIDE_INT y = x;
3377 int nbit = 3;
3378
3379 mask = (n == HOST_BITS_PER_WIDE_INT
3380 ? ~(unsigned HOST_WIDE_INT) 0
3381 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3382
3383 while (nbit < n)
3384 {
3385 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3386 nbit *= 2;
3387 }
3388 return y;
3389 }
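
/* A worked example (illustrative only, not part of GCC): the sketch below
   specializes the Newton/Hensel iteration above to N == 8, using the
   implicit masking done by unsigned char arithmetic.  Starting from Y = X,
   which is exact mod 2^3 for odd X, each step doubles the number of correct
   low bits; e.g. for X == 3 the iterates are 3, 235, 171, and indeed
   3 * 171 == 513 == 1 (mod 256).  */
#if 0
static unsigned char
example_invert_mod_256 (unsigned char x)
{
  unsigned char y = x;		/* Correct modulo 2^3.  */
  y = y * (2 - x * y);		/* Correct modulo 2^6.  */
  y = y * (2 - x * y);		/* Correct modulo 2^12, hence modulo 2^8.  */
  return y;
}
#endif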
3390
3391 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3392 that was performed with the wrong signedness.  ADJ_OPERAND is already the
3393 high half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the
3394 signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3395 unsigned product to become signed.
3396
3397 The result is put in TARGET if that is convenient.
3398
3399 MODE is the mode of operation. */
3400
3401 rtx
3402 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3403 rtx op1, rtx target, int unsignedp)
3404 {
3405 rtx tem;
3406 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3407
3408 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3409 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3410 tem = expand_and (mode, tem, op1, NULL_RTX);
3411 adj_operand
3412 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3413 adj_operand);
3414
3415 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3416 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3417 tem = expand_and (mode, tem, op0, NULL_RTX);
3418 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 target);
3420
3421 return target;
3422 }
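
/* An illustrative note (not from the original sources): writing XS, YS for
   the signed readings of OP0 and OP1 and XU, YU for the unsigned ones, the
   code above relies on the identity

       highpart_u (XU, YU) == highpart_s (XS, YS)
			      + (XS < 0 ? YU : 0) + (YS < 0 ? XU : 0)

   taken modulo 2^GET_MODE_BITSIZE (MODE), and on the mirror identity with
   the additions replaced by subtractions when converting an unsigned high
   part to a signed one.  The arithmetic right shift by BITSIZE - 1 produces
   the all-ones/all-zeros mask and the AND selects the other operand, so
   each force_operand call adds (or subtracts) one of the two correction
   terms.  For example, in an 8-bit mode with XS == -1 and YS == 2 the
   signed high part is -1 and the unsigned one is (255 * 2) >> 8 == 1,
   which is indeed -1 + 2 + 0.  */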
3423
3424 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3425
3426 static rtx
3427 extract_high_half (enum machine_mode mode, rtx op)
3428 {
3429 enum machine_mode wider_mode;
3430
3431 if (mode == word_mode)
3432 return gen_highpart (mode, op);
3433
3434 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3435
3436 wider_mode = GET_MODE_WIDER_MODE (mode);
3437 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3438 GET_MODE_BITSIZE (mode), 0, 1);
3439 return convert_modes (mode, wider_mode, op, 0);
3440 }
3441
3442 /* Like expmed_mult_highpart, but only consider using a multiplication
3443 optab. OP1 is an rtx for the constant operand. */
3444
3445 static rtx
3446 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3447 rtx target, int unsignedp, int max_cost)
3448 {
3449 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3450 enum machine_mode wider_mode;
3451 optab moptab;
3452 rtx tem;
3453 int size;
3454 bool speed = optimize_insn_for_speed_p ();
3455
3456 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3457
3458 wider_mode = GET_MODE_WIDER_MODE (mode);
3459 size = GET_MODE_BITSIZE (mode);
3460
3461 /* First, try using a multiplication insn that generates only the needed
3462 high part of the product, with the signedness requested by UNSIGNEDP. */
3463 if (mul_highpart_cost (speed, mode) < max_cost)
3464 {
3465 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3466 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3467 unsignedp, OPTAB_DIRECT);
3468 if (tem)
3469 return tem;
3470 }
3471
3472 /* Second, the same as above, but using the signedness opposite to UNSIGNEDP;
3473 the result then needs to be adjusted after the multiplication. */
3474 if (size - 1 < BITS_PER_WORD
3475 && (mul_highpart_cost (speed, mode)
3476 + 2 * shift_cost (speed, mode, size-1)
3477 + 4 * add_cost (speed, mode) < max_cost))
3478 {
3479 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3480 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3481 unsignedp, OPTAB_DIRECT);
3482 if (tem)
3483 /* We used the wrong signedness. Adjust the result. */
3484 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3485 tem, unsignedp);
3486 }
3487
3488 /* Try widening multiplication. */
3489 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3490 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3491 && mul_widen_cost (speed, wider_mode) < max_cost)
3492 {
3493 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3494 unsignedp, OPTAB_WIDEN);
3495 if (tem)
3496 return extract_high_half (mode, tem);
3497 }
3498
3499 /* Try widening the mode and performing a non-widening multiplication. */
3500 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3501 && size - 1 < BITS_PER_WORD
3502 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3503 < max_cost))
3504 {
3505 rtx insns, wop0, wop1;
3506
3507 /* We need to widen the operands, for example to ensure the
3508 constant multiplier is correctly sign or zero extended.
3509 Use a sequence to clean up any instructions emitted by
3510 the conversions if things don't work out. */
3511 start_sequence ();
3512 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3513 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3514 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3515 unsignedp, OPTAB_WIDEN);
3516 insns = get_insns ();
3517 end_sequence ();
3518
3519 if (tem)
3520 {
3521 emit_insn (insns);
3522 return extract_high_half (mode, tem);
3523 }
3524 }
3525
3526 /* Try widening multiplication of opposite signedness, and adjust. */
3527 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3528 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3529 && size - 1 < BITS_PER_WORD
3530 && (mul_widen_cost (speed, wider_mode)
3531 + 2 * shift_cost (speed, mode, size-1)
3532 + 4 * add_cost (speed, mode) < max_cost))
3533 {
3534 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3535 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3536 if (tem != 0)
3537 {
3538 tem = extract_high_half (mode, tem);
3539 /* We used the wrong signedness. Adjust the result. */
3540 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3541 target, unsignedp);
3542 }
3543 }
3544
3545 return 0;
3546 }
3547
3548 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3549 putting the high half of the result in TARGET if that is convenient,
3550 and returning where the result is. If the operation cannot be performed,
3551 0 is returned.
3552
3553 MODE is the mode of operation and result.
3554
3555 UNSIGNEDP nonzero means unsigned multiply.
3556
3557 MAX_COST is the total allowed cost for the expanded RTL. */
3558
3559 static rtx
3560 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3561 rtx target, int unsignedp, int max_cost)
3562 {
3563 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3564 unsigned HOST_WIDE_INT cnst1;
3565 int extra_cost;
3566 bool sign_adjust = false;
3567 enum mult_variant variant;
3568 struct algorithm alg;
3569 rtx tem;
3570 bool speed = optimize_insn_for_speed_p ();
3571
3572 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3573 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3574 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3575
3576 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3577
3578 /* We can't optimize modes wider than BITS_PER_WORD.
3579 ??? We might be able to perform double-word arithmetic if
3580 mode == word_mode, however all the cost calculations in
3581 synth_mult etc. assume single-word operations. */
3582 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3583 return expmed_mult_highpart_optab (mode, op0, op1, target,
3584 unsignedp, max_cost);
3585
3586 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3587
3588 /* Check whether we try to multiply by a negative constant. */
3589 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3590 {
3591 sign_adjust = true;
3592 extra_cost += add_cost (speed, mode);
3593 }
3594
3595 /* See whether shift/add multiplication is cheap enough. */
3596 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3597 max_cost - extra_cost))
3598 {
3599 /* See whether the specialized multiplication optabs are
3600 cheaper than the shift/add version. */
3601 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3602 alg.cost.cost + extra_cost);
3603 if (tem)
3604 return tem;
3605
3606 tem = convert_to_mode (wider_mode, op0, unsignedp);
3607 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3608 tem = extract_high_half (mode, tem);
3609
3610 /* Adjust result for signedness. */
3611 if (sign_adjust)
3612 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3613
3614 return tem;
3615 }
3616 return expmed_mult_highpart_optab (mode, op0, op1, target,
3617 unsignedp, max_cost);
3618 }
3619
3620
3621 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3622
3623 static rtx
3624 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3625 {
3626 unsigned HOST_WIDE_INT masklow, maskhigh;
3627 rtx result, temp, shift, label;
3628 int logd;
3629
3630 logd = floor_log2 (d);
3631 result = gen_reg_rtx (mode);
3632
3633 /* Avoid conditional branches when they're expensive. */
3634 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3635 && optimize_insn_for_speed_p ())
3636 {
3637 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3638 mode, 0, -1);
3639 if (signmask)
3640 {
3641 signmask = force_reg (mode, signmask);
3642 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3643 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3644
3645 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3646 which instruction sequence to use. If logical right shifts
3647 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3648 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3649
3650 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3651 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3652 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3653 > COSTS_N_INSNS (2)))
3654 {
3655 temp = expand_binop (mode, xor_optab, op0, signmask,
3656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 temp = expand_binop (mode, sub_optab, temp, signmask,
3658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3660 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3661 temp = expand_binop (mode, xor_optab, temp, signmask,
3662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663 temp = expand_binop (mode, sub_optab, temp, signmask,
3664 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 }
3666 else
3667 {
3668 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 signmask = force_reg (mode, signmask);
3671
3672 temp = expand_binop (mode, add_optab, op0, signmask,
3673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 temp = expand_binop (mode, sub_optab, temp, signmask,
3677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3678 }
3679 return temp;
3680 }
3681 }
3682
3683 /* Mask contains the mode's signbit and the significant bits of the
3684 modulus. By including the signbit in the operation, many targets
3685 can avoid an explicit compare operation in the following comparison
3686 against zero. */
3687
3688 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3689 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3690 {
3691 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3692 maskhigh = -1;
3693 }
3694 else
3695 maskhigh = (HOST_WIDE_INT) -1
3696 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3697
3698 temp = expand_binop (mode, and_optab, op0,
3699 immed_double_const (masklow, maskhigh, mode),
3700 result, 1, OPTAB_LIB_WIDEN);
3701 if (temp != result)
3702 emit_move_insn (result, temp);
3703
3704 label = gen_label_rtx ();
3705 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3706
3707 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3708 0, OPTAB_LIB_WIDEN);
3709 masklow = (HOST_WIDE_INT) -1 << logd;
3710 maskhigh = -1;
3711 temp = expand_binop (mode, ior_optab, temp,
3712 immed_double_const (masklow, maskhigh, mode),
3713 result, 1, OPTAB_LIB_WIDEN);
3714 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3715 0, OPTAB_LIB_WIDEN);
3716 if (temp != result)
3717 emit_move_insn (result, temp);
3718 emit_label (label);
3719 return result;
3720 }
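
/* A worked example (illustrative only, not part of GCC): the two branch-free
   sequences the code above chooses between, shown for D == 4 on a host with
   32-bit int and the usual two's-complement conversion from unsigned to int.
   Both return a remainder with the sign of the dividend, matching truncating
   division.  */
#if 0
static int
example_smod_4_xor_variant (int x)	/* 2 XORs, 2 SUBs and an AND.  */
{
  unsigned int ux = (unsigned int) x;
  unsigned int sign = -(unsigned int) (x < 0);	/* All ones iff x < 0.  */
  return (int) (((((ux ^ sign) - sign) & 3) ^ sign) - sign);
}

static int
example_smod_4_shift_variant (int x)	/* LSHIFTRT, ADD, SUB and an AND.  */
{
  unsigned int sign = -(unsigned int) (x < 0);
  unsigned int low = sign >> (32 - 2);		/* D - 1 == 3 iff x < 0.  */
  return (int) ((((unsigned int) x + low) & 3) - low);
}
#endif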
3721
3722 /* Expand signed division of OP0 by a power of two D in mode MODE.
3723 This routine is only called for positive values of D. */
3724
3725 static rtx
3726 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3727 {
3728 rtx temp, label;
3729 int logd;
3730
3731 logd = floor_log2 (d);
3732
3733 if (d == 2
3734 && BRANCH_COST (optimize_insn_for_speed_p (),
3735 false) >= 1)
3736 {
3737 temp = gen_reg_rtx (mode);
3738 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3739 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3740 0, OPTAB_LIB_WIDEN);
3741 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3742 }
3743
3744 #ifdef HAVE_conditional_move
3745 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3746 >= 2)
3747 {
3748 rtx temp2;
3749
3750 /* ??? emit_conditional_move forces a stack adjustment via
3751 compare_from_rtx, so if the sequence is discarded, it will
3752 be lost. Do it now instead. */
3753 do_pending_stack_adjust ();
3754
3755 start_sequence ();
3756 temp2 = copy_to_mode_reg (mode, op0);
3757 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3758 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3759 temp = force_reg (mode, temp);
3760
3761 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3762 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3763 mode, temp, temp2, mode, 0);
3764 if (temp2)
3765 {
3766 rtx seq = get_insns ();
3767 end_sequence ();
3768 emit_insn (seq);
3769 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3770 }
3771 end_sequence ();
3772 }
3773 #endif
3774
3775 if (BRANCH_COST (optimize_insn_for_speed_p (),
3776 false) >= 2)
3777 {
3778 int ushift = GET_MODE_BITSIZE (mode) - logd;
3779
3780 temp = gen_reg_rtx (mode);
3781 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3782 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3783 > COSTS_N_INSNS (1))
3784 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3785 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3786 else
3787 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3788 ushift, NULL_RTX, 1);
3789 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3790 0, OPTAB_LIB_WIDEN);
3791 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3792 }
3793
3794 label = gen_label_rtx ();
3795 temp = copy_to_mode_reg (mode, op0);
3796 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3797 expand_inc (temp, GEN_INT (d - 1));
3798 emit_label (label);
3799 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3800 }
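
/* A worked example (illustrative only, not part of GCC): the sequence
   produced by the last branch-free case above (the one using
   emit_store_flag) for X / 8, assuming a 32-bit int host on which >> of a
   negative value is an arithmetic shift.  The bias D - 1 is added only for
   negative X, so the final arithmetic shift rounds the quotient toward
   zero as TRUNC_DIV_EXPR requires.  */
#if 0
static int
example_sdiv_by_8 (int x)
{
  unsigned int signmask = -(unsigned int) (x < 0);	/* All ones iff x < 0.  */
  int bias = (int) (signmask >> (32 - 3));		/* 7 iff x < 0.  */
  return (x + bias) >> 3;				/* Arithmetic shift.  */
}
#endif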
3801 \f
3802 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3803 if that is convenient, and returning where the result is.
3804 You may request either the quotient or the remainder as the result;
3805 specify REM_FLAG nonzero to get the remainder.
3806
3807 CODE is the expression code for which kind of division this is;
3808 it controls how rounding is done. MODE is the machine mode to use.
3809 UNSIGNEDP nonzero means do unsigned division. */
3810
3811 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3812 and then correct it by or'ing in missing high bits
3813 if result of ANDI is nonzero.
3814 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3815 This could optimize to a bfexts instruction.
3816 But C doesn't use these operations, so their optimizations are
3817 left for later. */
3818 /* ??? For modulo, we don't actually need the highpart of the first product,
3819 the low part will do nicely. And for small divisors, the second multiply
3820 can also be a low-part only multiply or even be completely left out.
3821 E.g. to calculate the remainder of a division by 3 with a 32 bit
3822 multiply, multiply with 0x55555556 and extract the upper two bits;
3823 the result is exact for inputs up to 0x1fffffff.
3824 The input range can be reduced by using cross-sum rules.
3825 For odd divisors >= 3, the following table gives right shift counts
3826 so that if a number is shifted by an integer multiple of the given
3827 amount, the remainder stays the same:
3828 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3829 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3830 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3831 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3832 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3833
3834 Cross-sum rules for even numbers can be derived by leaving as many bits
3835 to the right alone as the divisor has zeros to the right.
3836 E.g. if x is an unsigned 32 bit number:
3837 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3838 */
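
/* A sketch of the remainder-by-3 trick described above (illustrative only,
   not part of GCC), assuming a host where unsigned int is 32 bits wide.
   For x <= 0x1fffffff the low 32 bits of x * 0x55555556 equal
   (x % 3) * 0x55555556 + 2 * (x / 3), so their top two bits are exactly
   x % 3.  */
#if 0
static unsigned int
example_rem_3 (unsigned int x)	/* Valid for x <= 0x1fffffff.  */
{
  return (x * 0x55555556u) >> 30;
}
#endif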
3839
3840 rtx
3841 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3842 rtx op0, rtx op1, rtx target, int unsignedp)
3843 {
3844 enum machine_mode compute_mode;
3845 rtx tquotient;
3846 rtx quotient = 0, remainder = 0;
3847 rtx last;
3848 int size;
3849 rtx insn;
3850 optab optab1, optab2;
3851 int op1_is_constant, op1_is_pow2 = 0;
3852 int max_cost, extra_cost;
3853 static HOST_WIDE_INT last_div_const = 0;
3854 static HOST_WIDE_INT ext_op1;
3855 bool speed = optimize_insn_for_speed_p ();
3856
3857 op1_is_constant = CONST_INT_P (op1);
3858 if (op1_is_constant)
3859 {
3860 ext_op1 = INTVAL (op1);
3861 if (unsignedp)
3862 ext_op1 &= GET_MODE_MASK (mode);
3863 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3864 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3865 }
3866
3867 /*
3868 This is the structure of expand_divmod:
3869
3870 First comes code to fix up the operands so we can perform the operations
3871 correctly and efficiently.
3872
3873 Second comes a switch statement with code specific for each rounding mode.
3874 For some special operands this code emits all RTL for the desired
3875 operation; for other cases, it generates only a quotient and stores it in
3876 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3877 to indicate that it has not done anything.
3878
3879 Last comes code that finishes the operation. If QUOTIENT is set and
3880 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3881 QUOTIENT is not set, it is computed using trunc rounding.
3882
3883 We try to generate special code for division and remainder when OP1 is a
3884 constant. If |OP1| = 2**n we can use shifts and some other fast
3885 operations. For other values of OP1, we compute a carefully selected
3886 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3887 by m.
3888
3889 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3890 half of the product. Different strategies for generating the product are
3891 implemented in expmed_mult_highpart.
3892
3893 If what we actually want is the remainder, we generate that by another
3894 by-constant multiplication and a subtraction. */
3895
3896 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3897 code below will malfunction if we are, so check here and handle
3898 the special case if so. */
3899 if (op1 == const1_rtx)
3900 return rem_flag ? const0_rtx : op0;
3901
3902 /* When dividing by -1, we could get an overflow.
3903 negv_optab can handle overflows. */
3904 if (! unsignedp && op1 == constm1_rtx)
3905 {
3906 if (rem_flag)
3907 return const0_rtx;
3908 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3909 ? negv_optab : neg_optab, op0, target, 0);
3910 }
3911
3912 if (target
3913 /* Don't use the function value register as a target
3914 since we have to read it as well as write it,
3915 and function-inlining gets confused by this. */
3916 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3917 /* Don't clobber an operand while doing a multi-step calculation. */
3918 || ((rem_flag || op1_is_constant)
3919 && (reg_mentioned_p (target, op0)
3920 || (MEM_P (op0) && MEM_P (target))))
3921 || reg_mentioned_p (target, op1)
3922 || (MEM_P (op1) && MEM_P (target))))
3923 target = 0;
3924
3925 /* Get the mode in which to perform this computation. Normally it will
3926 be MODE, but sometimes we can't do the desired operation in MODE.
3927 If so, pick a wider mode in which we can do the operation. Convert
3928 to that mode at the start to avoid repeated conversions.
3929
3930 First see what operations we need. These depend on the expression
3931 we are evaluating. (We assume that divxx3 insns exist under the
3932 same conditions as modxx3 insns and that these insns don't normally
3933 fail. If these assumptions are not correct, we may generate less
3934 efficient code in some cases.)
3935
3936 Then see if we find a mode in which we can open-code that operation
3937 (either a division, modulus, or shift). Finally, check for the smallest
3938 mode for which we can do the operation with a library call. */
3939
3940 /* We might want to refine this now that we have division-by-constant
3941 optimization. Since expmed_mult_highpart tries so many variants, it is
3942 not straightforward to generalize this. Maybe we should make an array
3943 of possible modes in init_expmed? Save this for GCC 2.7. */
3944
3945 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3946 ? (unsignedp ? lshr_optab : ashr_optab)
3947 : (unsignedp ? udiv_optab : sdiv_optab));
3948 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3949 ? optab1
3950 : (unsignedp ? udivmod_optab : sdivmod_optab));
3951
3952 for (compute_mode = mode; compute_mode != VOIDmode;
3953 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3954 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3955 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3956 break;
3957
3958 if (compute_mode == VOIDmode)
3959 for (compute_mode = mode; compute_mode != VOIDmode;
3960 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3961 if (optab_libfunc (optab1, compute_mode)
3962 || optab_libfunc (optab2, compute_mode))
3963 break;
3964
3965 /* If we still couldn't find a mode, use MODE, but expand_binop will
3966 probably die. */
3967 if (compute_mode == VOIDmode)
3968 compute_mode = mode;
3969
3970 if (target && GET_MODE (target) == compute_mode)
3971 tquotient = target;
3972 else
3973 tquotient = gen_reg_rtx (compute_mode);
3974
3975 size = GET_MODE_BITSIZE (compute_mode);
3976 #if 0
3977 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3978 (mode), and thereby get better code when OP1 is a constant. Do that
3979 later. It will require going over all usages of SIZE below. */
3980 size = GET_MODE_BITSIZE (mode);
3981 #endif
3982
3983 /* Only deduct something for a REM if the last divide done was
3984 for a different constant. Then set the constant of the last
3985 divide. */
3986 max_cost = (unsignedp
3987 ? udiv_cost (speed, compute_mode)
3988 : sdiv_cost (speed, compute_mode));
3989 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3990 && INTVAL (op1) == last_div_const))
3991 max_cost -= (mul_cost (speed, compute_mode)
3992 + add_cost (speed, compute_mode));
3993
3994 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3995
3996 /* Now convert to the best mode to use. */
3997 if (compute_mode != mode)
3998 {
3999 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4000 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4001
4002 /* convert_modes may have placed op1 into a register, so we
4003 must recompute the following. */
4004 op1_is_constant = CONST_INT_P (op1);
4005 op1_is_pow2 = (op1_is_constant
4006 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4007 || (! unsignedp
4008 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4009 }
4010
4011 /* If one of the operands is a volatile MEM, copy it into a register. */
4012
4013 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4014 op0 = force_reg (compute_mode, op0);
4015 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4016 op1 = force_reg (compute_mode, op1);
4017
4018 /* If we need the remainder or if OP1 is constant, we need to
4019 put OP0 in a register in case it has any queued subexpressions. */
4020 if (rem_flag || op1_is_constant)
4021 op0 = force_reg (compute_mode, op0);
4022
4023 last = get_last_insn ();
4024
4025 /* Promote floor rounding to trunc rounding for unsigned operations. */
4026 if (unsignedp)
4027 {
4028 if (code == FLOOR_DIV_EXPR)
4029 code = TRUNC_DIV_EXPR;
4030 if (code == FLOOR_MOD_EXPR)
4031 code = TRUNC_MOD_EXPR;
4032 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4033 code = TRUNC_DIV_EXPR;
4034 }
4035
4036 if (op1 != const0_rtx)
4037 switch (code)
4038 {
4039 case TRUNC_MOD_EXPR:
4040 case TRUNC_DIV_EXPR:
4041 if (op1_is_constant)
4042 {
4043 if (unsignedp)
4044 {
4045 unsigned HOST_WIDE_INT mh, ml;
4046 int pre_shift, post_shift;
4047 int dummy;
4048 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4049 & GET_MODE_MASK (compute_mode));
4050
4051 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4052 {
4053 pre_shift = floor_log2 (d);
4054 if (rem_flag)
4055 {
4056 remainder
4057 = expand_binop (compute_mode, and_optab, op0,
4058 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4059 remainder, 1,
4060 OPTAB_LIB_WIDEN);
4061 if (remainder)
4062 return gen_lowpart (mode, remainder);
4063 }
4064 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4065 pre_shift, tquotient, 1);
4066 }
4067 else if (size <= HOST_BITS_PER_WIDE_INT)
4068 {
4069 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4070 {
4071 /* Most significant bit of divisor is set; emit an scc
4072 insn. */
4073 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4074 compute_mode, 1, 1);
4075 }
4076 else
4077 {
4078 /* Find a suitable multiplier and right shift count
4079 instead of multiplying with D. */
4080
4081 mh = choose_multiplier (d, size, size,
4082 &ml, &post_shift, &dummy);
4083
4084 /* If the suggested multiplier is more than SIZE bits,
4085 we can do better for even divisors, using an
4086 initial right shift. */
4087 if (mh != 0 && (d & 1) == 0)
4088 {
4089 pre_shift = floor_log2 (d & -d);
4090 mh = choose_multiplier (d >> pre_shift, size,
4091 size - pre_shift,
4092 &ml, &post_shift, &dummy);
4093 gcc_assert (!mh);
4094 }
4095 else
4096 pre_shift = 0;
4097
4098 if (mh != 0)
4099 {
4100 rtx t1, t2, t3, t4;
4101
4102 if (post_shift - 1 >= BITS_PER_WORD)
4103 goto fail1;
4104
4105 extra_cost
4106 = (shift_cost (speed, compute_mode, post_shift - 1)
4107 + shift_cost (speed, compute_mode, 1)
4108 + 2 * add_cost (speed, compute_mode));
4109 t1 = expmed_mult_highpart (compute_mode, op0,
4110 GEN_INT (ml),
4111 NULL_RTX, 1,
4112 max_cost - extra_cost);
4113 if (t1 == 0)
4114 goto fail1;
4115 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4116 op0, t1),
4117 NULL_RTX);
4118 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4119 t2, 1, NULL_RTX, 1);
4120 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4121 t1, t3),
4122 NULL_RTX);
4123 quotient = expand_shift
4124 (RSHIFT_EXPR, compute_mode, t4,
4125 post_shift - 1, tquotient, 1);
4126 }
4127 else
4128 {
4129 rtx t1, t2;
4130
4131 if (pre_shift >= BITS_PER_WORD
4132 || post_shift >= BITS_PER_WORD)
4133 goto fail1;
4134
4135 t1 = expand_shift
4136 (RSHIFT_EXPR, compute_mode, op0,
4137 pre_shift, NULL_RTX, 1);
4138 extra_cost
4139 = (shift_cost (speed, compute_mode, pre_shift)
4140 + shift_cost (speed, compute_mode, post_shift));
4141 t2 = expmed_mult_highpart (compute_mode, t1,
4142 GEN_INT (ml),
4143 NULL_RTX, 1,
4144 max_cost - extra_cost);
4145 if (t2 == 0)
4146 goto fail1;
4147 quotient = expand_shift
4148 (RSHIFT_EXPR, compute_mode, t2,
4149 post_shift, tquotient, 1);
4150 }
4151 }
4152 }
4153 else /* Mode too wide to use the tricky code. */
4154 break;
4155
4156 insn = get_last_insn ();
4157 if (insn != last)
4158 set_dst_reg_note (insn, REG_EQUAL,
4159 gen_rtx_UDIV (compute_mode, op0, op1),
4160 quotient);
4161 }
4162 else /* TRUNC_DIV, signed */
4163 {
4164 unsigned HOST_WIDE_INT ml;
4165 int lgup, post_shift;
4166 rtx mlr;
4167 HOST_WIDE_INT d = INTVAL (op1);
4168 unsigned HOST_WIDE_INT abs_d;
4169
4170 /* Since d might be INT_MIN, we have to cast to
4171 unsigned HOST_WIDE_INT before negating to avoid
4172 undefined signed overflow. */
4173 abs_d = (d >= 0
4174 ? (unsigned HOST_WIDE_INT) d
4175 : - (unsigned HOST_WIDE_INT) d);
4176
4177 /* n rem d = n rem -d */
4178 if (rem_flag && d < 0)
4179 {
4180 d = abs_d;
4181 op1 = gen_int_mode (abs_d, compute_mode);
4182 }
4183
4184 if (d == 1)
4185 quotient = op0;
4186 else if (d == -1)
4187 quotient = expand_unop (compute_mode, neg_optab, op0,
4188 tquotient, 0);
4189 else if (HOST_BITS_PER_WIDE_INT >= size
4190 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4191 {
4192 /* This case is not handled correctly below. */
4193 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4194 compute_mode, 1, 1);
4195 if (quotient == 0)
4196 goto fail1;
4197 }
4198 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4199 && (rem_flag
4200 ? smod_pow2_cheap (speed, compute_mode)
4201 : sdiv_pow2_cheap (speed, compute_mode))
4202 /* We assume that the cheap metric is true if the
4203 optab has an expander for this mode. */
4204 && ((optab_handler ((rem_flag ? smod_optab
4205 : sdiv_optab),
4206 compute_mode)
4207 != CODE_FOR_nothing)
4208 || (optab_handler (sdivmod_optab,
4209 compute_mode)
4210 != CODE_FOR_nothing)))
4211 ;
4212 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4213 {
4214 if (rem_flag)
4215 {
4216 remainder = expand_smod_pow2 (compute_mode, op0, d);
4217 if (remainder)
4218 return gen_lowpart (mode, remainder);
4219 }
4220
4221 if (sdiv_pow2_cheap (speed, compute_mode)
4222 && ((optab_handler (sdiv_optab, compute_mode)
4223 != CODE_FOR_nothing)
4224 || (optab_handler (sdivmod_optab, compute_mode)
4225 != CODE_FOR_nothing)))
4226 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4227 compute_mode, op0,
4228 gen_int_mode (abs_d,
4229 compute_mode),
4230 NULL_RTX, 0);
4231 else
4232 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4233
4234 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4235 negate the quotient. */
4236 if (d < 0)
4237 {
4238 insn = get_last_insn ();
4239 if (insn != last
4240 && abs_d < ((unsigned HOST_WIDE_INT) 1
4241 << (HOST_BITS_PER_WIDE_INT - 1)))
4242 set_dst_reg_note (insn, REG_EQUAL,
4243 gen_rtx_DIV (compute_mode, op0,
4244 gen_int_mode
4245 (abs_d,
4246 compute_mode)),
4247 quotient);
4248
4249 quotient = expand_unop (compute_mode, neg_optab,
4250 quotient, quotient, 0);
4251 }
4252 }
4253 else if (size <= HOST_BITS_PER_WIDE_INT)
4254 {
4255 choose_multiplier (abs_d, size, size - 1,
4256 &ml, &post_shift, &lgup);
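/* For illustration (not from the original sources), with SIZE == 32 and
   abs_d == 7 the call above yields ml == 0x92492493 and post_shift == 2.
   That multiplier has bit SIZE - 1 set, so the second branch below
   sign-extends it, adds OP0 back to the high part of the product and then
   shifts; multipliers below 2^(SIZE - 1) take the first branch, which
   needs no add-back.  In both branches the quotient is corrected by the
   sign bit of OP0, and negated via the operand order when D < 0, to give
   truncating rounding.  */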
4257 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4258 {
4259 rtx t1, t2, t3;
4260
4261 if (post_shift >= BITS_PER_WORD
4262 || size - 1 >= BITS_PER_WORD)
4263 goto fail1;
4264
4265 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4266 + shift_cost (speed, compute_mode, size - 1)
4267 + add_cost (speed, compute_mode));
4268 t1 = expmed_mult_highpart (compute_mode, op0,
4269 GEN_INT (ml), NULL_RTX, 0,
4270 max_cost - extra_cost);
4271 if (t1 == 0)
4272 goto fail1;
4273 t2 = expand_shift
4274 (RSHIFT_EXPR, compute_mode, t1,
4275 post_shift, NULL_RTX, 0);
4276 t3 = expand_shift
4277 (RSHIFT_EXPR, compute_mode, op0,
4278 size - 1, NULL_RTX, 0);
4279 if (d < 0)
4280 quotient
4281 = force_operand (gen_rtx_MINUS (compute_mode,
4282 t3, t2),
4283 tquotient);
4284 else
4285 quotient
4286 = force_operand (gen_rtx_MINUS (compute_mode,
4287 t2, t3),
4288 tquotient);
4289 }
4290 else
4291 {
4292 rtx t1, t2, t3, t4;
4293
4294 if (post_shift >= BITS_PER_WORD
4295 || size - 1 >= BITS_PER_WORD)
4296 goto fail1;
4297
4298 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4299 mlr = gen_int_mode (ml, compute_mode);
4300 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4301 + shift_cost (speed, compute_mode, size - 1)
4302 + 2 * add_cost (speed, compute_mode));
4303 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4304 NULL_RTX, 0,
4305 max_cost - extra_cost);
4306 if (t1 == 0)
4307 goto fail1;
4308 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4309 t1, op0),
4310 NULL_RTX);
4311 t3 = expand_shift
4312 (RSHIFT_EXPR, compute_mode, t2,
4313 post_shift, NULL_RTX, 0);
4314 t4 = expand_shift
4315 (RSHIFT_EXPR, compute_mode, op0,
4316 size - 1, NULL_RTX, 0);
4317 if (d < 0)
4318 quotient
4319 = force_operand (gen_rtx_MINUS (compute_mode,
4320 t4, t3),
4321 tquotient);
4322 else
4323 quotient
4324 = force_operand (gen_rtx_MINUS (compute_mode,
4325 t3, t4),
4326 tquotient);
4327 }
4328 }
4329 else /* Mode too wide to use the tricky code. */
4330 break;
4331
4332 insn = get_last_insn ();
4333 if (insn != last)
4334 set_dst_reg_note (insn, REG_EQUAL,
4335 gen_rtx_DIV (compute_mode, op0, op1),
4336 quotient);
4337 }
4338 break;
4339 }
4340 fail1:
4341 delete_insns_since (last);
4342 break;
4343
4344 case FLOOR_DIV_EXPR:
4345 case FLOOR_MOD_EXPR:
4346 /* We will come here only for signed operations. */
4347 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4348 {
4349 unsigned HOST_WIDE_INT mh, ml;
4350 int pre_shift, lgup, post_shift;
4351 HOST_WIDE_INT d = INTVAL (op1);
4352
4353 if (d > 0)
4354 {
4355 /* We could just as easily deal with negative constants here,
4356 but it does not seem worth the trouble for GCC 2.6. */
4357 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4358 {
4359 pre_shift = floor_log2 (d);
4360 if (rem_flag)
4361 {
4362 remainder = expand_binop (compute_mode, and_optab, op0,
4363 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4364 remainder, 0, OPTAB_LIB_WIDEN);
4365 if (remainder)
4366 return gen_lowpart (mode, remainder);
4367 }
4368 quotient = expand_shift
4369 (RSHIFT_EXPR, compute_mode, op0,
4370 pre_shift, tquotient, 0);
4371 }
4372 else
4373 {
4374 rtx t1, t2, t3, t4;
4375
4376 mh = choose_multiplier (d, size, size - 1,
4377 &ml, &post_shift, &lgup);
4378 gcc_assert (!mh);
4379
4380 if (post_shift < BITS_PER_WORD
4381 && size - 1 < BITS_PER_WORD)
4382 {
4383 t1 = expand_shift
4384 (RSHIFT_EXPR, compute_mode, op0,
4385 size - 1, NULL_RTX, 0);
4386 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4387 NULL_RTX, 0, OPTAB_WIDEN);
4388 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4389 + shift_cost (speed, compute_mode, size - 1)
4390 + 2 * add_cost (speed, compute_mode));
4391 t3 = expmed_mult_highpart (compute_mode, t2,
4392 GEN_INT (ml), NULL_RTX, 1,
4393 max_cost - extra_cost);
4394 if (t3 != 0)
4395 {
4396 t4 = expand_shift
4397 (RSHIFT_EXPR, compute_mode, t3,
4398 post_shift, NULL_RTX, 1);
4399 quotient = expand_binop (compute_mode, xor_optab,
4400 t4, t1, tquotient, 0,
4401 OPTAB_WIDEN);
4402 }
4403 }
4404 }
4405 }
4406 else
4407 {
4408 rtx nsign, t1, t2, t3, t4;
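/* An explanatory note (not from the original sources): this branch handles
   a negative constant divisor.  NSIGN below is -1 when OP0 <= 0 and 0 when
   OP0 > 0, so T3 is OP0 - 1 for positive OP0 and OP0 otherwise; dividing
   T3 by OP1 with truncation and then adding ~NSIGN (i.e. subtracting 1
   exactly when OP0 > 0) yields the floor quotient.  For example, with
   OP1 == -3 and OP0 == 7: T3 == 6, 6 / -3 == -2, and -2 - 1 == -3, which
   is floor (7 / -3).  */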
4409 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4410 op0, constm1_rtx), NULL_RTX);
4411 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4412 0, OPTAB_WIDEN);
4413 nsign = expand_shift
4414 (RSHIFT_EXPR, compute_mode, t2,
4415 size - 1, NULL_RTX, 0);
4416 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4417 NULL_RTX);
4418 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4419 NULL_RTX, 0);
4420 if (t4)
4421 {
4422 rtx t5;
4423 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4424 NULL_RTX, 0);
4425 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4426 t4, t5),
4427 tquotient);
4428 }
4429 }
4430 }
4431
4432 if (quotient != 0)
4433 break;
4434 delete_insns_since (last);
4435
4436 /* Try using an instruction that produces both the quotient and
4437 remainder, using truncation. We can easily compensate the quotient
4438 or remainder to get floor rounding, once we have the remainder.
4439 Notice that we compute also the final remainder value here,
4440 and return the result right away. */
4441 if (target == 0 || GET_MODE (target) != compute_mode)
4442 target = gen_reg_rtx (compute_mode);
4443
4444 if (rem_flag)
4445 {
4446 remainder
4447 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4448 quotient = gen_reg_rtx (compute_mode);
4449 }
4450 else
4451 {
4452 quotient
4453 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4454 remainder = gen_reg_rtx (compute_mode);
4455 }
4456
4457 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4458 quotient, remainder, 0))
4459 {
4460 /* This could be computed with a branch-less sequence.
4461 Save that for later. */
4462 rtx tem;
4463 rtx label = gen_label_rtx ();
4464 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4465 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4466 NULL_RTX, 0, OPTAB_WIDEN);
4467 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4468 expand_dec (quotient, const1_rtx);
4469 expand_inc (remainder, op1);
4470 emit_label (label);
4471 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4472 }
4473
4474 /* No luck with division elimination or divmod. Have to do it
4475 by conditionally adjusting op0 *and* the result. */
4476 {
4477 rtx label1, label2, label3, label4, label5;
4478 rtx adjusted_op0;
4479 rtx tem;
4480
4481 quotient = gen_reg_rtx (compute_mode);
4482 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4483 label1 = gen_label_rtx ();
4484 label2 = gen_label_rtx ();
4485 label3 = gen_label_rtx ();
4486 label4 = gen_label_rtx ();
4487 label5 = gen_label_rtx ();
4488 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4489 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4490 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4491 quotient, 0, OPTAB_LIB_WIDEN);
4492 if (tem != quotient)
4493 emit_move_insn (quotient, tem);
4494 emit_jump_insn (gen_jump (label5));
4495 emit_barrier ();
4496 emit_label (label1);
4497 expand_inc (adjusted_op0, const1_rtx);
4498 emit_jump_insn (gen_jump (label4));
4499 emit_barrier ();
4500 emit_label (label2);
4501 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4502 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4503 quotient, 0, OPTAB_LIB_WIDEN);
4504 if (tem != quotient)
4505 emit_move_insn (quotient, tem);
4506 emit_jump_insn (gen_jump (label5));
4507 emit_barrier ();
4508 emit_label (label3);
4509 expand_dec (adjusted_op0, const1_rtx);
4510 emit_label (label4);
4511 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512 quotient, 0, OPTAB_LIB_WIDEN);
4513 if (tem != quotient)
4514 emit_move_insn (quotient, tem);
4515 expand_dec (quotient, const1_rtx);
4516 emit_label (label5);
4517 }
4518 break;
4519
4520 case CEIL_DIV_EXPR:
4521 case CEIL_MOD_EXPR:
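/* An explanatory note (not from the original sources): for CEIL rounding
   the strategy below is to start from the truncating quotient and
   remainder and add 1 to the quotient (and subtract OP1 from the
   remainder) whenever the truncating remainder is nonzero and the exact
   quotient is positive, i.e. always in the unsigned case and only when
   OP0 and OP1 have the same sign in the signed case.  For a power-of-two
   divisor the first case of each branch instead emits a shift (a floor
   division) plus an "is the division inexact" bit; e.g. 7u / 2u gives
   3 + 1 == 4.  */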
4522 if (unsignedp)
4523 {
4524 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4525 {
4526 rtx t1, t2, t3;
4527 unsigned HOST_WIDE_INT d = INTVAL (op1);
4528 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4529 floor_log2 (d), tquotient, 1);
4530 t2 = expand_binop (compute_mode, and_optab, op0,
4531 GEN_INT (d - 1),
4532 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4533 t3 = gen_reg_rtx (compute_mode);
4534 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4535 compute_mode, 1, 1);
4536 if (t3 == 0)
4537 {
4538 rtx lab;
4539 lab = gen_label_rtx ();
4540 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4541 expand_inc (t1, const1_rtx);
4542 emit_label (lab);
4543 quotient = t1;
4544 }
4545 else
4546 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4547 t1, t3),
4548 tquotient);
4549 break;
4550 }
4551
4552 /* Try using an instruction that produces both the quotient and
4553 remainder, using truncation. We can easily compensate the
4554 quotient or remainder to get ceiling rounding, once we have the
4555 remainder. Notice that we compute also the final remainder
4556 value here, and return the result right away. */
4557 if (target == 0 || GET_MODE (target) != compute_mode)
4558 target = gen_reg_rtx (compute_mode);
4559
4560 if (rem_flag)
4561 {
4562 remainder = (REG_P (target)
4563 ? target : gen_reg_rtx (compute_mode));
4564 quotient = gen_reg_rtx (compute_mode);
4565 }
4566 else
4567 {
4568 quotient = (REG_P (target)
4569 ? target : gen_reg_rtx (compute_mode));
4570 remainder = gen_reg_rtx (compute_mode);
4571 }
4572
4573 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4574 remainder, 1))
4575 {
4576 /* This could be computed with a branch-less sequence.
4577 Save that for later. */
4578 rtx label = gen_label_rtx ();
4579 do_cmp_and_jump (remainder, const0_rtx, EQ,
4580 compute_mode, label);
4581 expand_inc (quotient, const1_rtx);
4582 expand_dec (remainder, op1);
4583 emit_label (label);
4584 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4585 }
4586
4587 /* No luck with division elimination or divmod. Have to do it
4588 by conditionally adjusting op0 *and* the result. */
4589 {
4590 rtx label1, label2;
4591 rtx adjusted_op0, tem;
4592
4593 quotient = gen_reg_rtx (compute_mode);
4594 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4595 label1 = gen_label_rtx ();
4596 label2 = gen_label_rtx ();
4597 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4598 compute_mode, label1);
4599 emit_move_insn (quotient, const0_rtx);
4600 emit_jump_insn (gen_jump (label2));
4601 emit_barrier ();
4602 emit_label (label1);
4603 expand_dec (adjusted_op0, const1_rtx);
4604 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4605 quotient, 1, OPTAB_LIB_WIDEN);
4606 if (tem != quotient)
4607 emit_move_insn (quotient, tem);
4608 expand_inc (quotient, const1_rtx);
4609 emit_label (label2);
4610 }
4611 }
4612 else /* signed */
4613 {
4614 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4615 && INTVAL (op1) >= 0)
4616 {
4617 /* This is extremely similar to the code for the unsigned case
4618 above. For 2.7 we should merge these variants, but for
4619 2.6.1 I don't want to touch the code for unsigned since that
4620 gets used in C. The signed case will only be used by other
4621 languages (Ada). */
4622
4623 rtx t1, t2, t3;
4624 unsigned HOST_WIDE_INT d = INTVAL (op1);
4625 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4626 floor_log2 (d), tquotient, 0);
4627 t2 = expand_binop (compute_mode, and_optab, op0,
4628 GEN_INT (d - 1),
4629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4630 t3 = gen_reg_rtx (compute_mode);
4631 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4632 compute_mode, 1, 1);
4633 if (t3 == 0)
4634 {
4635 rtx lab;
4636 lab = gen_label_rtx ();
4637 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4638 expand_inc (t1, const1_rtx);
4639 emit_label (lab);
4640 quotient = t1;
4641 }
4642 else
4643 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4644 t1, t3),
4645 tquotient);
4646 break;
4647 }
4648
4649 /* Try using an instruction that produces both the quotient and
4650 remainder, using truncation. We can easily compensate the
4651 quotient or remainder to get ceiling rounding, once we have the
4652 remainder. Notice that we compute also the final remainder
4653 value here, and return the result right away. */
4654 if (target == 0 || GET_MODE (target) != compute_mode)
4655 target = gen_reg_rtx (compute_mode);
4656 if (rem_flag)
4657 {
4658 remainder = (REG_P (target)
4659 ? target : gen_reg_rtx (compute_mode));
4660 quotient = gen_reg_rtx (compute_mode);
4661 }
4662 else
4663 {
4664 quotient = (REG_P (target)
4665 ? target : gen_reg_rtx (compute_mode));
4666 remainder = gen_reg_rtx (compute_mode);
4667 }
4668
4669 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4670 remainder, 0))
4671 {
4672 /* This could be computed with a branch-less sequence.
4673 Save that for later. */
4674 rtx tem;
4675 rtx label = gen_label_rtx ();
4676 do_cmp_and_jump (remainder, const0_rtx, EQ,
4677 compute_mode, label);
4678 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4679 NULL_RTX, 0, OPTAB_WIDEN);
4680 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4681 expand_inc (quotient, const1_rtx);
4682 expand_dec (remainder, op1);
4683 emit_label (label);
4684 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4685 }
4686
4687 /* No luck with division elimination or divmod. Have to do it
4688 by conditionally adjusting op0 *and* the result. */
4689 {
4690 rtx label1, label2, label3, label4, label5;
4691 rtx adjusted_op0;
4692 rtx tem;
4693
4694 quotient = gen_reg_rtx (compute_mode);
4695 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4696 label1 = gen_label_rtx ();
4697 label2 = gen_label_rtx ();
4698 label3 = gen_label_rtx ();
4699 label4 = gen_label_rtx ();
4700 label5 = gen_label_rtx ();
4701 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4702 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4703 compute_mode, label1);
4704 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4705 quotient, 0, OPTAB_LIB_WIDEN);
4706 if (tem != quotient)
4707 emit_move_insn (quotient, tem);
4708 emit_jump_insn (gen_jump (label5));
4709 emit_barrier ();
4710 emit_label (label1);
4711 expand_dec (adjusted_op0, const1_rtx);
4712 emit_jump_insn (gen_jump (label4));
4713 emit_barrier ();
4714 emit_label (label2);
4715 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4716 compute_mode, label3);
4717 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4718 quotient, 0, OPTAB_LIB_WIDEN);
4719 if (tem != quotient)
4720 emit_move_insn (quotient, tem);
4721 emit_jump_insn (gen_jump (label5));
4722 emit_barrier ();
4723 emit_label (label3);
4724 expand_inc (adjusted_op0, const1_rtx);
4725 emit_label (label4);
4726 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4727 quotient, 0, OPTAB_LIB_WIDEN);
4728 if (tem != quotient)
4729 emit_move_insn (quotient, tem);
4730 expand_inc (quotient, const1_rtx);
4731 emit_label (label5);
4732 }
4733 }
4734 break;
4735
4736 case EXACT_DIV_EXPR:
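/* OP0 is known to be an exact multiple of OP1 here, so no rounding
   correction is needed: shift out the low zero bits of the divisor and
   multiply by the multiplicative inverse of its odd part modulo 2^SIZE
   (see invert_mod2n above).  For illustration (not from the original
   sources), with SIZE == 32 and OP1 == 24 this gives pre_shift == 3 and
   ml == 0xaaaaaaab, the inverse of 3 mod 2^32, so e.g. 48 / 24 becomes
   (48 >> 3) * 0xaaaaaaab == 6 * 0xaaaaaaab == 2 (mod 2^32).  */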
4737 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4738 {
4739 HOST_WIDE_INT d = INTVAL (op1);
4740 unsigned HOST_WIDE_INT ml;
4741 int pre_shift;
4742 rtx t1;
4743
4744 pre_shift = floor_log2 (d & -d);
4745 ml = invert_mod2n (d >> pre_shift, size);
4746 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4747 pre_shift, NULL_RTX, unsignedp);
4748 quotient = expand_mult (compute_mode, t1,
4749 gen_int_mode (ml, compute_mode),
4750 NULL_RTX, 1);
4751
4752 insn = get_last_insn ();
4753 set_dst_reg_note (insn, REG_EQUAL,
4754 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4755 compute_mode, op0, op1),
4756 quotient);
4757 }
4758 break;
4759
4760 case ROUND_DIV_EXPR:
4761 case ROUND_MOD_EXPR:
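/* An explanatory note (not from the original sources): ROUND rounding is
   round to nearest with ties away from zero.  Both branches below first
   obtain the truncating quotient and remainder and then bump the quotient
   by one in the direction of the exact result whenever twice the
   remainder's magnitude is at least |OP1|, adjusting the remainder by OP1
   so that OP0 == quotient * OP1 + remainder still holds.  E.g. 8u / 3u
   gives quotient 2, remainder 2; since 2 * 2 >= 3 the quotient becomes 3.  */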
4762 if (unsignedp)
4763 {
4764 rtx tem;
4765 rtx label;
4766 label = gen_label_rtx ();
4767 quotient = gen_reg_rtx (compute_mode);
4768 remainder = gen_reg_rtx (compute_mode);
4769 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4770 {
4771 rtx tem;
4772 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4773 quotient, 1, OPTAB_LIB_WIDEN);
4774 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4775 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4776 remainder, 1, OPTAB_LIB_WIDEN);
4777 }
4778 tem = plus_constant (compute_mode, op1, -1);
4779 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4780 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4781 expand_inc (quotient, const1_rtx);
4782 expand_dec (remainder, op1);
4783 emit_label (label);
4784 }
4785 else
4786 {
4787 rtx abs_rem, abs_op1, tem, mask;
4788 rtx label;
4789 label = gen_label_rtx ();
4790 quotient = gen_reg_rtx (compute_mode);
4791 remainder = gen_reg_rtx (compute_mode);
4792 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4793 {
4794 rtx tem;
4795 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4796 quotient, 0, OPTAB_LIB_WIDEN);
4797 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4798 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4799 remainder, 0, OPTAB_LIB_WIDEN);
4800 }
4801 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4802 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4803 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4804 1, NULL_RTX, 1);
4805 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4806 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4807 NULL_RTX, 0, OPTAB_WIDEN);
4808 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4809 size - 1, NULL_RTX, 0);
4810 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4811 NULL_RTX, 0, OPTAB_WIDEN);
4812 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4813 NULL_RTX, 0, OPTAB_WIDEN);
4814 expand_inc (quotient, tem);
4815 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4816 NULL_RTX, 0, OPTAB_WIDEN);
4817 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4818 NULL_RTX, 0, OPTAB_WIDEN);
4819 expand_dec (remainder, tem);
4820 emit_label (label);
4821 }
4822 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4823
4824 default:
4825 gcc_unreachable ();
4826 }
4827
4828 if (quotient == 0)
4829 {
4830 if (target && GET_MODE (target) != compute_mode)
4831 target = 0;
4832
4833 if (rem_flag)
4834 {
4835 /* Try to produce the remainder without producing the quotient.
4836 If we seem to have a divmod pattern that does not require widening,
4837 don't try widening here. We should really have a WIDEN argument
4838 to expand_twoval_binop, since what we'd really like to do here is
4839 1) try a mod insn in compute_mode
4840 2) try a divmod insn in compute_mode
4841 3) try a div insn in compute_mode and multiply-subtract to get
4842 remainder
4843 4) try the same things with widening allowed. */
4844 remainder
4845 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4846 op0, op1, target,
4847 unsignedp,
4848 ((optab_handler (optab2, compute_mode)
4849 != CODE_FOR_nothing)
4850 ? OPTAB_DIRECT : OPTAB_WIDEN));
4851 if (remainder == 0)
4852 {
4853 /* No luck there. Can we do remainder and divide at once
4854 without a library call? */
4855 remainder = gen_reg_rtx (compute_mode);
4856 if (! expand_twoval_binop ((unsignedp
4857 ? udivmod_optab
4858 : sdivmod_optab),
4859 op0, op1,
4860 NULL_RTX, remainder, unsignedp))
4861 remainder = 0;
4862 }
4863
4864 if (remainder)
4865 return gen_lowpart (mode, remainder);
4866 }
4867
4868 /* Produce the quotient. Try a quotient insn, but not a library call.
4869 If we have a divmod in this mode, use it in preference to widening
4870 the div (for this test we assume it will not fail). Note that optab2
4871 is set to the one of the two optabs that the call below will use. */
4872 quotient
4873 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4874 op0, op1, rem_flag ? NULL_RTX : target,
4875 unsignedp,
4876 ((optab_handler (optab2, compute_mode)
4877 != CODE_FOR_nothing)
4878 ? OPTAB_DIRECT : OPTAB_WIDEN));
4879
4880 if (quotient == 0)
4881 {
4882 /* No luck there. Try a quotient-and-remainder insn,
4883 keeping the quotient alone. */
4884 quotient = gen_reg_rtx (compute_mode);
4885 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4886 op0, op1,
4887 quotient, NULL_RTX, unsignedp))
4888 {
4889 quotient = 0;
4890 if (! rem_flag)
4891 /* Still no luck. If we are not computing the remainder,
4892 use a library call for the quotient. */
4893 quotient = sign_expand_binop (compute_mode,
4894 udiv_optab, sdiv_optab,
4895 op0, op1, target,
4896 unsignedp, OPTAB_LIB_WIDEN);
4897 }
4898 }
4899 }
4900
4901 if (rem_flag)
4902 {
4903 if (target && GET_MODE (target) != compute_mode)
4904 target = 0;
4905
4906 if (quotient == 0)
4907 {
4908 /* No divide instruction either. Use library for remainder. */
4909 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4910 op0, op1, target,
4911 unsignedp, OPTAB_LIB_WIDEN);
4912 /* No remainder function. Try a quotient-and-remainder
4913 function, keeping the remainder. */
4914 if (!remainder)
4915 {
4916 remainder = gen_reg_rtx (compute_mode);
4917 if (!expand_twoval_binop_libfunc
4918 (unsignedp ? udivmod_optab : sdivmod_optab,
4919 op0, op1,
4920 NULL_RTX, remainder,
4921 unsignedp ? UMOD : MOD))
4922 remainder = NULL_RTX;
4923 }
4924 }
4925 else
4926 {
4927 /* We divided. Now finish doing X - Y * (X / Y). */
4928 remainder = expand_mult (compute_mode, quotient, op1,
4929 NULL_RTX, unsignedp);
4930 remainder = expand_binop (compute_mode, sub_optab, op0,
4931 remainder, target, unsignedp,
4932 OPTAB_LIB_WIDEN);
4933 }
4934 }
4935
4936 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4937 }
4938 \f
4939 /* Return a tree node with data type TYPE, describing the value of X.
4940 Usually this is a VAR_DECL, if there is no obvious better choice.
4941 X may be an expression; however, we only support those expressions
4942 generated by loop.c. */
4943
4944 tree
4945 make_tree (tree type, rtx x)
4946 {
4947 tree t;
4948
4949 switch (GET_CODE (x))
4950 {
4951 case CONST_INT:
4952 {
4953 HOST_WIDE_INT hi = 0;
4954
4955 if (INTVAL (x) < 0
4956 && !(TYPE_UNSIGNED (type)
4957 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4958 < HOST_BITS_PER_WIDE_INT)))
4959 hi = -1;
4960
4961 t = build_int_cst_wide (type, INTVAL (x), hi);
4962
4963 return t;
4964 }
4965
4966 case CONST_DOUBLE:
4967 if (GET_MODE (x) == VOIDmode)
4968 t = build_int_cst_wide (type,
4969 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4970 else
4971 {
4972 REAL_VALUE_TYPE d;
4973
4974 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4975 t = build_real (type, d);
4976 }
4977
4978 return t;
4979
4980 case CONST_VECTOR:
4981 {
4982 int units = CONST_VECTOR_NUNITS (x);
4983 tree itype = TREE_TYPE (type);
4984 tree *elts;
4985 int i;
4986
4987 /* Build a tree with vector elements. */
4988 elts = XALLOCAVEC (tree, units);
4989 for (i = units - 1; i >= 0; --i)
4990 {
4991 rtx elt = CONST_VECTOR_ELT (x, i);
4992 elts[i] = make_tree (itype, elt);
4993 }
4994
4995 return build_vector (type, elts);
4996 }
4997
4998 case PLUS:
4999 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5000 make_tree (type, XEXP (x, 1)));
5001
5002 case MINUS:
5003 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5004 make_tree (type, XEXP (x, 1)));
5005
5006 case NEG:
5007 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5008
5009 case MULT:
5010 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1)));
5012
5013 case ASHIFT:
5014 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5015 make_tree (type, XEXP (x, 1)));
5016
5017 case LSHIFTRT:
5018 t = unsigned_type_for (type);
5019 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5020 make_tree (t, XEXP (x, 0)),
5021 make_tree (type, XEXP (x, 1))));
5022
5023 case ASHIFTRT:
5024 t = signed_type_for (type);
5025 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5026 make_tree (t, XEXP (x, 0)),
5027 make_tree (type, XEXP (x, 1))));
5028
5029 case DIV:
5030 if (TREE_CODE (type) != REAL_TYPE)
5031 t = signed_type_for (type);
5032 else
5033 t = type;
5034
5035 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5036 make_tree (t, XEXP (x, 0)),
5037 make_tree (t, XEXP (x, 1))));
5038 case UDIV:
5039 t = unsigned_type_for (type);
5040 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5041 make_tree (t, XEXP (x, 0)),
5042 make_tree (t, XEXP (x, 1))));
5043
5044 case SIGN_EXTEND:
5045 case ZERO_EXTEND:
5046 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5047 GET_CODE (x) == ZERO_EXTEND);
5048 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5049
5050 case CONST:
5051 return make_tree (type, XEXP (x, 0));
5052
5053 case SYMBOL_REF:
5054 t = SYMBOL_REF_DECL (x);
5055 if (t)
5056 return fold_convert (type, build_fold_addr_expr (t));
5057 /* else fall through. */
5058
5059 default:
5060 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5061
5062 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5063 address mode to pointer mode. */
5064 if (POINTER_TYPE_P (type))
5065 x = convert_memory_address_addr_space
5066 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5067
5068 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5069 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5070 t->decl_with_rtl.rtl = x;
5071
5072 return t;
5073 }
5074 }
5075 \f
5076 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5077 and returning TARGET.
5078
5079 If TARGET is 0, a pseudo-register or constant is returned. */
5080
5081 rtx
5082 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5083 {
5084 rtx tem = 0;
5085
5086 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5087 tem = simplify_binary_operation (AND, mode, op0, op1);
5088 if (tem == 0)
5089 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5090
5091 if (target == 0)
5092 target = tem;
5093 else if (tem != target)
5094 emit_move_insn (target, tem);
5095 return target;
5096 }
5097
5098 /* Helper function for emit_store_flag. */
5099 static rtx
5100 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5101 enum machine_mode mode, enum machine_mode compare_mode,
5102 int unsignedp, rtx x, rtx y, int normalizep,
5103 enum machine_mode target_mode)
5104 {
5105 struct expand_operand ops[4];
5106 rtx op0, last, comparison, subtarget;
5107 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5108
5109 last = get_last_insn ();
5110 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5111 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5112 if (!x || !y)
5113 {
5114 delete_insns_since (last);
5115 return NULL_RTX;
5116 }
5117
5118 if (target_mode == VOIDmode)
5119 target_mode = result_mode;
5120 if (!target)
5121 target = gen_reg_rtx (target_mode);
5122
5123 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5124
5125 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5126 create_fixed_operand (&ops[1], comparison);
5127 create_fixed_operand (&ops[2], x);
5128 create_fixed_operand (&ops[3], y);
5129 if (!maybe_expand_insn (icode, 4, ops))
5130 {
5131 delete_insns_since (last);
5132 return NULL_RTX;
5133 }
5134 subtarget = ops[0].value;
5135
5136 /* If we are converting to a wider mode, first convert to
5137 TARGET_MODE, then normalize. This produces better combining
5138 opportunities on machines that have a SIGN_EXTRACT when we are
5139 testing a single bit. This mostly benefits the 68k.
5140
5141 If STORE_FLAG_VALUE does not have the sign bit set when
5142 interpreted in MODE, we can do this conversion as unsigned, which
5143 is usually more efficient. */
5144 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5145 {
5146 convert_move (target, subtarget,
5147 val_signbit_known_clear_p (result_mode,
5148 STORE_FLAG_VALUE));
5149 op0 = target;
5150 result_mode = target_mode;
5151 }
5152 else
5153 op0 = subtarget;
5154
5155 /* If we want to keep subexpressions around, don't reuse our last
5156 target. */
5157 if (optimize)
5158 subtarget = 0;
5159
5160 /* Now normalize to the proper value in MODE. Sometimes we don't
5161 have to do anything. */
5162 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5163 ;
5164 /* STORE_FLAG_VALUE might be the most negative number, so write
5165 	 the comparison this way to avoid a compile-time warning. */
5166 else if (- normalizep == STORE_FLAG_VALUE)
5167 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5168
5169 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5170 it hard to use a value of just the sign bit due to ANSI integer
5171 constant typing rules. */
5172 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5173 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5174 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5175 normalizep == 1);
5176 else
5177 {
5178 gcc_assert (STORE_FLAG_VALUE & 1);
5179
5180 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5181 if (normalizep == -1)
5182 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5183 }
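  /* A worked sketch of the normalization above, assuming a 32-bit
     result_mode.  If the target's STORE_FLAG_VALUE is the sign bit
     (0x80000000) and a 0/1 result was requested (normalizep == 1), the
     logical shift turns 0x80000000 >> 31 into 1, while an arithmetic
     shift yields -1 for a 0/-1 request.  If STORE_FLAG_VALUE is 1 and a
     0/-1 result was requested, negating the raw 0/1 flag is enough; for
     other odd STORE_FLAG_VALUEs the AND with 1 extracts the low bit
     first.  */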
5184
5185 /* If we were converting to a smaller mode, do the conversion now. */
5186 if (target_mode != result_mode)
5187 {
5188 convert_move (target, op0, 0);
5189 return target;
5190 }
5191 else
5192 return op0;
5193 }
5194
5195
5196 /* A subroutine of emit_store_flag only including "tricks" that do not
5197 need a recursive call. These are kept separate to avoid infinite
5198 loops. */
5199
5200 static rtx
5201 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5202 enum machine_mode mode, int unsignedp, int normalizep,
5203 enum machine_mode target_mode)
5204 {
5205 rtx subtarget;
5206 enum insn_code icode;
5207 enum machine_mode compare_mode;
5208 enum mode_class mclass;
5209 enum rtx_code scode;
5210 rtx tem;
5211
5212 if (unsignedp)
5213 code = unsigned_condition (code);
5214 scode = swap_condition (code);
5215
5216 /* If one operand is constant, make it the second one. Only do this
5217 if the other operand is not constant as well. */
5218
5219 if (swap_commutative_operands_p (op0, op1))
5220 {
5221 tem = op0;
5222 op0 = op1;
5223 op1 = tem;
5224 code = swap_condition (code);
5225 }
5226
5227 if (mode == VOIDmode)
5228 mode = GET_MODE (op0);
5229
5230 /* For some comparisons with 1 and -1, we can convert this to
5231 comparisons with zero. This will often produce more opportunities for
5232 store-flag insns. */
5233
5234 switch (code)
5235 {
5236 case LT:
5237 if (op1 == const1_rtx)
5238 op1 = const0_rtx, code = LE;
5239 break;
5240 case LE:
5241 if (op1 == constm1_rtx)
5242 op1 = const0_rtx, code = LT;
5243 break;
5244 case GE:
5245 if (op1 == const1_rtx)
5246 op1 = const0_rtx, code = GT;
5247 break;
5248 case GT:
5249 if (op1 == constm1_rtx)
5250 op1 = const0_rtx, code = GE;
5251 break;
5252 case GEU:
5253 if (op1 == const1_rtx)
5254 op1 = const0_rtx, code = NE;
5255 break;
5256 case LTU:
5257 if (op1 == const1_rtx)
5258 op1 = const0_rtx, code = EQ;
5259 break;
5260 default:
5261 break;
5262 }
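  /* For example, on signed integers "x < 1" holds exactly when "x <= 0"
     does, and on unsigned integers "x >= 1" is just "x != 0"; the
     rewrites above rely only on such identities, so no knowledge of the
     value range of X is needed.  */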
5263
5264 /* If we are comparing a double-word integer with zero or -1, we can
5265 convert the comparison into one involving a single word. */
5266 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5267 && GET_MODE_CLASS (mode) == MODE_INT
5268 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5269 {
5270 if ((code == EQ || code == NE)
5271 && (op1 == const0_rtx || op1 == constm1_rtx))
5272 {
5273 rtx op00, op01;
5274
5275 /* Do a logical OR or AND of the two words and compare the
5276 result. */
5277 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5278 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5279 tem = expand_binop (word_mode,
5280 op1 == const0_rtx ? ior_optab : and_optab,
5281 op00, op01, NULL_RTX, unsignedp,
5282 OPTAB_DIRECT);
5283
5284 if (tem != 0)
5285 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5286 unsignedp, normalizep);
5287 }
5288 else if ((code == LT || code == GE) && op1 == const0_rtx)
5289 {
5290 rtx op0h;
5291
5292 	  /* If testing the sign bit, we can just test the high word. */
5293 op0h = simplify_gen_subreg (word_mode, op0, mode,
5294 subreg_highpart_offset (word_mode,
5295 mode));
5296 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5297 unsignedp, normalizep);
5298 }
5299 else
5300 tem = NULL_RTX;
5301
5302 if (tem)
5303 {
5304 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5305 return tem;
5306 if (!target)
5307 target = gen_reg_rtx (target_mode);
5308
5309 convert_move (target, tem,
5310 !val_signbit_known_set_p (word_mode,
5311 (normalizep ? normalizep
5312 : STORE_FLAG_VALUE)));
5313 return target;
5314 }
5315 }
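  /* A concrete sketch, assuming 32-bit words and a 64-bit MODE:
       x == 0    becomes   (lo | hi) == 0
       x == -1   becomes   (lo & hi) == -1
       x <  0    becomes   hi < 0     (only the sign bit of the high word
				       matters)
     leaving only single-word operations and a single-word store-flag.  */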
5316
5317   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5318 complement of A (for GE) and shifting the sign bit to the low bit. */
5319 if (op1 == const0_rtx && (code == LT || code == GE)
5320 && GET_MODE_CLASS (mode) == MODE_INT
5321 && (normalizep || STORE_FLAG_VALUE == 1
5322 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5323 {
5324 subtarget = target;
5325
5326 if (!target)
5327 target_mode = mode;
5328
5329 /* If the result is to be wider than OP0, it is best to convert it
5330 first. If it is to be narrower, it is *incorrect* to convert it
5331 first. */
5332 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5333 {
5334 op0 = convert_modes (target_mode, mode, op0, 0);
5335 mode = target_mode;
5336 }
5337
5338 if (target_mode != mode)
5339 subtarget = 0;
5340
5341 if (code == GE)
5342 op0 = expand_unop (mode, one_cmpl_optab, op0,
5343 ((STORE_FLAG_VALUE == 1 || normalizep)
5344 ? 0 : subtarget), 0);
5345
5346 if (STORE_FLAG_VALUE == 1 || normalizep)
5347 /* If we are supposed to produce a 0/1 value, we want to do
5348 a logical shift from the sign bit to the low-order bit; for
5349 a -1/0 value, we do an arithmetic shift. */
5350 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5351 GET_MODE_BITSIZE (mode) - 1,
5352 subtarget, normalizep != -1);
5353
5354 if (mode != target_mode)
5355 op0 = convert_modes (target_mode, mode, op0, 0);
5356
5357 return op0;
5358 }
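  /* For illustration, in a 32-bit mode:
       A <  0, 0/1 result    ->  (unsigned) A >> 31
       A <  0, 0/-1 result   ->  A >> 31               (arithmetic shift)
       A >= 0, 0/1 result    ->  (unsigned) ~A >> 31
     so no conditional branch or scc instruction is needed.  */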
5359
5360 mclass = GET_MODE_CLASS (mode);
5361 for (compare_mode = mode; compare_mode != VOIDmode;
5362 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5363 {
5364 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5365 icode = optab_handler (cstore_optab, optab_mode);
5366 if (icode != CODE_FOR_nothing)
5367 {
5368 do_pending_stack_adjust ();
5369 tem = emit_cstore (target, icode, code, mode, compare_mode,
5370 unsignedp, op0, op1, normalizep, target_mode);
5371 if (tem)
5372 return tem;
5373
5374 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5375 {
5376 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5377 unsignedp, op1, op0, normalizep, target_mode);
5378 if (tem)
5379 return tem;
5380 }
5381 break;
5382 }
5383 }
5384
5385 return 0;
5386 }
5387
5388 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5389    and store the result in TARGET.  Normally return TARGET.
5390 Return 0 if that cannot be done.
5391
5392 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5393 it is VOIDmode, they cannot both be CONST_INT.
5394
5395 UNSIGNEDP is for the case where we have to widen the operands
5396 to perform the operation. It says to use zero-extension.
5397
5398 NORMALIZEP is 1 if we should convert the result to be either zero
5399    or one.  NORMALIZEP is -1 if we should convert the result to be
5400 either zero or -1. If NORMALIZEP is zero, the result will be left
5401 "raw" out of the scc insn. */
5402
5403 rtx
5404 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5405 enum machine_mode mode, int unsignedp, int normalizep)
5406 {
5407 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5408 enum rtx_code rcode;
5409 rtx subtarget;
5410 rtx tem, last, trueval;
5411
5412 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5413 target_mode);
5414 if (tem)
5415 return tem;
5416
5417   /* If we reached here, we can't do this with a scc insn; however, there
5418 are some comparisons that can be done in other ways. Don't do any
5419 of these cases if branches are very cheap. */
5420 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5421 return 0;
5422
5423 /* See what we need to return. We can only return a 1, -1, or the
5424 sign bit. */
5425
5426 if (normalizep == 0)
5427 {
5428 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5429 normalizep = STORE_FLAG_VALUE;
5430
5431 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5432 ;
5433 else
5434 return 0;
5435 }
5436
5437 last = get_last_insn ();
5438
5439 /* If optimizing, use different pseudo registers for each insn, instead
5440 of reusing the same pseudo. This leads to better CSE, but slows
5441      down the compiler, since there are more pseudos.  */
5442 subtarget = (!optimize
5443 && (target_mode == mode)) ? target : NULL_RTX;
5444 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5445
5446 /* For floating-point comparisons, try the reverse comparison or try
5447 changing the "orderedness" of the comparison. */
5448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5449 {
5450 enum rtx_code first_code;
5451 bool and_them;
5452
5453 rcode = reverse_condition_maybe_unordered (code);
5454 if (can_compare_p (rcode, mode, ccp_store_flag)
5455 && (code == ORDERED || code == UNORDERED
5456 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5457 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5458 {
5459 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5460 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5461
5462 /* For the reverse comparison, use either an addition or a XOR. */
5463 if (want_add
5464 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5465 optimize_insn_for_speed_p ()) == 0)
5466 {
5467 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5468 STORE_FLAG_VALUE, target_mode);
5469 if (tem)
5470 return expand_binop (target_mode, add_optab, tem,
5471 GEN_INT (normalizep),
5472 target, 0, OPTAB_WIDEN);
5473 }
5474 else if (!want_add
5475 && rtx_cost (trueval, XOR, 1,
5476 optimize_insn_for_speed_p ()) == 0)
5477 {
5478 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5479 normalizep, target_mode);
5480 if (tem)
5481 return expand_binop (target_mode, xor_optab, tem, trueval,
5482 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5483 }
5484 }
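	  /* A worked sketch of the fix-up above, assuming
	     STORE_FLAG_VALUE == 1: for a 0/-1 result (normalizep == -1)
	     the reverse comparison yields 1 exactly when the original is
	     false, and adding -1 maps that to 0 (false) or -1 (true); for
	     a 0/1 result, XORing the reverse flag with TRUEVAL simply
	     flips it.  Both forms are subject to the rtx_cost checks
	     above.  */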
5485
5486 delete_insns_since (last);
5487
5488 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5489 if (code == ORDERED || code == UNORDERED)
5490 return 0;
5491
5492 and_them = split_comparison (code, mode, &first_code, &code);
5493
5494 /* If there are no NaNs, the first comparison should always fall through.
5495 Effectively change the comparison to the other one. */
5496 if (!HONOR_NANS (mode))
5497 {
5498 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5499 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5500 target_mode);
5501 }
5502
5503 #ifdef HAVE_conditional_move
5504 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5505 conditional move. */
5506 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5507 normalizep, target_mode);
5508 if (tem == 0)
5509 return 0;
5510
5511 if (and_them)
5512 tem = emit_conditional_move (target, code, op0, op1, mode,
5513 tem, const0_rtx, GET_MODE (tem), 0);
5514 else
5515 tem = emit_conditional_move (target, code, op0, op1, mode,
5516 trueval, tem, GET_MODE (tem), 0);
5517
5518 if (tem == 0)
5519 delete_insns_since (last);
5520 return tem;
5521 #else
5522 return 0;
5523 #endif
5524 }
5525
5526 /* The remaining tricks only apply to integer comparisons. */
5527
5528 if (GET_MODE_CLASS (mode) != MODE_INT)
5529 return 0;
5530
5531 /* If this is an equality comparison of integers, we can try to exclusive-or
5532 (or subtract) the two operands and use a recursive call to try the
5533 comparison with zero. Don't do any of these cases if branches are
5534 very cheap. */
5535
5536 if ((code == EQ || code == NE) && op1 != const0_rtx)
5537 {
5538 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5539 OPTAB_WIDEN);
5540
5541 if (tem == 0)
5542 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5543 OPTAB_WIDEN);
5544 if (tem != 0)
5545 tem = emit_store_flag (target, code, tem, const0_rtx,
5546 mode, unsignedp, normalizep);
5547 if (tem != 0)
5548 return tem;
5549
5550 delete_insns_since (last);
5551 }
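  /* For example, "a == b" is retried as "(a ^ b) == 0" (or as
     "(a - b) == 0" when no XOR pattern exists), which feeds back into
     the comparisons-with-zero tricks above and below.  */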
5552
5553 /* For integer comparisons, try the reverse comparison. However, for
5554      a small X that would have to be extended anyway, implementing "X != 0"
5555 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5556 rcode = reverse_condition (code);
5557 if (can_compare_p (rcode, mode, ccp_store_flag)
5558 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5559 && code == NE
5560 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5561 && op1 == const0_rtx))
5562 {
5563 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5564 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5565
5566 /* Again, for the reverse comparison, use either an addition or a XOR. */
5567 if (want_add
5568 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5569 optimize_insn_for_speed_p ()) == 0)
5570 {
5571 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5572 STORE_FLAG_VALUE, target_mode);
5573 if (tem != 0)
5574 tem = expand_binop (target_mode, add_optab, tem,
5575 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5576 }
5577 else if (!want_add
5578 && rtx_cost (trueval, XOR, 1,
5579 optimize_insn_for_speed_p ()) == 0)
5580 {
5581 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 normalizep, target_mode);
5583 if (tem != 0)
5584 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5585 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5586 }
5587
5588 if (tem != 0)
5589 return tem;
5590 delete_insns_since (last);
5591 }
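  /* The exception spelled out above: for a narrow X with no cstore
     pattern, "X != 0" computed as the sign bit of -(int) X (zero-extend,
     negate, arithmetic shift) is handled by the EQ/NE code further below
     and beats computing "X == 0" here and then inverting it.  */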
5592
5593 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5594 the constant zero. Reject all other comparisons at this point. Only
5595 do LE and GT if branches are expensive since they are expensive on
5596 2-operand machines. */
5597
5598 if (op1 != const0_rtx
5599 || (code != EQ && code != NE
5600 && (BRANCH_COST (optimize_insn_for_speed_p (),
5601 false) <= 1 || (code != LE && code != GT))))
5602 return 0;
5603
5604 /* Try to put the result of the comparison in the sign bit. Assume we can't
5605 do the necessary operation below. */
5606
5607 tem = 0;
5608
5609 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5610 the sign bit set. */
5611
5612 if (code == LE)
5613 {
5614 /* This is destructive, so SUBTARGET can't be OP0. */
5615 if (rtx_equal_p (subtarget, op0))
5616 subtarget = 0;
5617
5618 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5619 OPTAB_WIDEN);
5620 if (tem)
5621 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5622 OPTAB_WIDEN);
5623 }
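  /* For illustration, with 32-bit two's-complement values:
       A ==  3:   3 |  2  ==  3    sign bit clear, so A >  0
       A ==  0:   0 | -1  == -1    sign bit set,   so A <= 0
       A == -5:  -5 | -6  == -5    sign bit set,   so A <= 0  */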
5624
5625   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5626 number of bits in the mode of OP0, minus one. */
5627
5628 if (code == GT)
5629 {
5630 if (rtx_equal_p (subtarget, op0))
5631 subtarget = 0;
5632
5633 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5634 GET_MODE_BITSIZE (mode) - 1,
5635 subtarget, 0);
5636 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5637 OPTAB_WIDEN);
5638 }
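  /* For illustration, with 32-bit values (so BITS == 31):
       A ==  3:  ( 3 >> 31) -  3  ==  0 - 3  == -3   sign bit set,   so A >  0
       A ==  0:  ( 0 >> 31) -  0  ==  0              sign bit clear, so A <= 0
       A == -5:  (-5 >> 31) - -5  == -1 + 5 ==  4    sign bit clear, so A <= 0  */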
5639
5640 if (code == EQ || code == NE)
5641 {
5642 /* For EQ or NE, one way to do the comparison is to apply an operation
5643 that converts the operand into a positive number if it is nonzero
5644 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5645 for NE we negate. This puts the result in the sign bit. Then we
5646 normalize with a shift, if needed.
5647
5648 Two operations that can do the above actions are ABS and FFS, so try
5649 them. If that doesn't work, and MODE is smaller than a full word,
5650 we can use zero-extension to the wider mode (an unsigned conversion)
5651 as the operation. */
5652
5653 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5654 that is compensated by the subsequent overflow when subtracting
5655 one / negating. */
5656
5657 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5658 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5659 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5660 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5661 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5662 {
5663 tem = convert_modes (word_mode, mode, op0, 1);
5664 mode = word_mode;
5665 }
5666
5667 if (tem != 0)
5668 {
5669 if (code == EQ)
5670 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5671 0, OPTAB_WIDEN);
5672 else
5673 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5674 }
5675
5676 /* If we couldn't do it that way, for NE we can "or" the two's complement
5677 of the value with itself. For EQ, we take the one's complement of
5678 that "or", which is an extra insn, so we only handle EQ if branches
5679 are expensive. */
5680
5681 if (tem == 0
5682 && (code == NE
5683 || BRANCH_COST (optimize_insn_for_speed_p (),
5684 false) > 1))
5685 {
5686 if (rtx_equal_p (subtarget, op0))
5687 subtarget = 0;
5688
5689 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5690 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5691 OPTAB_WIDEN);
5692
5693 if (tem && code == EQ)
5694 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5695 }
5696 }
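  /* For illustration of the last trick: with A == 5, (-A) | A is
     -5 | 5 == -1, whose sign bit is set, while with A == 0 the result
     is 0; so the sign bit of (-A) | A is exactly "A != 0", and its
     one's complement gives "A == 0".  */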
5697
5698 if (tem && normalizep)
5699 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5700 GET_MODE_BITSIZE (mode) - 1,
5701 subtarget, normalizep == 1);
5702
5703 if (tem)
5704 {
5705 if (!target)
5706 ;
5707 else if (GET_MODE (tem) != target_mode)
5708 {
5709 convert_move (target, tem, 0);
5710 tem = target;
5711 }
5712 else if (!subtarget)
5713 {
5714 emit_move_insn (target, tem);
5715 tem = target;
5716 }
5717 }
5718 else
5719 delete_insns_since (last);
5720
5721 return tem;
5722 }
5723
5724 /* Like emit_store_flag, but always succeeds. */
5725
5726 rtx
5727 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5728 enum machine_mode mode, int unsignedp, int normalizep)
5729 {
5730 rtx tem, label;
5731 rtx trueval, falseval;
5732
5733 /* First see if emit_store_flag can do the job. */
5734 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5735 if (tem != 0)
5736 return tem;
5737
5738 if (!target)
5739 target = gen_reg_rtx (word_mode);
5740
5741 /* If this failed, we have to do this with set/compare/jump/set code.
5742      For foo != 0, if foo is already in TARGET, just replace it with
5742      TRUEVAL when it is nonzero.  */
5743 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5744 if (code == NE
5745 && GET_MODE_CLASS (mode) == MODE_INT
5746 && REG_P (target)
5747 && op0 == target
5748 && op1 == const0_rtx)
5749 {
5750 label = gen_label_rtx ();
5751 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5752 mode, NULL_RTX, NULL_RTX, label, -1);
5753 emit_move_insn (target, trueval);
5754 emit_label (label);
5755 return target;
5756 }
5757
5758 if (!REG_P (target)
5759 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5760 target = gen_reg_rtx (GET_MODE (target));
5761
5762 /* Jump in the right direction if the target cannot implement CODE
5763 but can jump on its reverse condition. */
5764 falseval = const0_rtx;
5765 if (! can_compare_p (code, mode, ccp_jump)
5766 && (! FLOAT_MODE_P (mode)
5767 || code == ORDERED || code == UNORDERED
5768 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5769 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5770 {
5771 enum rtx_code rcode;
5772 if (FLOAT_MODE_P (mode))
5773 rcode = reverse_condition_maybe_unordered (code);
5774 else
5775 rcode = reverse_condition (code);
5776
5777 /* Canonicalize to UNORDERED for the libcall. */
5778 if (can_compare_p (rcode, mode, ccp_jump)
5779 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5780 {
5781 falseval = trueval;
5782 trueval = const0_rtx;
5783 code = rcode;
5784 }
5785 }
5786
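  /* The fallback below emits a sequence of the form

	 target = trueval;
	 if (op0 <code> op1) goto done;
	 target = falseval;
       done:

     where TRUEVAL and FALSEVAL (and CODE) have already been swapped
     above if only the reverse condition can be branched on.  */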
5787 emit_move_insn (target, trueval);
5788 label = gen_label_rtx ();
5789 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5790 NULL_RTX, label, -1);
5791
5792 emit_move_insn (target, falseval);
5793 emit_label (label);
5794
5795 return target;
5796 }
5797 \f
5798 /* Perform possibly multi-word comparison and conditional jump to LABEL
5799 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5800 now a thin wrapper around do_compare_rtx_and_jump. */
5801
5802 static void
5803 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5804 rtx label)
5805 {
5806 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5807 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5808 NULL_RTX, NULL_RTX, label, -1);
5809 }