1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2013 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "recog.h"
35 #include "langhooks.h"
36 #include "df.h"
37 #include "target.h"
38 #include "expmed.h"
39
40 struct target_expmed default_target_expmed;
41 #if SWITCHABLE_TARGET
42 struct target_expmed *this_target_expmed = &default_target_expmed;
43 #endif
44
45 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 rtx);
50 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 rtx);
55 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT, rtx, int, bool);
58 static rtx mask_rtx (enum machine_mode, int, int, int);
59 static rtx lshift_value (enum machine_mode, rtx, int, int);
60 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, int);
62 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
63 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
64 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
65
66 /* Test whether a value is zero or a power of two. */
67 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
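/* For instance, x == 8 (0b1000) gives 8 & 7 == 0, so the test holds;
   x == 6 (0b0110) gives 6 & 5 == 4, so it fails; x == 0 also holds,
   hence the "OR_ZERO" in the name.  (Illustrative values only.)  */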
68
69 struct init_expmed_rtl
70 {
71 struct rtx_def reg; rtunion reg_fld[2];
72 struct rtx_def plus; rtunion plus_fld1;
73 struct rtx_def neg;
74 struct rtx_def mult; rtunion mult_fld1;
75 struct rtx_def sdiv; rtunion sdiv_fld1;
76 struct rtx_def udiv; rtunion udiv_fld1;
77 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
78 struct rtx_def smod_32; rtunion smod_32_fld1;
79 struct rtx_def wide_mult; rtunion wide_mult_fld1;
80 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
81 struct rtx_def wide_trunc;
82 struct rtx_def shift; rtunion shift_fld1;
83 struct rtx_def shift_mult; rtunion shift_mult_fld1;
84 struct rtx_def shift_add; rtunion shift_add_fld1;
85 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
86 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
87 struct rtx_def zext;
88 struct rtx_def trunc;
89
90 rtx pow2[MAX_BITS_PER_WORD];
91 rtx cint[MAX_BITS_PER_WORD];
92 };
93
94 static void
95 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
96 enum machine_mode from_mode, bool speed)
97 {
98 int to_size, from_size;
99 rtx which;
100
101 /* We're given no information about the true size of a partial integer,
102 only the size of the "full" integer it requires for storage. For
103 comparison purposes here, reduce the bit size by one in that case. */
104 to_size = (GET_MODE_BITSIZE (to_mode)
105 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
106 from_size = (GET_MODE_BITSIZE (from_mode)
107 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
108
109 /* Assume cost of zero-extend and sign-extend is the same. */
110 which = (to_size < from_size ? &all->trunc : &all->zext);
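/* Illustrative example: if TO_MODE is a partial-integer mode stored in
   32 bits, TO_SIZE is counted as 31, so a conversion from a true 32-bit
   FROM_MODE is costed as a truncation rather than an extension.  */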
111
112 PUT_MODE (&all->reg, from_mode);
113 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
114 }
115
116 static void
117 init_expmed_one_mode (struct init_expmed_rtl *all,
118 enum machine_mode mode, int speed)
119 {
120 int m, n, mode_bitsize;
121 enum machine_mode mode_from;
122
123 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
124
125 PUT_MODE (&all->reg, mode);
126 PUT_MODE (&all->plus, mode);
127 PUT_MODE (&all->neg, mode);
128 PUT_MODE (&all->mult, mode);
129 PUT_MODE (&all->sdiv, mode);
130 PUT_MODE (&all->udiv, mode);
131 PUT_MODE (&all->sdiv_32, mode);
132 PUT_MODE (&all->smod_32, mode);
133 PUT_MODE (&all->wide_trunc, mode);
134 PUT_MODE (&all->shift, mode);
135 PUT_MODE (&all->shift_mult, mode);
136 PUT_MODE (&all->shift_add, mode);
137 PUT_MODE (&all->shift_sub0, mode);
138 PUT_MODE (&all->shift_sub1, mode);
139 PUT_MODE (&all->zext, mode);
140 PUT_MODE (&all->trunc, mode);
141
142 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
143 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
144 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
145 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
146 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
147
148 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
149 <= 2 * add_cost (speed, mode)));
150 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
151 <= 4 * add_cost (speed, mode)));
152
153 set_shift_cost (speed, mode, 0, 0);
154 {
155 int cost = add_cost (speed, mode);
156 set_shiftadd_cost (speed, mode, 0, cost);
157 set_shiftsub0_cost (speed, mode, 0, cost);
158 set_shiftsub1_cost (speed, mode, 0, cost);
159 }
160
161 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
162 for (m = 1; m < n; m++)
163 {
164 XEXP (&all->shift, 1) = all->cint[m];
165 XEXP (&all->shift_mult, 1) = all->pow2[m];
166
167 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
168 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
169 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
170 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
171 }
172
173 if (SCALAR_INT_MODE_P (mode))
174 {
175 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
176 mode_from = (enum machine_mode)(mode_from + 1))
177 init_expmed_one_conv (all, mode, mode_from, speed);
178 }
179 if (GET_MODE_CLASS (mode) == MODE_INT)
180 {
181 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
182 if (wider_mode != VOIDmode)
183 {
184 PUT_MODE (&all->zext, wider_mode);
185 PUT_MODE (&all->wide_mult, wider_mode);
186 PUT_MODE (&all->wide_lshr, wider_mode);
187 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
188
189 set_mul_widen_cost (speed, wider_mode,
190 set_src_cost (&all->wide_mult, speed));
191 set_mul_highpart_cost (speed, mode,
192 set_src_cost (&all->wide_trunc, speed));
193 }
194 }
195 }
196
197 void
198 init_expmed (void)
199 {
200 struct init_expmed_rtl all;
201 enum machine_mode mode;
202 int m, speed;
203
204 memset (&all, 0, sizeof all);
205 for (m = 1; m < MAX_BITS_PER_WORD; m++)
206 {
207 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
208 all.cint[m] = GEN_INT (m);
209 }
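/* For example, after this loop all.pow2[3] is (const_int 8) and
   all.cint[3] is (const_int 3); they are reused below as shift counts
   and multipliers when measuring shift and shift-add costs.
   (Illustrative indices.)  */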
210
211 PUT_CODE (&all.reg, REG);
212 /* Avoid using hard regs in ways which may be unsupported. */
213 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
214
215 PUT_CODE (&all.plus, PLUS);
216 XEXP (&all.plus, 0) = &all.reg;
217 XEXP (&all.plus, 1) = &all.reg;
218
219 PUT_CODE (&all.neg, NEG);
220 XEXP (&all.neg, 0) = &all.reg;
221
222 PUT_CODE (&all.mult, MULT);
223 XEXP (&all.mult, 0) = &all.reg;
224 XEXP (&all.mult, 1) = &all.reg;
225
226 PUT_CODE (&all.sdiv, DIV);
227 XEXP (&all.sdiv, 0) = &all.reg;
228 XEXP (&all.sdiv, 1) = &all.reg;
229
230 PUT_CODE (&all.udiv, UDIV);
231 XEXP (&all.udiv, 0) = &all.reg;
232 XEXP (&all.udiv, 1) = &all.reg;
233
234 PUT_CODE (&all.sdiv_32, DIV);
235 XEXP (&all.sdiv_32, 0) = &all.reg;
236 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
237
238 PUT_CODE (&all.smod_32, MOD);
239 XEXP (&all.smod_32, 0) = &all.reg;
240 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
241
242 PUT_CODE (&all.zext, ZERO_EXTEND);
243 XEXP (&all.zext, 0) = &all.reg;
244
245 PUT_CODE (&all.wide_mult, MULT);
246 XEXP (&all.wide_mult, 0) = &all.zext;
247 XEXP (&all.wide_mult, 1) = &all.zext;
248
249 PUT_CODE (&all.wide_lshr, LSHIFTRT);
250 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
251
252 PUT_CODE (&all.wide_trunc, TRUNCATE);
253 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
254
255 PUT_CODE (&all.shift, ASHIFT);
256 XEXP (&all.shift, 0) = &all.reg;
257
258 PUT_CODE (&all.shift_mult, MULT);
259 XEXP (&all.shift_mult, 0) = &all.reg;
260
261 PUT_CODE (&all.shift_add, PLUS);
262 XEXP (&all.shift_add, 0) = &all.shift_mult;
263 XEXP (&all.shift_add, 1) = &all.reg;
264
265 PUT_CODE (&all.shift_sub0, MINUS);
266 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
267 XEXP (&all.shift_sub0, 1) = &all.reg;
268
269 PUT_CODE (&all.shift_sub1, MINUS);
270 XEXP (&all.shift_sub1, 0) = &all.reg;
271 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
272
273 PUT_CODE (&all.trunc, TRUNCATE);
274 XEXP (&all.trunc, 0) = &all.reg;
275
276 for (speed = 0; speed < 2; speed++)
277 {
278 crtl->maybe_hot_insn_p = speed;
279 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
280
281 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
282 mode = (enum machine_mode)(mode + 1))
283 init_expmed_one_mode (&all, mode, speed);
284
285 if (MIN_MODE_PARTIAL_INT != VOIDmode)
286 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
287 mode = (enum machine_mode)(mode + 1))
288 init_expmed_one_mode (&all, mode, speed);
289
290 if (MIN_MODE_VECTOR_INT != VOIDmode)
291 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
292 mode = (enum machine_mode)(mode + 1))
293 init_expmed_one_mode (&all, mode, speed);
294 }
295
296 if (alg_hash_used_p ())
297 {
298 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
299 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
300 }
301 else
302 set_alg_hash_used_p (true);
303 default_rtl_profile ();
304 }
305
306 /* Return an rtx representing minus the value of X.
307 MODE is the intended mode of the result,
308 useful if X is a CONST_INT. */
309
310 rtx
311 negate_rtx (enum machine_mode mode, rtx x)
312 {
313 rtx result = simplify_unary_operation (NEG, mode, x, mode);
314
315 if (result == 0)
316 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
317
318 return result;
319 }
320
321 /* Adjust bitfield memory MEM so that it points to the first unit of mode
322 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
323 If MODE is BLKmode, return a reference to every byte in the bitfield.
324 Set *NEW_BITNUM to the bit position of the field within the new memory. */
325
326 static rtx
327 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
328 unsigned HOST_WIDE_INT bitsize,
329 unsigned HOST_WIDE_INT bitnum,
330 unsigned HOST_WIDE_INT *new_bitnum)
331 {
332 if (mode == BLKmode)
333 {
334 *new_bitnum = bitnum % BITS_PER_UNIT;
335 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
336 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
337 / BITS_PER_UNIT);
338 return adjust_bitfield_address_size (mem, mode, offset, size);
339 }
340 else
341 {
342 unsigned int unit = GET_MODE_BITSIZE (mode);
343 *new_bitnum = bitnum % unit;
344 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
345 return adjust_bitfield_address (mem, mode, offset);
346 }
347 }
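/* Illustrative example, assuming BITS_PER_UNIT == 8: narrowing to a
   32-bit MODE for a 3-bit field at BITNUM 37 sets *NEW_BITNUM to
   37 % 32 == 5 and offsets the address by (37 - 5) / 8 == 4 bytes.  */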
348
349 /* The caller wants to perform insertion or extraction PATTERN on a
350 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
351 BITREGION_START and BITREGION_END are as for store_bit_field
352 and FIELDMODE is the natural mode of the field.
353
354 Search for a mode that is compatible with the memory access
355 restrictions and (where applicable) with a register insertion or
356 extraction. Return the new memory on success, storing the adjusted
357 bit position in *NEW_BITNUM. Return null otherwise. */
358
359 static rtx
360 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
361 rtx op0, HOST_WIDE_INT bitsize,
362 HOST_WIDE_INT bitnum,
363 unsigned HOST_WIDE_INT bitregion_start,
364 unsigned HOST_WIDE_INT bitregion_end,
365 enum machine_mode fieldmode,
366 unsigned HOST_WIDE_INT *new_bitnum)
367 {
368 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
369 bitregion_end, MEM_ALIGN (op0),
370 MEM_VOLATILE_P (op0));
371 enum machine_mode best_mode;
372 if (iter.next_mode (&best_mode))
373 {
374 /* We can use a memory in BEST_MODE. See whether this is true for
375 any wider modes. All other things being equal, we prefer to
376 use the widest mode possible because it tends to expose more
377 CSE opportunities. */
378 if (!iter.prefer_smaller_modes ())
379 {
380 /* Limit the search to the mode required by the corresponding
381 register insertion or extraction instruction, if any. */
382 enum machine_mode limit_mode = word_mode;
383 extraction_insn insn;
384 if (get_best_reg_extraction_insn (&insn, pattern,
385 GET_MODE_BITSIZE (best_mode),
386 fieldmode))
387 limit_mode = insn.field_mode;
388
389 enum machine_mode wider_mode;
390 while (iter.next_mode (&wider_mode)
391 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
392 best_mode = wider_mode;
393 }
394 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
395 new_bitnum);
396 }
397 return NULL_RTX;
398 }
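/* Illustrative example: when inserting into a byte-aligned MEM, wider
   modes are preferred, and the matching insv pattern operates on 32-bit
   registers, LIMIT_MODE is that 32-bit mode, so the search keeps
   widening BEST_MODE (say from QImode through HImode to SImode) but
   never beyond 32 bits.  */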
399
400 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
401 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
402 offset is then BITNUM / BITS_PER_UNIT. */
403
404 static bool
405 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
406 unsigned HOST_WIDE_INT bitsize,
407 enum machine_mode struct_mode)
408 {
409 if (BYTES_BIG_ENDIAN)
410 return (bitnum % BITS_PER_UNIT == 0
411 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
412 || (bitnum + bitsize) % BITS_PER_WORD == 0));
413 else
414 return bitnum % BITS_PER_WORD == 0;
415 }
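/* Illustrative example: on a little-endian target with 32-bit words, a
   16-bit field at BITNUM 32 of a 64-bit structure is a lowpart
   (32 % BITS_PER_WORD == 0), so it can be accessed as a subreg at byte
   offset 32 / BITS_PER_UNIT == 4.  */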
416
417 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
418 bit number BITNUM can be treated as a simple value of mode MODE. */
419
420 static bool
421 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
423 {
424 return (MEM_P (op0)
425 && bitnum % BITS_PER_UNIT == 0
426 && bitsize == GET_MODE_BITSIZE (mode)
427 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
428 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
429 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
430 }
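/* Illustrative example: a 16-bit field starting at bit 16 of a MEM whose
   MEM_ALIGN is at least 16 satisfies all of the above for HImode, so the
   field can be read or written with a single HImode move.  */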
431 \f
432 /* Try to use instruction INSV to store VALUE into a field of OP0.
433 BITSIZE and BITNUM are as for store_bit_field. */
434
435 static bool
436 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
437 unsigned HOST_WIDE_INT bitsize,
438 unsigned HOST_WIDE_INT bitnum, rtx value)
439 {
440 struct expand_operand ops[4];
441 rtx value1;
442 rtx xop0 = op0;
443 rtx last = get_last_insn ();
444 bool copy_back = false;
445
446 enum machine_mode op_mode = insv->field_mode;
447 unsigned int unit = GET_MODE_BITSIZE (op_mode);
448 if (bitsize == 0 || bitsize > unit)
449 return false;
450
451 if (MEM_P (xop0))
452 /* Get a reference to the first byte of the field. */
453 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
454 &bitnum);
455 else
456 {
457 /* Convert from counting within OP0 to counting in OP_MODE. */
458 if (BYTES_BIG_ENDIAN)
459 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
460
461 /* If xop0 is a register, we need it in OP_MODE
462 to make it acceptable to the format of insv. */
463 if (GET_CODE (xop0) == SUBREG)
464 /* We can't just change the mode, because this might clobber op0,
465 and we will need the original value of op0 if insv fails. */
466 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
467 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
468 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
469 }
470
471 /* If the destination is a paradoxical subreg such that we need a
472 truncate to the inner mode, perform the insertion on a temporary and
473 truncate the result to the original destination. Note that we can't
474 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
475 X) 0)) is (reg:N X). */
476 if (GET_CODE (xop0) == SUBREG
477 && REG_P (SUBREG_REG (xop0))
478 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
479 op_mode))
480 {
481 rtx tem = gen_reg_rtx (op_mode);
482 emit_move_insn (tem, xop0);
483 xop0 = tem;
484 copy_back = true;
485 }
486
487 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
488 "backwards" from the size of the unit we are inserting into.
489 Otherwise, we count bits from the most significant on a
490 BYTES/BITS_BIG_ENDIAN machine. */
491
492 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
493 bitnum = unit - bitsize - bitnum;
494
495 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
496 value1 = value;
497 if (GET_MODE (value) != op_mode)
498 {
499 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
500 {
501 /* Optimization: Don't bother really extending VALUE
502 if it has all the bits we will actually use. However,
503 if we must narrow it, be sure we do it correctly. */
504
505 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
506 {
507 rtx tmp;
508
509 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
510 if (! tmp)
511 tmp = simplify_gen_subreg (op_mode,
512 force_reg (GET_MODE (value),
513 value1),
514 GET_MODE (value), 0);
515 value1 = tmp;
516 }
517 else
518 value1 = gen_lowpart (op_mode, value1);
519 }
520 else if (CONST_INT_P (value))
521 value1 = gen_int_mode (INTVAL (value), op_mode);
522 else
523 /* Parse phase is supposed to make VALUE's data type
524 match that of the component reference, which is a type
525 at least as wide as the field; so VALUE should have
526 a mode that corresponds to that type. */
527 gcc_assert (CONSTANT_P (value));
528 }
529
530 create_fixed_operand (&ops[0], xop0);
531 create_integer_operand (&ops[1], bitsize);
532 create_integer_operand (&ops[2], bitnum);
533 create_input_operand (&ops[3], value1, op_mode);
534 if (maybe_expand_insn (insv->icode, 4, ops))
535 {
536 if (copy_back)
537 convert_move (op0, xop0, true);
538 return true;
539 }
540 delete_insns_since (last);
541 return false;
542 }
543
544 /* A subroutine of store_bit_field, with the same arguments. Return true
545 if the operation could be implemented.
546
547 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
548 no other way of implementing the operation. If FALLBACK_P is false,
549 return false instead. */
550
551 static bool
552 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
553 unsigned HOST_WIDE_INT bitnum,
554 unsigned HOST_WIDE_INT bitregion_start,
555 unsigned HOST_WIDE_INT bitregion_end,
556 enum machine_mode fieldmode,
557 rtx value, bool fallback_p)
558 {
559 rtx op0 = str_rtx;
560 rtx orig_value;
561
562 while (GET_CODE (op0) == SUBREG)
563 {
564 /* The following line once was done only if WORDS_BIG_ENDIAN,
565 but I think that is a mistake. WORDS_BIG_ENDIAN is
566 meaningful at a much higher level; when structures are copied
567 between memory and regs, the higher-numbered regs
568 always get higher addresses. */
569 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
570 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
571 int byte_offset = 0;
572
573 /* Paradoxical subregs need special handling on big endian machines. */
574 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
575 {
576 int difference = inner_mode_size - outer_mode_size;
577
578 if (WORDS_BIG_ENDIAN)
579 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
580 if (BYTES_BIG_ENDIAN)
581 byte_offset += difference % UNITS_PER_WORD;
582 }
583 else
584 byte_offset = SUBREG_BYTE (op0);
585
586 bitnum += byte_offset * BITS_PER_UNIT;
587 op0 = SUBREG_REG (op0);
588 }
589
590 /* No action is needed if the target is a register and if the field
591 lies completely outside that register. This can occur if the source
592 code contains an out-of-bounds access to a small array. */
593 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
594 return true;
595
596 /* Use vec_set patterns for inserting parts of vectors whenever
597 available. */
598 if (VECTOR_MODE_P (GET_MODE (op0))
599 && !MEM_P (op0)
600 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
601 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
602 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
603 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
604 {
605 struct expand_operand ops[3];
606 enum machine_mode outermode = GET_MODE (op0);
607 enum machine_mode innermode = GET_MODE_INNER (outermode);
608 enum insn_code icode = optab_handler (vec_set_optab, outermode);
609 int pos = bitnum / GET_MODE_BITSIZE (innermode);
610
611 create_fixed_operand (&ops[0], op0);
612 create_input_operand (&ops[1], value, innermode);
613 create_integer_operand (&ops[2], pos);
614 if (maybe_expand_insn (icode, 3, ops))
615 return true;
616 }
617
618 /* If the target is a register, overwriting the entire object or storing
619 a full-word or multi-word field can be done with just a SUBREG. */
620 if (!MEM_P (op0)
621 && bitsize == GET_MODE_BITSIZE (fieldmode)
622 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
623 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
624 {
625 /* Use the subreg machinery either to narrow OP0 to the required
626 words or to cope with mode punning between equal-sized modes. */
627 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
628 bitnum / BITS_PER_UNIT);
629 if (sub)
630 {
631 emit_move_insn (sub, value);
632 return true;
633 }
634 }
635
636 /* If the target is memory, storing any naturally aligned field can be
637 done with a simple store. For targets that support fast unaligned
638 memory, any naturally sized, unit aligned field can be done directly. */
639 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
640 {
641 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
642 emit_move_insn (op0, value);
643 return true;
644 }
645
646 /* Make sure we are playing with integral modes. Pun with subregs
647 if we aren't. This must come after the entire register case above,
648 since that case is valid for any mode. The following cases are only
649 valid for integral modes. */
650 {
651 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
652 if (imode != GET_MODE (op0))
653 {
654 if (MEM_P (op0))
655 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
656 else
657 {
658 gcc_assert (imode != BLKmode);
659 op0 = gen_lowpart (imode, op0);
660 }
661 }
662 }
663
664 /* Storing an lsb-aligned field in a register
665 can be done with a movstrict instruction. */
666
667 if (!MEM_P (op0)
668 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
669 && bitsize == GET_MODE_BITSIZE (fieldmode)
670 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
671 {
672 struct expand_operand ops[2];
673 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
674 rtx arg0 = op0;
675 unsigned HOST_WIDE_INT subreg_off;
676
677 if (GET_CODE (arg0) == SUBREG)
678 {
679 /* Else we've got some float mode source being extracted into
680 a different float mode destination -- this combination of
681 subregs results in Severe Tire Damage. */
682 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
683 || GET_MODE_CLASS (fieldmode) == MODE_INT
684 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
685 arg0 = SUBREG_REG (arg0);
686 }
687
688 subreg_off = bitnum / BITS_PER_UNIT;
689 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
690 {
691 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
692
693 create_fixed_operand (&ops[0], arg0);
694 /* Shrink the source operand to FIELDMODE. */
695 create_convert_operand_to (&ops[1], value, fieldmode, false);
696 if (maybe_expand_insn (icode, 2, ops))
697 return true;
698 }
699 }
700
701 /* Handle fields bigger than a word. */
702
703 if (bitsize > BITS_PER_WORD)
704 {
705 /* Here we transfer the words of the field
706 in the order least significant first.
707 This is because the most significant word is the one which may
708 be less than full.
709 However, only do that if the value is not BLKmode. */
710
711 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
712 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
713 unsigned int i;
714 rtx last;
715
716 /* This is the mode we must force value to, so that there will be enough
717 subwords to extract. Note that fieldmode will often (always?) be
718 VOIDmode, because that is what store_field uses to indicate that this
719 is a bit field, but passing VOIDmode to operand_subword_force
720 is not allowed. */
721 fieldmode = GET_MODE (value);
722 if (fieldmode == VOIDmode)
723 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
724
725 last = get_last_insn ();
726 for (i = 0; i < nwords; i++)
727 {
728 /* If I is 0, use the low-order word in both field and target;
729 if I is 1, use the next to lowest word; and so on. */
730 unsigned int wordnum = (backwards
731 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
732 - i - 1
733 : i);
734 unsigned int bit_offset = (backwards
735 ? MAX ((int) bitsize - ((int) i + 1)
736 * BITS_PER_WORD,
737 0)
738 : (int) i * BITS_PER_WORD);
739 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
740 unsigned HOST_WIDE_INT new_bitsize =
741 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
742
743 /* If the remaining chunk doesn't fill a full word, we have
744 to make sure that for big-endian machines the higher-order
745 bits are used. */
746 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
747 value_word = simplify_expand_binop (word_mode, lshr_optab,
748 value_word,
749 GEN_INT (BITS_PER_WORD
750 - new_bitsize),
751 NULL_RTX, true,
752 OPTAB_LIB_WIDEN);
753
754 if (!store_bit_field_1 (op0, new_bitsize,
755 bitnum + bit_offset,
756 bitregion_start, bitregion_end,
757 word_mode,
758 value_word, fallback_p))
759 {
760 delete_insns_since (last);
761 return false;
762 }
763 }
764 return true;
765 }
766
767 /* If VALUE has a floating-point or complex mode, access it as an
768 integer of the corresponding size. This can occur on a machine
769 with 64 bit registers that uses SFmode for float. It can also
770 occur for unaligned float or complex fields. */
771 orig_value = value;
772 if (GET_MODE (value) != VOIDmode
773 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
774 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
775 {
776 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
777 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
778 }
779
780 /* If OP0 is a multi-word register, narrow it to the affected word.
781 If the region spans two words, defer to store_split_bit_field. */
782 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
783 {
784 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
785 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
786 gcc_assert (op0);
787 bitnum %= BITS_PER_WORD;
788 if (bitnum + bitsize > BITS_PER_WORD)
789 {
790 if (!fallback_p)
791 return false;
792
793 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
794 bitregion_end, value);
795 return true;
796 }
797 }
798
799 /* From here on we can assume that the field to be stored in fits
800 within a word. If the destination is a register, it too fits
801 in a word. */
802
803 extraction_insn insv;
804 if (!MEM_P (op0)
805 && get_best_reg_extraction_insn (&insv, EP_insv,
806 GET_MODE_BITSIZE (GET_MODE (op0)),
807 fieldmode)
808 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
809 return true;
810
811 /* If OP0 is a memory, try copying it to a register and seeing if a
812 cheap register alternative is available. */
813 if (MEM_P (op0))
814 {
815 /* Do not use unaligned memory insvs for volatile bitfields when
816 -fstrict-volatile-bitfields is in effect. */
817 if (!(MEM_VOLATILE_P (op0)
818 && flag_strict_volatile_bitfields > 0)
819 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
820 fieldmode)
821 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
822 return true;
823
824 rtx last = get_last_insn ();
825
826 /* Try loading part of OP0 into a register, inserting the bitfield
827 into that, and then copying the result back to OP0. */
828 unsigned HOST_WIDE_INT bitpos;
829 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
830 bitregion_start, bitregion_end,
831 fieldmode, &bitpos);
832 if (xop0)
833 {
834 rtx tempreg = copy_to_reg (xop0);
835 if (store_bit_field_1 (tempreg, bitsize, bitpos,
836 bitregion_start, bitregion_end,
837 fieldmode, orig_value, false))
838 {
839 emit_move_insn (xop0, tempreg);
840 return true;
841 }
842 delete_insns_since (last);
843 }
844 }
845
846 if (!fallback_p)
847 return false;
848
849 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
850 bitregion_end, value);
851 return true;
852 }
853
854 /* Generate code to store value from rtx VALUE
855 into a bit-field within structure STR_RTX
856 containing BITSIZE bits starting at bit BITNUM.
857
858 BITREGION_START is bitpos of the first bitfield in this region.
859 BITREGION_END is the bitpos of the ending bitfield in this region.
860 These two fields are 0, if the C++ memory model does not apply,
861 or we are not interested in keeping track of bitfield regions.
862
863 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
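/* Illustrative call: storing the low 8 bits of VALUE into bits 3..10 of
   a MEM, with no enclosing bit region to protect, would be

     store_bit_field (mem, 8, 3, 0, 0, QImode, value);  */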
864
865 void
866 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
867 unsigned HOST_WIDE_INT bitnum,
868 unsigned HOST_WIDE_INT bitregion_start,
869 unsigned HOST_WIDE_INT bitregion_end,
870 enum machine_mode fieldmode,
871 rtx value)
872 {
873 /* Under the C++0x memory model, we must not touch bits outside the
874 bit region. Adjust the address to start at the beginning of the
875 bit region. */
876 if (MEM_P (str_rtx) && bitregion_start > 0)
877 {
878 enum machine_mode bestmode;
879 HOST_WIDE_INT offset, size;
880
881 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
882
883 offset = bitregion_start / BITS_PER_UNIT;
884 bitnum -= bitregion_start;
885 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
886 bitregion_end -= bitregion_start;
887 bitregion_start = 0;
888 bestmode = get_best_mode (bitsize, bitnum,
889 bitregion_start, bitregion_end,
890 MEM_ALIGN (str_rtx), VOIDmode,
891 MEM_VOLATILE_P (str_rtx));
892 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
893 }
894
895 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
896 bitregion_start, bitregion_end,
897 fieldmode, value, true))
898 gcc_unreachable ();
899 }
900 \f
901 /* Use shifts and boolean operations to store VALUE into a bit field of
902 width BITSIZE in OP0, starting at bit BITNUM. */
903
904 static void
905 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
906 unsigned HOST_WIDE_INT bitnum,
907 unsigned HOST_WIDE_INT bitregion_start,
908 unsigned HOST_WIDE_INT bitregion_end,
909 rtx value)
910 {
911 enum machine_mode mode;
912 rtx temp;
913 int all_zero = 0;
914 int all_one = 0;
915
916 /* There is a case not handled here:
917 a structure with a known alignment of just a halfword
918 and a field split across two aligned halfwords within the structure.
919 Or likewise a structure with a known alignment of just a byte
920 and a field split across two bytes.
921 Such cases are not supposed to be able to occur. */
922
923 if (MEM_P (op0))
924 {
925 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
926
927 if (bitregion_end)
928 maxbits = bitregion_end - bitregion_start + 1;
929
930 /* Get the proper mode to use for this field. We want a mode that
931 includes the entire field. If such a mode would be larger than
932 a word, we won't be doing the extraction the normal way.
933 We don't want a mode bigger than the destination. */
934
935 mode = GET_MODE (op0);
936 if (GET_MODE_BITSIZE (mode) == 0
937 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
938 mode = word_mode;
939
940 if (MEM_VOLATILE_P (op0)
941 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
942 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
943 && flag_strict_volatile_bitfields > 0)
944 mode = GET_MODE (op0);
945 else
946 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
947 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
948
949 if (mode == VOIDmode)
950 {
951 /* The only way this should occur is if the field spans word
952 boundaries. */
953 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
954 bitregion_end, value);
955 return;
956 }
957
958 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
959 }
960
961 mode = GET_MODE (op0);
962 gcc_assert (SCALAR_INT_MODE_P (mode));
963
964 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
965 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
966
967 if (BYTES_BIG_ENDIAN)
968 /* BITNUM is the distance between our msb
969 and that of the containing datum.
970 Convert it to the distance from the lsb. */
971 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
972
973 /* Now BITNUM is always the distance between our lsb
974 and that of OP0. */
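/* Illustrative arithmetic: with a 32-bit MODE, a 5-bit field whose
   big-endian BITNUM was 8 is now 32 - 5 - 8 == 19 bits from the lsb.  */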
975
976 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
977 we must first convert its mode to MODE. */
978
979 if (CONST_INT_P (value))
980 {
981 HOST_WIDE_INT v = INTVAL (value);
982
983 if (bitsize < HOST_BITS_PER_WIDE_INT)
984 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
985
986 if (v == 0)
987 all_zero = 1;
988 else if ((bitsize < HOST_BITS_PER_WIDE_INT
989 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
990 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
991 all_one = 1;
992
993 value = lshift_value (mode, value, bitnum, bitsize);
994 }
995 else
996 {
997 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
998 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
999
1000 if (GET_MODE (value) != mode)
1001 value = convert_to_mode (mode, value, 1);
1002
1003 if (must_and)
1004 value = expand_binop (mode, and_optab, value,
1005 mask_rtx (mode, 0, bitsize, 0),
1006 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1007 if (bitnum > 0)
1008 value = expand_shift (LSHIFT_EXPR, mode, value,
1009 bitnum, NULL_RTX, 1);
1010 }
1011
1012 /* Now clear the chosen bits in OP0,
1013 except that if VALUE is -1 we need not bother. */
1014 /* We keep the intermediates in registers to allow CSE to combine
1015 consecutive bitfield assignments. */
1016
1017 temp = force_reg (mode, op0);
1018
1019 if (! all_one)
1020 {
1021 temp = expand_binop (mode, and_optab, temp,
1022 mask_rtx (mode, bitnum, bitsize, 1),
1023 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024 temp = force_reg (mode, temp);
1025 }
1026
1027 /* Now logical-or VALUE into OP0, unless it is zero. */
1028
1029 if (! all_zero)
1030 {
1031 temp = expand_binop (mode, ior_optab, temp, value,
1032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033 temp = force_reg (mode, temp);
1034 }
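/* Illustrative example, once BITNUM is a distance from the lsb: storing
   the 3-bit value 0b101 at BITNUM 4 shifts it to 0b1010000, ANDs OP0
   with ~0b1110000 to clear bits 4..6, and then ORs the shifted value
   back in.  */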
1035
1036 if (op0 != temp)
1037 {
1038 op0 = copy_rtx (op0);
1039 emit_move_insn (op0, temp);
1040 }
1041 }
1042 \f
1043 /* Store a bit field that is split across multiple accessible memory objects.
1044
1045 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1046 BITSIZE is the field width; BITPOS the position of its first bit
1047 (within the word).
1048 VALUE is the value to store.
1049
1050 This does not yet handle fields wider than BITS_PER_WORD. */
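/* Illustrative example: with 32-bit words and a word-aligned destination,
   storing a 10-bit field at BITPOS 28 is done in two pieces: 4 bits
   (28..31) into the first word, then the remaining 6 bits starting at
   bit 0 of the next word.  */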
1051
1052 static void
1053 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1054 unsigned HOST_WIDE_INT bitpos,
1055 unsigned HOST_WIDE_INT bitregion_start,
1056 unsigned HOST_WIDE_INT bitregion_end,
1057 rtx value)
1058 {
1059 unsigned int unit;
1060 unsigned int bitsdone = 0;
1061
1062 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1063 much at a time. */
1064 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1065 unit = BITS_PER_WORD;
1066 else
1067 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1068
1069 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1070 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1071 that VALUE might be a floating-point constant. */
1072 if (CONSTANT_P (value) && !CONST_INT_P (value))
1073 {
1074 rtx word = gen_lowpart_common (word_mode, value);
1075
1076 if (word && (value != word))
1077 value = word;
1078 else
1079 value = gen_lowpart_common (word_mode,
1080 force_reg (GET_MODE (value) != VOIDmode
1081 ? GET_MODE (value)
1082 : word_mode, value));
1083 }
1084
1085 while (bitsdone < bitsize)
1086 {
1087 unsigned HOST_WIDE_INT thissize;
1088 rtx part, word;
1089 unsigned HOST_WIDE_INT thispos;
1090 unsigned HOST_WIDE_INT offset;
1091
1092 offset = (bitpos + bitsdone) / unit;
1093 thispos = (bitpos + bitsdone) % unit;
1094
1095 /* When region of bytes we can touch is restricted, decrease
1096 UNIT close to the end of the region as needed. */
1097 if (bitregion_end
1098 && unit > BITS_PER_UNIT
1099 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1100 {
1101 unit = unit / 2;
1102 continue;
1103 }
1104
1105 /* THISSIZE must not overrun a word boundary. Otherwise,
1106 store_fixed_bit_field will call us again, and we will mutually
1107 recurse forever. */
1108 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1109 thissize = MIN (thissize, unit - thispos);
1110
1111 if (BYTES_BIG_ENDIAN)
1112 {
1113 /* Fetch successively less significant portions. */
1114 if (CONST_INT_P (value))
1115 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1116 >> (bitsize - bitsdone - thissize))
1117 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1118 else
1119 {
1120 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1121 /* The args are chosen so that the last part includes the
1122 lsb. Give extract_bit_field the value it needs (with
1123 endianness compensation) to fetch the piece we want. */
1124 part = extract_fixed_bit_field (word_mode, value, thissize,
1125 total_bits - bitsize + bitsdone,
1126 NULL_RTX, 1, false);
1127 }
1128 }
1129 else
1130 {
1131 /* Fetch successively more significant portions. */
1132 if (CONST_INT_P (value))
1133 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1134 >> bitsdone)
1135 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1136 else
1137 part = extract_fixed_bit_field (word_mode, value, thissize,
1138 bitsdone, NULL_RTX, 1, false);
1139 }
1140
1141 /* If OP0 is a register, then handle OFFSET here.
1142
1143 When handling multiword bitfields, extract_bit_field may pass
1144 down a word_mode SUBREG of a larger REG for a bitfield that actually
1145 crosses a word boundary. Thus, for a SUBREG, we must find
1146 the current word starting from the base register. */
1147 if (GET_CODE (op0) == SUBREG)
1148 {
1149 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1150 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1151 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1152 word = word_offset ? const0_rtx : op0;
1153 else
1154 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1155 GET_MODE (SUBREG_REG (op0)));
1156 offset = 0;
1157 }
1158 else if (REG_P (op0))
1159 {
1160 enum machine_mode op0_mode = GET_MODE (op0);
1161 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1162 word = offset ? const0_rtx : op0;
1163 else
1164 word = operand_subword_force (op0, offset, GET_MODE (op0));
1165 offset = 0;
1166 }
1167 else
1168 word = op0;
1169
1170 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1171 it is just an out-of-bounds access. Ignore it. */
1172 if (word != const0_rtx)
1173 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1174 bitregion_start, bitregion_end, part);
1175 bitsdone += thissize;
1176 }
1177 }
1178 \f
1179 /* A subroutine of extract_bit_field_1 that converts return value X
1180 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1181 to extract_bit_field. */
1182
1183 static rtx
1184 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1185 enum machine_mode tmode, bool unsignedp)
1186 {
1187 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1188 return x;
1189
1190 /* If TMODE is not a scalar integral mode, first convert X to a scalar
1191 integer mode of the same size and then access the result as TMODE
1192 via a SUBREG. */
1193 if (!SCALAR_INT_MODE_P (tmode))
1194 {
1195 enum machine_mode smode;
1196
1197 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1198 x = convert_to_mode (smode, x, unsignedp);
1199 x = force_reg (smode, x);
1200 return gen_lowpart (tmode, x);
1201 }
1202
1203 return convert_to_mode (tmode, x, unsignedp);
1204 }
1205
1206 /* Try to use an ext(z)v pattern to extract a field from OP0.
1207 Return the extracted value on success, otherwise return null.
1208 EXT_MODE is the mode of the extraction and the other arguments
1209 are as for extract_bit_field. */
1210
1211 static rtx
1212 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1213 unsigned HOST_WIDE_INT bitsize,
1214 unsigned HOST_WIDE_INT bitnum,
1215 int unsignedp, rtx target,
1216 enum machine_mode mode, enum machine_mode tmode)
1217 {
1218 struct expand_operand ops[4];
1219 rtx spec_target = target;
1220 rtx spec_target_subreg = 0;
1221 enum machine_mode ext_mode = extv->field_mode;
1222 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1223
1224 if (bitsize == 0 || unit < bitsize)
1225 return NULL_RTX;
1226
1227 if (MEM_P (op0))
1228 /* Get a reference to the first byte of the field. */
1229 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1230 &bitnum);
1231 else
1232 {
1233 /* Convert from counting within OP0 to counting in EXT_MODE. */
1234 if (BYTES_BIG_ENDIAN)
1235 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1236
1237 /* If op0 is a register, we need it in EXT_MODE to make it
1238 acceptable to the format of ext(z)v. */
1239 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1240 return NULL_RTX;
1241 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1242 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1243 }
1244
1245 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1246 "backwards" from the size of the unit we are extracting from.
1247 Otherwise, we count bits from the most significant on a
1248 BYTES/BITS_BIG_ENDIAN machine. */
1249
1250 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1251 bitnum = unit - bitsize - bitnum;
1252
1253 if (target == 0)
1254 target = spec_target = gen_reg_rtx (tmode);
1255
1256 if (GET_MODE (target) != ext_mode)
1257 {
1258 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1259 between the mode of the extraction (EXT_MODE) and the target
1260 mode. Instead, create a temporary and use convert_move to set
1261 the target. */
1262 if (REG_P (target)
1263 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1264 {
1265 target = gen_lowpart (ext_mode, target);
1266 if (GET_MODE_PRECISION (ext_mode)
1267 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1268 spec_target_subreg = target;
1269 }
1270 else
1271 target = gen_reg_rtx (ext_mode);
1272 }
1273
1274 create_output_operand (&ops[0], target, ext_mode);
1275 create_fixed_operand (&ops[1], op0);
1276 create_integer_operand (&ops[2], bitsize);
1277 create_integer_operand (&ops[3], bitnum);
1278 if (maybe_expand_insn (extv->icode, 4, ops))
1279 {
1280 target = ops[0].value;
1281 if (target == spec_target)
1282 return target;
1283 if (target == spec_target_subreg)
1284 return spec_target;
1285 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1286 }
1287 return NULL_RTX;
1288 }
1289
1290 /* A subroutine of extract_bit_field, with the same arguments.
1291 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1292 if we can find no other means of implementing the operation.
1293 If FALLBACK_P is false, return NULL instead. */
1294
1295 static rtx
1296 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1297 unsigned HOST_WIDE_INT bitnum,
1298 int unsignedp, bool packedp, rtx target,
1299 enum machine_mode mode, enum machine_mode tmode,
1300 bool fallback_p)
1301 {
1302 rtx op0 = str_rtx;
1303 enum machine_mode int_mode;
1304 enum machine_mode mode1;
1305
1306 if (tmode == VOIDmode)
1307 tmode = mode;
1308
1309 while (GET_CODE (op0) == SUBREG)
1310 {
1311 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1312 op0 = SUBREG_REG (op0);
1313 }
1314
1315 /* If we have an out-of-bounds access to a register, just return an
1316 uninitialized register of the required mode. This can occur if the
1317 source code contains an out-of-bounds access to a small array. */
1318 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1319 return gen_reg_rtx (tmode);
1320
1321 if (REG_P (op0)
1322 && mode == GET_MODE (op0)
1323 && bitnum == 0
1324 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1325 {
1326 /* We're trying to extract a full register from itself. */
1327 return op0;
1328 }
1329
1330 /* See if we can get a better vector mode before extracting. */
1331 if (VECTOR_MODE_P (GET_MODE (op0))
1332 && !MEM_P (op0)
1333 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1334 {
1335 enum machine_mode new_mode;
1336
1337 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1338 new_mode = MIN_MODE_VECTOR_FLOAT;
1339 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1340 new_mode = MIN_MODE_VECTOR_FRACT;
1341 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1342 new_mode = MIN_MODE_VECTOR_UFRACT;
1343 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1344 new_mode = MIN_MODE_VECTOR_ACCUM;
1345 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1346 new_mode = MIN_MODE_VECTOR_UACCUM;
1347 else
1348 new_mode = MIN_MODE_VECTOR_INT;
1349
1350 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1351 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1352 && targetm.vector_mode_supported_p (new_mode))
1353 break;
1354 if (new_mode != VOIDmode)
1355 op0 = gen_lowpart (new_mode, op0);
1356 }
1357
1358 /* Use vec_extract patterns for extracting parts of vectors whenever
1359 available. */
1360 if (VECTOR_MODE_P (GET_MODE (op0))
1361 && !MEM_P (op0)
1362 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1363 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1364 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1365 {
1366 struct expand_operand ops[3];
1367 enum machine_mode outermode = GET_MODE (op0);
1368 enum machine_mode innermode = GET_MODE_INNER (outermode);
1369 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1370 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1371
1372 create_output_operand (&ops[0], target, innermode);
1373 create_input_operand (&ops[1], op0, outermode);
1374 create_integer_operand (&ops[2], pos);
1375 if (maybe_expand_insn (icode, 3, ops))
1376 {
1377 target = ops[0].value;
1378 if (GET_MODE (target) != mode)
1379 return gen_lowpart (tmode, target);
1380 return target;
1381 }
1382 }
1383
1384 /* Make sure we are playing with integral modes. Pun with subregs
1385 if we aren't. */
1386 {
1387 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1388 if (imode != GET_MODE (op0))
1389 {
1390 if (MEM_P (op0))
1391 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1392 else if (imode != BLKmode)
1393 {
1394 op0 = gen_lowpart (imode, op0);
1395
1396 /* If we got a SUBREG, force it into a register since we
1397 aren't going to be able to do another SUBREG on it. */
1398 if (GET_CODE (op0) == SUBREG)
1399 op0 = force_reg (imode, op0);
1400 }
1401 else if (REG_P (op0))
1402 {
1403 rtx reg, subreg;
1404 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1405 MODE_INT);
1406 reg = gen_reg_rtx (imode);
1407 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1408 emit_move_insn (subreg, op0);
1409 op0 = reg;
1410 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1411 }
1412 else
1413 {
1414 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1415 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1416 emit_move_insn (mem, op0);
1417 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1418 }
1419 }
1420 }
1421
1422 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1423 If that's wrong, the solution is to test for it and set TARGET to 0
1424 if needed. */
1425
1426 /* If the bitfield is volatile, we need to make sure the access
1427 remains on a type-aligned boundary. */
1428 if (GET_CODE (op0) == MEM
1429 && MEM_VOLATILE_P (op0)
1430 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1431 && flag_strict_volatile_bitfields > 0)
1432 goto no_subreg_mode_swap;
1433
1434 /* Only scalar integer modes can be converted via subregs. There is an
1435 additional problem for FP modes here in that they can have a precision
1436 which is different from the size. mode_for_size uses precision, but
1437 we want a mode based on the size, so we must avoid calling it for FP
1438 modes. */
1439 mode1 = mode;
1440 if (SCALAR_INT_MODE_P (tmode))
1441 {
1442 enum machine_mode try_mode = mode_for_size (bitsize,
1443 GET_MODE_CLASS (tmode), 0);
1444 if (try_mode != BLKmode)
1445 mode1 = try_mode;
1446 }
1447 gcc_assert (mode1 != BLKmode);
1448
1449 /* Extraction of a full MODE1 value can be done with a subreg as long
1450 as the least significant bit of the value is the least significant
1451 bit of either OP0 or a word of OP0. */
1452 if (!MEM_P (op0)
1453 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1454 && bitsize == GET_MODE_BITSIZE (mode1)
1455 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1456 {
1457 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1458 bitnum / BITS_PER_UNIT);
1459 if (sub)
1460 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1461 }
1462
1463 /* Extraction of a full MODE1 value can be done with a load as long as
1464 the field is on a byte boundary and is sufficiently aligned. */
1465 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1466 {
1467 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1468 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1469 }
1470
1471 no_subreg_mode_swap:
1472
1473 /* Handle fields bigger than a word. */
1474
1475 if (bitsize > BITS_PER_WORD)
1476 {
1477 /* Here we transfer the words of the field
1478 in the order least significant first.
1479 This is because the most significant word is the one which may
1480 be less than full. */
1481
1482 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1483 unsigned int i;
1484 rtx last;
1485
1486 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1487 target = gen_reg_rtx (mode);
1488
1489 /* Indicate for flow that the entire target reg is being set. */
1490 emit_clobber (target);
1491
1492 last = get_last_insn ();
1493 for (i = 0; i < nwords; i++)
1494 {
1495 /* If I is 0, use the low-order word in both field and target;
1496 if I is 1, use the next to lowest word; and so on. */
1497 /* Word number in TARGET to use. */
1498 unsigned int wordnum
1499 = (WORDS_BIG_ENDIAN
1500 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1501 : i);
1502 /* Offset from start of field in OP0. */
1503 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1504 ? MAX (0, ((int) bitsize - ((int) i + 1)
1505 * (int) BITS_PER_WORD))
1506 : (int) i * BITS_PER_WORD);
1507 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1508 rtx result_part
1509 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1510 bitsize - i * BITS_PER_WORD),
1511 bitnum + bit_offset, 1, false, target_part,
1512 mode, word_mode, fallback_p);
1513
1514 gcc_assert (target_part);
1515 if (!result_part)
1516 {
1517 delete_insns_since (last);
1518 return NULL;
1519 }
1520
1521 if (result_part != target_part)
1522 emit_move_insn (target_part, result_part);
1523 }
1524
1525 if (unsignedp)
1526 {
1527 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1528 need to be zero'd out. */
1529 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1530 {
1531 unsigned int i, total_words;
1532
1533 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1534 for (i = nwords; i < total_words; i++)
1535 emit_move_insn
1536 (operand_subword (target,
1537 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1538 1, VOIDmode),
1539 const0_rtx);
1540 }
1541 return target;
1542 }
1543
1544 /* Signed bit field: sign-extend with two arithmetic shifts. */
1545 target = expand_shift (LSHIFT_EXPR, mode, target,
1546 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1547 return expand_shift (RSHIFT_EXPR, mode, target,
1548 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1549 }
1550
1551 /* If OP0 is a multi-word register, narrow it to the affected word.
1552 If the region spans two words, defer to extract_split_bit_field. */
1553 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1554 {
1555 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1556 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1557 bitnum %= BITS_PER_WORD;
1558 if (bitnum + bitsize > BITS_PER_WORD)
1559 {
1560 if (!fallback_p)
1561 return NULL_RTX;
1562 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1563 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1564 }
1565 }
1566
1567 /* From here on we know the desired field is smaller than a word.
1568 If OP0 is a register, it too fits within a word. */
1569 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1570 extraction_insn extv;
1571 if (!MEM_P (op0)
1572 /* ??? We could limit the structure size to the part of OP0 that
1573 contains the field, with appropriate checks for endianness
1574 and TRULY_NOOP_TRUNCATION. */
1575 && get_best_reg_extraction_insn (&extv, pattern,
1576 GET_MODE_BITSIZE (GET_MODE (op0)),
1577 tmode))
1578 {
1579 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1580 unsignedp, target, mode,
1581 tmode);
1582 if (result)
1583 return result;
1584 }
1585
1586 /* If OP0 is a memory, try copying it to a register and seeing if a
1587 cheap register alternative is available. */
1588 if (MEM_P (op0))
1589 {
1590 /* Do not use extv/extzv for volatile bitfields when
1591 -fstrict-volatile-bitfields is in effect. */
1592 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1593 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1594 tmode))
1595 {
1596 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1597 bitnum, unsignedp,
1598 target, mode,
1599 tmode);
1600 if (result)
1601 return result;
1602 }
1603
1604 rtx last = get_last_insn ();
1605
1606 /* Try loading part of OP0 into a register and extracting the
1607 bitfield from that. */
1608 unsigned HOST_WIDE_INT bitpos;
1609 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1610 0, 0, tmode, &bitpos);
1611 if (xop0)
1612 {
1613 xop0 = copy_to_reg (xop0);
1614 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1615 unsignedp, packedp, target,
1616 mode, tmode, false);
1617 if (result)
1618 return result;
1619 delete_insns_since (last);
1620 }
1621 }
1622
1623 if (!fallback_p)
1624 return NULL;
1625
1626 /* Find a correspondingly-sized integer field, so we can apply
1627 shifts and masks to it. */
1628 int_mode = int_mode_for_mode (tmode);
1629 if (int_mode == BLKmode)
1630 int_mode = int_mode_for_mode (mode);
1631 /* Should probably push op0 out to memory and then do a load. */
1632 gcc_assert (int_mode != BLKmode);
1633
1634 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1635 target, unsignedp, packedp);
1636 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1637 }
1638
1639 /* Generate code to extract a byte-field from STR_RTX
1640 containing BITSIZE bits, starting at BITNUM,
1641 and put it in TARGET if possible (if TARGET is nonzero).
1642 Regardless of TARGET, we return the rtx for where the value is placed.
1643
1644 STR_RTX is the structure containing the byte (a REG or MEM).
1645 UNSIGNEDP is nonzero if this is an unsigned bit field.
1646 PACKEDP is nonzero if the field has the packed attribute.
1647 MODE is the natural mode of the field value once extracted.
1648 TMODE is the mode the caller would like the value to have;
1649 but the value may be returned with type MODE instead.
1650
1651 If a TARGET is specified and we can store in it at no extra cost,
1652 we do so, and return TARGET.
1653 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1654 if they are equally easy. */
1655
1656 rtx
1657 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1658 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1659 rtx target, enum machine_mode mode, enum machine_mode tmode)
1660 {
1661 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1662 target, mode, tmode, true);
1663 }
1664 \f
1665 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1666 from bit BITNUM of OP0.
1667
1668 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1669 PACKEDP is true if the field has the packed attribute.
1670
1671 If TARGET is nonzero, attempts to store the value there
1672 and return TARGET, but this is not guaranteed.
1673 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
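/* As an illustration (not a specification of the exact sequence), on a
   little-endian 32-bit target an unsigned 5-bit field at bit 3 of a
   register is extracted roughly as (OP0 >> 3) & 0x1f, while the signed
   case becomes the arithmetic shift sequence (OP0 << 24) >> 27; the
   code below picks the actual modes and shift counts.  */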
1674
1675 static rtx
1676 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1677 unsigned HOST_WIDE_INT bitsize,
1678 unsigned HOST_WIDE_INT bitnum, rtx target,
1679 int unsignedp, bool packedp)
1680 {
1681 enum machine_mode mode;
1682
1683 if (MEM_P (op0))
1684 {
1685 /* Get the proper mode to use for this field. We want a mode that
1686 includes the entire field. If such a mode would be larger than
1687 a word, we won't be doing the extraction the normal way. */
1688
1689 if (MEM_VOLATILE_P (op0)
1690 && flag_strict_volatile_bitfields > 0)
1691 {
1692 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1693 mode = GET_MODE (op0);
1694 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1695 mode = GET_MODE (target);
1696 else
1697 mode = tmode;
1698 }
1699 else
1700 mode = get_best_mode (bitsize, bitnum, 0, 0,
1701 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1702
1703 if (mode == VOIDmode)
1704 /* The only way this should occur is if the field spans word
1705 boundaries. */
1706 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1707
1708 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1709 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1710
1711 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1712 if it results in a multi-word access where we otherwise wouldn't
1713 have one. So, check for that case here. */
1714 if (MEM_P (op0)
1715 && MEM_VOLATILE_P (op0)
1716 && flag_strict_volatile_bitfields > 0
1717 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1718 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1719 {
1720 if (STRICT_ALIGNMENT)
1721 {
1722 static bool informed_about_misalignment = false;
1723
1724 if (packedp)
1725 {
1726 if (bitsize == total_bits)
1727 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1728 "multiple accesses to volatile structure"
1729 " member because of packed attribute");
1730 else
1731 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1732 "multiple accesses to volatile structure"
1733 " bitfield because of packed attribute");
1734
1735 return extract_split_bit_field (op0, bitsize, bitnum,
1736 unsignedp);
1737 }
1738
1739 if (bitsize == total_bits)
1740 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1741 "mis-aligned access used for structure member");
1742 else
1743 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1744 "mis-aligned access used for structure bitfield");
1745
1746 if (! informed_about_misalignment)
1747 {
1748 informed_about_misalignment = true;
1749 inform (input_location,
1750 "when a volatile object spans multiple type-sized"
1751 " locations, the compiler must choose between using"
1752 " a single mis-aligned access to preserve the"
1753 " volatility, or using multiple aligned accesses"
1754 " to avoid runtime faults; this code may fail at"
1755 " runtime if the hardware does not allow this"
1756 " access");
1757 }
1758 }
1759 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1760 }
1761 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1762 bitnum -= bit_offset;
1763 }
1764
1765 mode = GET_MODE (op0);
1766 gcc_assert (SCALAR_INT_MODE_P (mode));
1767
1768 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1769 for invalid input, such as extract equivalent of f5 from
1770 gcc.dg/pr48335-2.c. */
1771
1772 if (BYTES_BIG_ENDIAN)
1773 /* BITNUM is the distance between our msb and that of OP0.
1774 Convert it to the distance from the lsb. */
1775 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1776
1777 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1778 We have reduced the big-endian case to the little-endian case. */
1779
1780 if (unsignedp)
1781 {
1782 if (bitnum)
1783 {
1784 /* If the field does not already start at the lsb,
1785 shift it so it does. */
1786 /* Maybe propagate the target for the shift. */
1787 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1788 if (tmode != mode)
1789 subtarget = 0;
1790 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1791 }
1792 /* Convert the value to the desired mode. */
1793 if (mode != tmode)
1794 op0 = convert_to_mode (tmode, op0, 1);
1795
1796 /* Unless the msb of the field used to be the msb when we shifted,
1797 mask out the upper bits. */
1798
1799 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1800 return expand_binop (GET_MODE (op0), and_optab, op0,
1801 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1802 target, 1, OPTAB_LIB_WIDEN);
1803 return op0;
1804 }
1805
1806 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1807 then arithmetic-shift its lsb to the lsb of the word. */
1808 op0 = force_reg (mode, op0);
1809
1810 /* Find the narrowest integer mode that contains the field. */
1811
1812 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1813 mode = GET_MODE_WIDER_MODE (mode))
1814 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1815 {
1816 op0 = convert_to_mode (mode, op0, 0);
1817 break;
1818 }
1819
1820 if (mode != tmode)
1821 target = 0;
1822
1823 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1824 {
1825 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1826 /* Maybe propagate the target for the shift. */
1827 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1828 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1829 }
1830
1831 return expand_shift (RSHIFT_EXPR, mode, op0,
1832 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1833 }
1834 \f
1835 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1836 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1837 complement of that if COMPLEMENT. The mask is truncated if
1838 necessary to the width of mode MODE. The mask is zero-extended if
1839 BITSIZE+BITPOS is too small for MODE. */
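/* For example, mask_rtx (SImode, 4, 8, 0) yields the SImode constant
   0xff0 (eight ones followed by four zeros), and a nonzero COMPLEMENT
   gives 0xfffff00f instead.  */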
1840
1841 static rtx
1842 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1843 {
1844 double_int mask;
1845
1846 mask = double_int::mask (bitsize);
1847 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1848
1849 if (complement)
1850 mask = ~mask;
1851
1852 return immed_double_int_const (mask, mode);
1853 }
1854
1855 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1856 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1857
1858 static rtx
1859 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1860 {
1861 double_int val;
1862
1863 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1864 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1865
1866 return immed_double_int_const (val, mode);
1867 }
1868 \f
1869 /* Extract a bit field that is split across two words
1870 and return an RTX for the result.
1871
1872 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1873 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1874 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
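/* For example, with 32-bit words a 10-bit field at BITPOS 28 spans
   bits 28..31 of the first word and bits 0..5 of the second; the loop
   below extracts each piece with extract_fixed_bit_field and IORs the
   shifted pieces together.  */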
1875
1876 static rtx
1877 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1878 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1879 {
1880 unsigned int unit;
1881 unsigned int bitsdone = 0;
1882 rtx result = NULL_RTX;
1883 int first = 1;
1884
1885 /* Make sure UNIT isn't larger than BITS_PER_WORD, since we can only
1886 handle that much at a time. */
1887 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1888 unit = BITS_PER_WORD;
1889 else
1890 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1891
1892 while (bitsdone < bitsize)
1893 {
1894 unsigned HOST_WIDE_INT thissize;
1895 rtx part, word;
1896 unsigned HOST_WIDE_INT thispos;
1897 unsigned HOST_WIDE_INT offset;
1898
1899 offset = (bitpos + bitsdone) / unit;
1900 thispos = (bitpos + bitsdone) % unit;
1901
1902 /* THISSIZE must not overrun a word boundary. Otherwise,
1903 extract_fixed_bit_field will call us again, and we will mutually
1904 recurse forever. */
1905 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1906 thissize = MIN (thissize, unit - thispos);
1907
1908 /* If OP0 is a register, then handle OFFSET here.
1909
1910 When handling multiword bitfields, extract_bit_field may pass
1911 down a word_mode SUBREG of a larger REG for a bitfield that actually
1912 crosses a word boundary. Thus, for a SUBREG, we must find
1913 the current word starting from the base register. */
1914 if (GET_CODE (op0) == SUBREG)
1915 {
1916 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1917 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1918 GET_MODE (SUBREG_REG (op0)));
1919 offset = 0;
1920 }
1921 else if (REG_P (op0))
1922 {
1923 word = operand_subword_force (op0, offset, GET_MODE (op0));
1924 offset = 0;
1925 }
1926 else
1927 word = op0;
1928
1929 /* Extract the parts in bit-counting order,
1930 whose meaning is determined by BYTES_BIG_ENDIAN.
1931 OFFSET is in UNITs, and UNIT is in bits. */
1932 part = extract_fixed_bit_field (word_mode, word, thissize,
1933 offset * unit + thispos, 0, 1, false);
1934 bitsdone += thissize;
1935
1936 /* Shift this part into place for the result. */
1937 if (BYTES_BIG_ENDIAN)
1938 {
1939 if (bitsize != bitsdone)
1940 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1941 bitsize - bitsdone, 0, 1);
1942 }
1943 else
1944 {
1945 if (bitsdone != thissize)
1946 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1947 bitsdone - thissize, 0, 1);
1948 }
1949
1950 if (first)
1951 result = part;
1952 else
1953 /* Combine the parts with bitwise or. This works
1954 because we extracted each part as an unsigned bit field. */
1955 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1956 OPTAB_LIB_WIDEN);
1957
1958 first = 0;
1959 }
1960
1961 /* Unsigned bit field: we are done. */
1962 if (unsignedp)
1963 return result;
1964 /* Signed bit field: sign-extend with two arithmetic shifts. */
1965 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1966 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1967 return expand_shift (RSHIFT_EXPR, word_mode, result,
1968 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1969 }
1970 \f
1971 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1972 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1973 MODE, fill the upper bits with zeros. Fail if the layout of either
1974 mode is unknown (as for CC modes) or if the extraction would involve
1975 unprofitable mode punning. Return the value on success, otherwise
1976 return null.
1977
1978 This is different from gen_lowpart* in these respects:
1979
1980 - the returned value must always be considered an rvalue
1981
1982 - when MODE is wider than SRC_MODE, the extraction involves
1983 a zero extension
1984
1985 - when MODE is smaller than SRC_MODE, the extraction involves
1986 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1987
1988 In other words, this routine performs a computation, whereas the
1989 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1990 operations. */
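/* For instance, reading an SFmode register as SImode typically goes
   through the corresponding integer modes: the source is viewed as
   SImode with gen_lowpart, converted (a no-op here), and the lowpart
   of the result is returned; a request involving a CC mode fails.  */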
1991
1992 rtx
1993 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1994 {
1995 enum machine_mode int_mode, src_int_mode;
1996
1997 if (mode == src_mode)
1998 return src;
1999
2000 if (CONSTANT_P (src))
2001 {
2002 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2003 fails, it will happily create (subreg (symbol_ref)) or similar
2004 invalid SUBREGs. */
2005 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2006 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2007 if (ret)
2008 return ret;
2009
2010 if (GET_MODE (src) == VOIDmode
2011 || !validate_subreg (mode, src_mode, src, byte))
2012 return NULL_RTX;
2013
2014 src = force_reg (GET_MODE (src), src);
2015 return gen_rtx_SUBREG (mode, src, byte);
2016 }
2017
2018 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2019 return NULL_RTX;
2020
2021 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2022 && MODES_TIEABLE_P (mode, src_mode))
2023 {
2024 rtx x = gen_lowpart_common (mode, src);
2025 if (x)
2026 return x;
2027 }
2028
2029 src_int_mode = int_mode_for_mode (src_mode);
2030 int_mode = int_mode_for_mode (mode);
2031 if (src_int_mode == BLKmode || int_mode == BLKmode)
2032 return NULL_RTX;
2033
2034 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2035 return NULL_RTX;
2036 if (!MODES_TIEABLE_P (int_mode, mode))
2037 return NULL_RTX;
2038
2039 src = gen_lowpart (src_int_mode, src);
2040 src = convert_modes (int_mode, src_int_mode, src, true);
2041 src = gen_lowpart (mode, src);
2042 return src;
2043 }
2044 \f
2045 /* Add INC into TARGET. */
2046
2047 void
2048 expand_inc (rtx target, rtx inc)
2049 {
2050 rtx value = expand_binop (GET_MODE (target), add_optab,
2051 target, inc,
2052 target, 0, OPTAB_LIB_WIDEN);
2053 if (value != target)
2054 emit_move_insn (target, value);
2055 }
2056
2057 /* Subtract DEC from TARGET. */
2058
2059 void
2060 expand_dec (rtx target, rtx dec)
2061 {
2062 rtx value = expand_binop (GET_MODE (target), sub_optab,
2063 target, dec,
2064 target, 0, OPTAB_LIB_WIDEN);
2065 if (value != target)
2066 emit_move_insn (target, value);
2067 }
2068 \f
2069 /* Output a shift instruction for expression code CODE,
2070 with SHIFTED being the rtx for the value to shift,
2071 and AMOUNT the rtx for the amount to shift by.
2072 Store the result in the rtx TARGET, if that is convenient.
2073 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2074 Return the rtx for where the value is. */
2075
2076 static rtx
2077 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2078 rtx amount, rtx target, int unsignedp)
2079 {
2080 rtx op1, temp = 0;
2081 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2082 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2083 optab lshift_optab = ashl_optab;
2084 optab rshift_arith_optab = ashr_optab;
2085 optab rshift_uns_optab = lshr_optab;
2086 optab lrotate_optab = rotl_optab;
2087 optab rrotate_optab = rotr_optab;
2088 enum machine_mode op1_mode;
2089 int attempt;
2090 bool speed = optimize_insn_for_speed_p ();
2091
2092 op1 = amount;
2093 op1_mode = GET_MODE (op1);
2094
2095 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2096 shift amount is a vector, use the vector/vector shift patterns. */
2097 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2098 {
2099 lshift_optab = vashl_optab;
2100 rshift_arith_optab = vashr_optab;
2101 rshift_uns_optab = vlshr_optab;
2102 lrotate_optab = vrotl_optab;
2103 rrotate_optab = vrotr_optab;
2104 }
2105
2106 /* Previously detected shift-counts computed by NEGATE_EXPR
2107 and shifted in the other direction; but that does not work
2108 on all machines. */
2109
2110 if (SHIFT_COUNT_TRUNCATED)
2111 {
2112 if (CONST_INT_P (op1)
2113 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2114 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2115 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2116 % GET_MODE_BITSIZE (mode));
2117 else if (GET_CODE (op1) == SUBREG
2118 && subreg_lowpart_p (op1)
2119 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2120 && INTEGRAL_MODE_P (GET_MODE (op1)))
2121 op1 = SUBREG_REG (op1);
2122 }
2123
2124 if (op1 == const0_rtx)
2125 return shifted;
2126
2127 /* Check whether it's cheaper to implement a left shift by a constant
2128 bit count as a sequence of additions. */
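  /* For example, SHIFTED << 3 is expanded as three successive
     self-additions (doubling each time) when three adds are cheaper
     than the target's shift-by-3.  */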
2129 if (code == LSHIFT_EXPR
2130 && CONST_INT_P (op1)
2131 && INTVAL (op1) > 0
2132 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2133 && INTVAL (op1) < MAX_BITS_PER_WORD
2134 && (shift_cost (speed, mode, INTVAL (op1))
2135 > INTVAL (op1) * add_cost (speed, mode))
2136 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2137 {
2138 int i;
2139 for (i = 0; i < INTVAL (op1); i++)
2140 {
2141 temp = force_reg (mode, shifted);
2142 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2143 unsignedp, OPTAB_LIB_WIDEN);
2144 }
2145 return shifted;
2146 }
2147
2148 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2149 {
2150 enum optab_methods methods;
2151
2152 if (attempt == 0)
2153 methods = OPTAB_DIRECT;
2154 else if (attempt == 1)
2155 methods = OPTAB_WIDEN;
2156 else
2157 methods = OPTAB_LIB_WIDEN;
2158
2159 if (rotate)
2160 {
2161 /* Widening does not work for rotation. */
2162 if (methods == OPTAB_WIDEN)
2163 continue;
2164 else if (methods == OPTAB_LIB_WIDEN)
2165 {
2166 /* If we have been unable to open-code this by a rotation,
2167 do it as the IOR of two shifts. I.e., to rotate A
2168 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2169 where C is the bitsize of A.
2170
2171 It is theoretically possible that the target machine might
2172 not be able to perform either shift and hence we would
2173 be making two libcalls rather than just the one for the
2174 shift (similarly if IOR could not be done). We will allow
2175 this extremely unlikely lossage to avoid complicating the
2176 code below. */
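	      /* E.g. rotating a 32-bit value A left by 10 becomes
		 (A << 10) | ((unsigned) A >> 22).  */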
2177
2178 rtx subtarget = target == shifted ? 0 : target;
2179 rtx new_amount, other_amount;
2180 rtx temp1;
2181
2182 new_amount = op1;
2183 if (CONST_INT_P (op1))
2184 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2185 - INTVAL (op1));
2186 else
2187 other_amount
2188 = simplify_gen_binary (MINUS, GET_MODE (op1),
2189 GEN_INT (GET_MODE_PRECISION (mode)),
2190 op1);
2191
2192 shifted = force_reg (mode, shifted);
2193
2194 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2195 mode, shifted, new_amount, 0, 1);
2196 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2197 mode, shifted, other_amount,
2198 subtarget, 1);
2199 return expand_binop (mode, ior_optab, temp, temp1, target,
2200 unsignedp, methods);
2201 }
2202
2203 temp = expand_binop (mode,
2204 left ? lrotate_optab : rrotate_optab,
2205 shifted, op1, target, unsignedp, methods);
2206 }
2207 else if (unsignedp)
2208 temp = expand_binop (mode,
2209 left ? lshift_optab : rshift_uns_optab,
2210 shifted, op1, target, unsignedp, methods);
2211
2212 /* Do arithmetic shifts.
2213 Also, if we are going to widen the operand, we can just as well
2214 use an arithmetic right-shift instead of a logical one. */
2215 if (temp == 0 && ! rotate
2216 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2217 {
2218 enum optab_methods methods1 = methods;
2219
2220 /* If trying to widen a log shift to an arithmetic shift,
2221 don't accept an arithmetic shift of the same size. */
2222 if (unsignedp)
2223 methods1 = OPTAB_MUST_WIDEN;
2224
2225 /* Arithmetic shift */
2226
2227 temp = expand_binop (mode,
2228 left ? lshift_optab : rshift_arith_optab,
2229 shifted, op1, target, unsignedp, methods1);
2230 }
2231
2232 /* We used to try extzv here for logical right shifts, but that was
2233 only useful for one machine, the VAX, and caused poor code
2234 generation there for lshrdi3, so the code was deleted and a
2235 define_expand for lshrsi3 was added to vax.md. */
2236 }
2237
2238 gcc_assert (temp);
2239 return temp;
2240 }
2241
2242 /* Output a shift instruction for expression code CODE,
2243 with SHIFTED being the rtx for the value to shift,
2244 and AMOUNT the amount to shift by.
2245 Store the result in the rtx TARGET, if that is convenient.
2246 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2247 Return the rtx for where the value is. */
2248
2249 rtx
2250 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2251 int amount, rtx target, int unsignedp)
2252 {
2253 return expand_shift_1 (code, mode,
2254 shifted, GEN_INT (amount), target, unsignedp);
2255 }
2256
2257 /* Output a shift instruction for expression code CODE,
2258 with SHIFTED being the rtx for the value to shift,
2259 and AMOUNT the tree for the amount to shift by.
2260 Store the result in the rtx TARGET, if that is convenient.
2261 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2262 Return the rtx for where the value is. */
2263
2264 rtx
2265 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2266 tree amount, rtx target, int unsignedp)
2267 {
2268 return expand_shift_1 (code, mode,
2269 shifted, expand_normal (amount), target, unsignedp);
2270 }
2271
2272 \f
2273 /* Indicates the type of fixup needed after a constant multiplication.
2274 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2275 the result should be negated, and ADD_VARIANT means that the
2276 multiplicand should be added to the result. */
2277 enum mult_variant {basic_variant, negate_variant, add_variant};
2278
2279 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2280 const struct mult_cost *, enum machine_mode mode);
2281 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2282 struct algorithm *, enum mult_variant *, int);
2283 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2284 const struct algorithm *, enum mult_variant);
2285 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2286 static rtx extract_high_half (enum machine_mode, rtx);
2287 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2288 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2289 int, int);
2290 /* Compute and return the best algorithm for multiplying by T.
2291 The algorithm must cost less than cost_limit.
2292 If retval.cost >= COST_LIMIT, no algorithm was found and all
2293 other fields of the returned struct are undefined.
2294 MODE is the machine mode of the multiplication. */
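/* As a hand-worked example, t == 10 admits the two-operation sequence
   x * 10 == ((x << 2) + x) << 1, i.e. one shift-and-add followed by a
   shift; whether this is chosen depends on the target's relative add
   and shift costs.  */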
2295
2296 static void
2297 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2298 const struct mult_cost *cost_limit, enum machine_mode mode)
2299 {
2300 int m;
2301 struct algorithm *alg_in, *best_alg;
2302 struct mult_cost best_cost;
2303 struct mult_cost new_limit;
2304 int op_cost, op_latency;
2305 unsigned HOST_WIDE_INT orig_t = t;
2306 unsigned HOST_WIDE_INT q;
2307 int maxm, hash_index;
2308 bool cache_hit = false;
2309 enum alg_code cache_alg = alg_zero;
2310 bool speed = optimize_insn_for_speed_p ();
2311 enum machine_mode imode;
2312 struct alg_hash_entry *entry_ptr;
2313
2314 /* Indicate that no algorithm is yet found. If no algorithm
2315 is found, this value will be returned and indicate failure. */
2316 alg_out->cost.cost = cost_limit->cost + 1;
2317 alg_out->cost.latency = cost_limit->latency + 1;
2318
2319 if (cost_limit->cost < 0
2320 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2321 return;
2322
2323 /* Be prepared for vector modes. */
2324 imode = GET_MODE_INNER (mode);
2325 if (imode == VOIDmode)
2326 imode = mode;
2327
2328 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2329
2330 /* Restrict the bits of "t" to the multiplication's mode. */
2331 t &= GET_MODE_MASK (imode);
2332
2333 /* t == 1 can be done in zero cost. */
2334 if (t == 1)
2335 {
2336 alg_out->ops = 1;
2337 alg_out->cost.cost = 0;
2338 alg_out->cost.latency = 0;
2339 alg_out->op[0] = alg_m;
2340 return;
2341 }
2342
2343 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2344 fail now. */
2345 if (t == 0)
2346 {
2347 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2348 return;
2349 else
2350 {
2351 alg_out->ops = 1;
2352 alg_out->cost.cost = zero_cost (speed);
2353 alg_out->cost.latency = zero_cost (speed);
2354 alg_out->op[0] = alg_zero;
2355 return;
2356 }
2357 }
2358
2359 /* We'll be needing a couple extra algorithm structures now. */
2360
2361 alg_in = XALLOCA (struct algorithm);
2362 best_alg = XALLOCA (struct algorithm);
2363 best_cost = *cost_limit;
2364
2365 /* Compute the hash index. */
2366 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2367
2368 /* See if we already know what to do for T. */
2369 entry_ptr = alg_hash_entry_ptr (hash_index);
2370 if (entry_ptr->t == t
2371 && entry_ptr->mode == mode
2373 && entry_ptr->speed == speed
2374 && entry_ptr->alg != alg_unknown)
2375 {
2376 cache_alg = entry_ptr->alg;
2377
2378 if (cache_alg == alg_impossible)
2379 {
2380 /* The cache tells us that it's impossible to synthesize
2381 multiplication by T within entry_ptr->cost. */
2382 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2383 /* COST_LIMIT is at least as restrictive as the one
2384 recorded in the hash table, in which case we have no
2385 hope of synthesizing a multiplication. Just
2386 return. */
2387 return;
2388
2389 /* If we get here, COST_LIMIT is less restrictive than the
2390 one recorded in the hash table, so we may be able to
2391 synthesize a multiplication. Proceed as if we didn't
2392 have the cache entry. */
2393 }
2394 else
2395 {
2396 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2397 /* The cached algorithm shows that this multiplication
2398 requires more cost than COST_LIMIT. Just return. This
2399 way, we don't clobber this cache entry with
2400 alg_impossible but retain useful information. */
2401 return;
2402
2403 cache_hit = true;
2404
2405 switch (cache_alg)
2406 {
2407 case alg_shift:
2408 goto do_alg_shift;
2409
2410 case alg_add_t_m2:
2411 case alg_sub_t_m2:
2412 goto do_alg_addsub_t_m2;
2413
2414 case alg_add_factor:
2415 case alg_sub_factor:
2416 goto do_alg_addsub_factor;
2417
2418 case alg_add_t2_m:
2419 goto do_alg_add_t2_m;
2420
2421 case alg_sub_t2_m:
2422 goto do_alg_sub_t2_m;
2423
2424 default:
2425 gcc_unreachable ();
2426 }
2427 }
2428 }
2429
2430 /* If we have a group of zero bits at the low-order part of T, try
2431 multiplying by the remaining bits and then doing a shift. */
2432
2433 if ((t & 1) == 0)
2434 {
2435 do_alg_shift:
2436 m = floor_log2 (t & -t); /* m = number of low zero bits */
2437 if (m < maxm)
2438 {
2439 q = t >> m;
2440 /* The function expand_shift will choose between a shift and
2441 a sequence of additions, so the observed cost is given as
2442 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2443 op_cost = m * add_cost (speed, mode);
2444 if (shift_cost (speed, mode, m) < op_cost)
2445 op_cost = shift_cost (speed, mode, m);
2446 new_limit.cost = best_cost.cost - op_cost;
2447 new_limit.latency = best_cost.latency - op_cost;
2448 synth_mult (alg_in, q, &new_limit, mode);
2449
2450 alg_in->cost.cost += op_cost;
2451 alg_in->cost.latency += op_cost;
2452 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2453 {
2454 struct algorithm *x;
2455 best_cost = alg_in->cost;
2456 x = alg_in, alg_in = best_alg, best_alg = x;
2457 best_alg->log[best_alg->ops] = m;
2458 best_alg->op[best_alg->ops] = alg_shift;
2459 }
2460
2461 /* See if treating ORIG_T as a signed number yields a better
2462 sequence. Try this sequence only for a negative ORIG_T
2463 as it would be useless for a non-negative ORIG_T. */
2464 if ((HOST_WIDE_INT) orig_t < 0)
2465 {
2466 /* Shift ORIG_T as follows because a right shift of a
2467 negative-valued signed type is implementation
2468 defined. */
2469 q = ~(~orig_t >> m);
2470 /* The function expand_shift will choose between a shift
2471 and a sequence of additions, so the observed cost is
2472 given as MIN (m * add_cost(speed, mode),
2473 shift_cost(speed, mode, m)). */
2474 op_cost = m * add_cost (speed, mode);
2475 if (shift_cost (speed, mode, m) < op_cost)
2476 op_cost = shift_cost (speed, mode, m);
2477 new_limit.cost = best_cost.cost - op_cost;
2478 new_limit.latency = best_cost.latency - op_cost;
2479 synth_mult (alg_in, q, &new_limit, mode);
2480
2481 alg_in->cost.cost += op_cost;
2482 alg_in->cost.latency += op_cost;
2483 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2484 {
2485 struct algorithm *x;
2486 best_cost = alg_in->cost;
2487 x = alg_in, alg_in = best_alg, best_alg = x;
2488 best_alg->log[best_alg->ops] = m;
2489 best_alg->op[best_alg->ops] = alg_shift;
2490 }
2491 }
2492 }
2493 if (cache_hit)
2494 goto done;
2495 }
2496
2497 /* If we have an odd number, add or subtract one. */
2498 if ((t & 1) != 0)
2499 {
2500 unsigned HOST_WIDE_INT w;
2501
2502 do_alg_addsub_t_m2:
2503 for (w = 1; (w & t) != 0; w <<= 1)
2504 ;
2505 /* If T was -1, then W will be zero after the loop. This is another
2506 case where T ends with ...111. Handling it as (T + 1) and then
2507 subtracting 1 produces slightly better code and makes algorithm
2508 selection much faster than treating it like the ...0111 case
2509 below. */
2510 if (w == 0
2511 || (w > 2
2512 /* Reject the case where t is 3.
2513 Thus we prefer addition in that case. */
2514 && t != 3))
2515 {
2516 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2517
2518 op_cost = add_cost (speed, mode);
2519 new_limit.cost = best_cost.cost - op_cost;
2520 new_limit.latency = best_cost.latency - op_cost;
2521 synth_mult (alg_in, t + 1, &new_limit, mode);
2522
2523 alg_in->cost.cost += op_cost;
2524 alg_in->cost.latency += op_cost;
2525 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2526 {
2527 struct algorithm *x;
2528 best_cost = alg_in->cost;
2529 x = alg_in, alg_in = best_alg, best_alg = x;
2530 best_alg->log[best_alg->ops] = 0;
2531 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2532 }
2533 }
2534 else
2535 {
2536 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2537
2538 op_cost = add_cost (speed, mode);
2539 new_limit.cost = best_cost.cost - op_cost;
2540 new_limit.latency = best_cost.latency - op_cost;
2541 synth_mult (alg_in, t - 1, &new_limit, mode);
2542
2543 alg_in->cost.cost += op_cost;
2544 alg_in->cost.latency += op_cost;
2545 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2546 {
2547 struct algorithm *x;
2548 best_cost = alg_in->cost;
2549 x = alg_in, alg_in = best_alg, best_alg = x;
2550 best_alg->log[best_alg->ops] = 0;
2551 best_alg->op[best_alg->ops] = alg_add_t_m2;
2552 }
2553 }
2554
2555 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2556 quickly with a - a * n for some appropriate constant n. */
2557 m = exact_log2 (-orig_t + 1);
2558 if (m >= 0 && m < maxm)
2559 {
2560 op_cost = shiftsub1_cost (speed, mode, m);
2561 new_limit.cost = best_cost.cost - op_cost;
2562 new_limit.latency = best_cost.latency - op_cost;
2563 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2564 &new_limit, mode);
2565
2566 alg_in->cost.cost += op_cost;
2567 alg_in->cost.latency += op_cost;
2568 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 {
2570 struct algorithm *x;
2571 best_cost = alg_in->cost;
2572 x = alg_in, alg_in = best_alg, best_alg = x;
2573 best_alg->log[best_alg->ops] = m;
2574 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2575 }
2576 }
2577
2578 if (cache_hit)
2579 goto done;
2580 }
2581
2582 /* Look for factors of t of the form
2583 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2584 If we find such a factor, we can multiply by t using an algorithm that
2585 multiplies by q, shift the result by m and add/subtract it to itself.
2586
2587 We search for large factors first and loop down, even if large factors
2588 are less probable than small; if we find a large factor we will find a
2589 good sequence quickly, and therefore be able to prune (by decreasing
2590 COST_LIMIT) the search. */
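  /* For instance, t == 45 factors as 5 * (2**3 + 1): synthesize the
     multiplication by 5 and then add the result shifted left by 3 to
     itself (alg_add_factor with log == 3).  */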
2591
2592 do_alg_addsub_factor:
2593 for (m = floor_log2 (t - 1); m >= 2; m--)
2594 {
2595 unsigned HOST_WIDE_INT d;
2596
2597 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2598 if (t % d == 0 && t > d && m < maxm
2599 && (!cache_hit || cache_alg == alg_add_factor))
2600 {
2601 /* If the target has a cheap shift-and-add instruction use
2602 that in preference to a shift insn followed by an add insn.
2603 Assume that the shift-and-add is "atomic" with a latency
2604 equal to its cost, otherwise assume that on superscalar
2605 hardware the shift may be executed concurrently with the
2606 earlier steps in the algorithm. */
2607 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2608 if (shiftadd_cost (speed, mode, m) < op_cost)
2609 {
2610 op_cost = shiftadd_cost (speed, mode, m);
2611 op_latency = op_cost;
2612 }
2613 else
2614 op_latency = add_cost (speed, mode);
2615
2616 new_limit.cost = best_cost.cost - op_cost;
2617 new_limit.latency = best_cost.latency - op_latency;
2618 synth_mult (alg_in, t / d, &new_limit, mode);
2619
2620 alg_in->cost.cost += op_cost;
2621 alg_in->cost.latency += op_latency;
2622 if (alg_in->cost.latency < op_cost)
2623 alg_in->cost.latency = op_cost;
2624 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625 {
2626 struct algorithm *x;
2627 best_cost = alg_in->cost;
2628 x = alg_in, alg_in = best_alg, best_alg = x;
2629 best_alg->log[best_alg->ops] = m;
2630 best_alg->op[best_alg->ops] = alg_add_factor;
2631 }
2632 /* Other factors will have been taken care of in the recursion. */
2633 break;
2634 }
2635
2636 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2637 if (t % d == 0 && t > d && m < maxm
2638 && (!cache_hit || cache_alg == alg_sub_factor))
2639 {
2640 /* If the target has a cheap shift-and-subtract insn use
2641 that in preference to a shift insn followed by a sub insn.
2642 Assume that the shift-and-sub is "atomic" with a latency
2643 equal to its cost, otherwise assume that on superscalar
2644 hardware the shift may be executed concurrently with the
2645 earlier steps in the algorithm. */
2646 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2647 if (shiftsub0_cost (speed, mode, m) < op_cost)
2648 {
2649 op_cost = shiftsub0_cost (speed, mode, m);
2650 op_latency = op_cost;
2651 }
2652 else
2653 op_latency = add_cost (speed, mode);
2654
2655 new_limit.cost = best_cost.cost - op_cost;
2656 new_limit.latency = best_cost.latency - op_latency;
2657 synth_mult (alg_in, t / d, &new_limit, mode);
2658
2659 alg_in->cost.cost += op_cost;
2660 alg_in->cost.latency += op_latency;
2661 if (alg_in->cost.latency < op_cost)
2662 alg_in->cost.latency = op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664 {
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_factor;
2670 }
2671 break;
2672 }
2673 }
2674 if (cache_hit)
2675 goto done;
2676
2677 /* Try shift-and-add (load effective address) instructions,
2678 i.e. do a*3, a*5, a*9. */
2679 if ((t & 1) != 0)
2680 {
2681 do_alg_add_t2_m:
2682 q = t - 1;
2683 q = q & -q;
2684 m = exact_log2 (q);
2685 if (m >= 0 && m < maxm)
2686 {
2687 op_cost = shiftadd_cost (speed, mode, m);
2688 new_limit.cost = best_cost.cost - op_cost;
2689 new_limit.latency = best_cost.latency - op_cost;
2690 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2691
2692 alg_in->cost.cost += op_cost;
2693 alg_in->cost.latency += op_cost;
2694 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695 {
2696 struct algorithm *x;
2697 best_cost = alg_in->cost;
2698 x = alg_in, alg_in = best_alg, best_alg = x;
2699 best_alg->log[best_alg->ops] = m;
2700 best_alg->op[best_alg->ops] = alg_add_t2_m;
2701 }
2702 }
2703 if (cache_hit)
2704 goto done;
2705
2706 do_alg_sub_t2_m:
2707 q = t + 1;
2708 q = q & -q;
2709 m = exact_log2 (q);
2710 if (m >= 0 && m < maxm)
2711 {
2712 op_cost = shiftsub0_cost (speed, mode, m);
2713 new_limit.cost = best_cost.cost - op_cost;
2714 new_limit.latency = best_cost.latency - op_cost;
2715 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2716
2717 alg_in->cost.cost += op_cost;
2718 alg_in->cost.latency += op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 {
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2726 }
2727 }
2728 if (cache_hit)
2729 goto done;
2730 }
2731
2732 done:
2733 /* If best_cost has not decreased, we have not found any algorithm. */
2734 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2735 {
2736 /* We failed to find an algorithm. Record alg_impossible for
2737 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2738 we are asked to find an algorithm for T within the same or
2739 lower COST_LIMIT, we can immediately return to the
2740 caller. */
2741 entry_ptr->t = t;
2742 entry_ptr->mode = mode;
2743 entry_ptr->speed = speed;
2744 entry_ptr->alg = alg_impossible;
2745 entry_ptr->cost = *cost_limit;
2746 return;
2747 }
2748
2749 /* Cache the result. */
2750 if (!cache_hit)
2751 {
2752 entry_ptr->t = t;
2753 entry_ptr->mode = mode;
2754 entry_ptr->speed = speed;
2755 entry_ptr->alg = best_alg->op[best_alg->ops];
2756 entry_ptr->cost.cost = best_cost.cost;
2757 entry_ptr->cost.latency = best_cost.latency;
2758 }
2759
2760 /* If we are getting too long a sequence for `struct algorithm'
2761 to record, make this search fail. */
2762 if (best_alg->ops == MAX_BITS_PER_WORD)
2763 return;
2764
2765 /* Copy the algorithm from temporary space to the space at alg_out.
2766 We avoid using structure assignment because the majority of
2767 best_alg is normally undefined, and this is a critical function. */
2768 alg_out->ops = best_alg->ops + 1;
2769 alg_out->cost = best_cost;
2770 memcpy (alg_out->op, best_alg->op,
2771 alg_out->ops * sizeof *alg_out->op);
2772 memcpy (alg_out->log, best_alg->log,
2773 alg_out->ops * sizeof *alg_out->log);
2774 }
2775 \f
2776 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2777 Try three variations:
2778
2779 - a shift/add sequence based on VAL itself
2780 - a shift/add sequence based on -VAL, followed by a negation
2781 - a shift/add sequence based on VAL - 1, followed by an addition.
2782
2783 Return true if the cheapest of these cost less than MULT_COST,
2784 describing the algorithm in *ALG and final fixup in *VARIANT. */
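/* For example, x * -7 may come out cheapest through the second
   variation: synthesize x * 7 as (x << 3) - x and negate the result
   (negate_variant).  */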
2785
2786 static bool
2787 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2788 struct algorithm *alg, enum mult_variant *variant,
2789 int mult_cost)
2790 {
2791 struct algorithm alg2;
2792 struct mult_cost limit;
2793 int op_cost;
2794 bool speed = optimize_insn_for_speed_p ();
2795
2796 /* Fail quickly for impossible bounds. */
2797 if (mult_cost < 0)
2798 return false;
2799
2800 /* Ensure that mult_cost provides a reasonable upper bound.
2801 Any constant multiplication can be performed with fewer
2802 than 2 * bits additions. */
2803 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2804 if (mult_cost > op_cost)
2805 mult_cost = op_cost;
2806
2807 *variant = basic_variant;
2808 limit.cost = mult_cost;
2809 limit.latency = mult_cost;
2810 synth_mult (alg, val, &limit, mode);
2811
2812 /* This works only if the inverted value actually fits in an
2813 `unsigned int'. */
2814 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2815 {
2816 op_cost = neg_cost (speed, mode);
2817 if (MULT_COST_LESS (&alg->cost, mult_cost))
2818 {
2819 limit.cost = alg->cost.cost - op_cost;
2820 limit.latency = alg->cost.latency - op_cost;
2821 }
2822 else
2823 {
2824 limit.cost = mult_cost - op_cost;
2825 limit.latency = mult_cost - op_cost;
2826 }
2827
2828 synth_mult (&alg2, -val, &limit, mode);
2829 alg2.cost.cost += op_cost;
2830 alg2.cost.latency += op_cost;
2831 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2832 *alg = alg2, *variant = negate_variant;
2833 }
2834
2835 /* This proves very useful for division-by-constant. */
2836 op_cost = add_cost (speed, mode);
2837 if (MULT_COST_LESS (&alg->cost, mult_cost))
2838 {
2839 limit.cost = alg->cost.cost - op_cost;
2840 limit.latency = alg->cost.latency - op_cost;
2841 }
2842 else
2843 {
2844 limit.cost = mult_cost - op_cost;
2845 limit.latency = mult_cost - op_cost;
2846 }
2847
2848 synth_mult (&alg2, val - 1, &limit, mode);
2849 alg2.cost.cost += op_cost;
2850 alg2.cost.latency += op_cost;
2851 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2852 *alg = alg2, *variant = add_variant;
2853
2854 return MULT_COST_LESS (&alg->cost, mult_cost);
2855 }
2856
2857 /* A subroutine of expand_mult, used for constant multiplications.
2858 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2859 convenient. Use the shift/add sequence described by ALG and apply
2860 the final fixup specified by VARIANT. */
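/* For VAL == 10 one possible ALG is { alg_m, alg_add_t2_m (log 2),
   alg_shift (log 1) }: ACCUM = OP0; ACCUM = (ACCUM << 2) + OP0;
   ACCUM <<= 1; VAL_SO_FAR tracks 1, 5 and finally 10.  */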
2861
2862 static rtx
2863 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2864 rtx target, const struct algorithm *alg,
2865 enum mult_variant variant)
2866 {
2867 HOST_WIDE_INT val_so_far;
2868 rtx insn, accum, tem;
2869 int opno;
2870 enum machine_mode nmode;
2871
2872 /* Avoid referencing memory over and over, and avoid invalid
2873 sharing of SUBREGs. */
2874 op0 = force_reg (mode, op0);
2875
2876 /* ACCUM starts out either as OP0 or as a zero, depending on
2877 the first operation. */
2878
2879 if (alg->op[0] == alg_zero)
2880 {
2881 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2882 val_so_far = 0;
2883 }
2884 else if (alg->op[0] == alg_m)
2885 {
2886 accum = copy_to_mode_reg (mode, op0);
2887 val_so_far = 1;
2888 }
2889 else
2890 gcc_unreachable ();
2891
2892 for (opno = 1; opno < alg->ops; opno++)
2893 {
2894 int log = alg->log[opno];
2895 rtx shift_subtarget = optimize ? 0 : accum;
2896 rtx add_target
2897 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2898 && !optimize)
2899 ? target : 0;
2900 rtx accum_target = optimize ? 0 : accum;
2901 rtx accum_inner;
2902
2903 switch (alg->op[opno])
2904 {
2905 case alg_shift:
2906 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2907 /* REG_EQUAL note will be attached to the following insn. */
2908 emit_move_insn (accum, tem);
2909 val_so_far <<= log;
2910 break;
2911
2912 case alg_add_t_m2:
2913 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2914 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2915 add_target ? add_target : accum_target);
2916 val_so_far += (HOST_WIDE_INT) 1 << log;
2917 break;
2918
2919 case alg_sub_t_m2:
2920 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2921 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2922 add_target ? add_target : accum_target);
2923 val_so_far -= (HOST_WIDE_INT) 1 << log;
2924 break;
2925
2926 case alg_add_t2_m:
2927 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2928 log, shift_subtarget, 0);
2929 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2930 add_target ? add_target : accum_target);
2931 val_so_far = (val_so_far << log) + 1;
2932 break;
2933
2934 case alg_sub_t2_m:
2935 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2936 log, shift_subtarget, 0);
2937 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938 add_target ? add_target : accum_target);
2939 val_so_far = (val_so_far << log) - 1;
2940 break;
2941
2942 case alg_add_factor:
2943 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2944 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2945 add_target ? add_target : accum_target);
2946 val_so_far += val_so_far << log;
2947 break;
2948
2949 case alg_sub_factor:
2950 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2951 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2952 (add_target
2953 ? add_target : (optimize ? 0 : tem)));
2954 val_so_far = (val_so_far << log) - val_so_far;
2955 break;
2956
2957 default:
2958 gcc_unreachable ();
2959 }
2960
2961 if (SCALAR_INT_MODE_P (mode))
2962 {
2963 /* Write a REG_EQUAL note on the last insn so that we can cse
2964 multiplication sequences. Note that if ACCUM is a SUBREG,
2965 we've set the inner register and must properly indicate that. */
2966 tem = op0, nmode = mode;
2967 accum_inner = accum;
2968 if (GET_CODE (accum) == SUBREG)
2969 {
2970 accum_inner = SUBREG_REG (accum);
2971 nmode = GET_MODE (accum_inner);
2972 tem = gen_lowpart (nmode, op0);
2973 }
2974
2975 insn = get_last_insn ();
2976 set_dst_reg_note (insn, REG_EQUAL,
2977 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2978 accum_inner);
2979 }
2980 }
2981
2982 if (variant == negate_variant)
2983 {
2984 val_so_far = -val_so_far;
2985 accum = expand_unop (mode, neg_optab, accum, target, 0);
2986 }
2987 else if (variant == add_variant)
2988 {
2989 val_so_far = val_so_far + 1;
2990 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2991 }
2992
2993 /* Compare only the bits of val and val_so_far that are significant
2994 in the result mode, to avoid sign-/zero-extension confusion. */
2995 nmode = GET_MODE_INNER (mode);
2996 if (nmode == VOIDmode)
2997 nmode = mode;
2998 val &= GET_MODE_MASK (nmode);
2999 val_so_far &= GET_MODE_MASK (nmode);
3000 gcc_assert (val == val_so_far);
3001
3002 return accum;
3003 }
3004
3005 /* Perform a multiplication and return an rtx for the result.
3006 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3007 TARGET is a suggestion for where to store the result (an rtx).
3008
3009 We check specially for a constant integer as OP1.
3010 If you want this check for OP0 as well, then before calling
3011 you should swap the two operands if OP0 would be constant. */
3012
3013 rtx
3014 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3015 int unsignedp)
3016 {
3017 enum mult_variant variant;
3018 struct algorithm algorithm;
3019 rtx scalar_op1;
3020 int max_cost;
3021 bool speed = optimize_insn_for_speed_p ();
3022 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3023
3024 if (CONSTANT_P (op0))
3025 {
3026 rtx temp = op0;
3027 op0 = op1;
3028 op1 = temp;
3029 }
3030
3031 /* For vectors, there are several simplifications that can be made if
3032 all elements of the vector constant are identical. */
3033 scalar_op1 = op1;
3034 if (GET_CODE (op1) == CONST_VECTOR)
3035 {
3036 int i, n = CONST_VECTOR_NUNITS (op1);
3037 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3038 for (i = 1; i < n; ++i)
3039 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3040 goto skip_scalar;
3041 }
3042
3043 if (INTEGRAL_MODE_P (mode))
3044 {
3045 rtx fake_reg;
3046 HOST_WIDE_INT coeff;
3047 bool is_neg;
3048 int mode_bitsize;
3049
3050 if (op1 == CONST0_RTX (mode))
3051 return op1;
3052 if (op1 == CONST1_RTX (mode))
3053 return op0;
3054 if (op1 == CONSTM1_RTX (mode))
3055 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3056 op0, target, 0);
3057
3058 if (do_trapv)
3059 goto skip_synth;
3060
3061 /* These are the operations that are potentially turned into
3062 a sequence of shifts and additions. */
3063 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3064
3065 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3066 less than or equal in size to `unsigned int' this doesn't matter.
3067 If the mode is larger than `unsigned int', then synth_mult works
3068 only if the constant value exactly fits in an `unsigned int' without
3069 any truncation. This means that multiplying by negative values does
3070 not work; results are off by 2^32 on a 32-bit machine.
3071
3072 if (CONST_INT_P (scalar_op1))
3073 {
3074 coeff = INTVAL (scalar_op1);
3075 is_neg = coeff < 0;
3076 }
3077 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3078 {
3079 /* If we are multiplying in DImode, it may still be a win
3080 to try to work with shifts and adds. */
3081 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3082 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3083 {
3084 coeff = CONST_DOUBLE_LOW (scalar_op1);
3085 is_neg = false;
3086 }
3087 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3088 {
3089 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3090 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3091 {
3092 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3093 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3094 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3095 return expand_shift (LSHIFT_EXPR, mode, op0,
3096 shift, target, unsignedp);
3097 }
3098 goto skip_synth;
3099 }
3100 else
3101 goto skip_synth;
3102 }
3103 else
3104 goto skip_synth;
3105
3106 /* We used to test optimize here, on the grounds that it's better to
3107 produce a smaller program when -O is not used. But this causes
3108 such a terrible slowdown sometimes that it seems better to always
3109 use synth_mult. */
3110
3111 /* Special case powers of two. */
3112 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3113 return expand_shift (LSHIFT_EXPR, mode, op0,
3114 floor_log2 (coeff), target, unsignedp);
3115
3116 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3117
3118 /* Attempt to handle multiplication of DImode values by negative
3119 coefficients, by performing the multiplication by a positive
3120 multiplier and then inverting the result. */
3121 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3122 {
3123 /* It's safe to use -coeff even for INT_MIN, as the
3124 result is interpreted as an unsigned coefficient.
3125 Exclude cost of op0 from max_cost to match the cost
3126 calculation of the synth_mult. */
3127 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3128 - neg_cost (speed, mode));
3129 if (max_cost > 0
3130 && choose_mult_variant (mode, -coeff, &algorithm,
3131 &variant, max_cost))
3132 {
3133 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3134 &algorithm, variant);
3135 return expand_unop (mode, neg_optab, temp, target, 0);
3136 }
3137 goto skip_synth;
3138 }
3139
3140 /* Exclude cost of op0 from max_cost to match the cost
3141 calculation of the synth_mult. */
3142 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3143 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3144 return expand_mult_const (mode, op0, coeff, target,
3145 &algorithm, variant);
3146 }
3147 skip_synth:
3148
3149 /* Expand x*2.0 as x+x. */
3150 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3151 {
3152 REAL_VALUE_TYPE d;
3153 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3154
3155 if (REAL_VALUES_EQUAL (d, dconst2))
3156 {
3157 op0 = force_reg (GET_MODE (op0), op0);
3158 return expand_binop (mode, add_optab, op0, op0,
3159 target, unsignedp, OPTAB_LIB_WIDEN);
3160 }
3161 }
3162 skip_scalar:
3163
3164 /* This used to use umul_optab if unsigned, but for non-widening multiply
3165 there is no difference between signed and unsigned. */
3166 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3167 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3168 gcc_assert (op0);
3169 return op0;
3170 }
3171
3172 /* Return a cost estimate for multiplying a register by the given
3173 COEFFicient in the given MODE and SPEED. */
3174
3175 int
3176 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3177 {
3178 int max_cost;
3179 struct algorithm algorithm;
3180 enum mult_variant variant;
3181
3182 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3183 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3184 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3185 return algorithm.cost.cost;
3186 else
3187 return max_cost;
3188 }
3189
3190 /* Perform a widening multiplication and return an rtx for the result.
3191 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3192 TARGET is a suggestion for where to store the result (an rtx).
3193 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3194 or smul_widen_optab.
3195
3196 We check specially for a constant integer as OP1, comparing the
3197 cost of a widening multiply against the cost of a sequence of shifts
3198 and adds. */
3199
3200 rtx
3201 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3202 int unsignedp, optab this_optab)
3203 {
3204 bool speed = optimize_insn_for_speed_p ();
3205 rtx cop1;
3206
3207 if (CONST_INT_P (op1)
3208 && GET_MODE (op0) != VOIDmode
3209 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3210 this_optab == umul_widen_optab))
3211 && CONST_INT_P (cop1)
3212 && (INTVAL (cop1) >= 0
3213 || HWI_COMPUTABLE_MODE_P (mode)))
3214 {
3215 HOST_WIDE_INT coeff = INTVAL (cop1);
3216 int max_cost;
3217 enum mult_variant variant;
3218 struct algorithm algorithm;
3219
3220 /* Special case powers of two. */
3221 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3222 {
3223 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3224 return expand_shift (LSHIFT_EXPR, mode, op0,
3225 floor_log2 (coeff), target, unsignedp);
3226 }
3227
3228 /* Exclude cost of op0 from max_cost to match the cost
3229 calculation of the synth_mult. */
3230 max_cost = mul_widen_cost (speed, mode);
3231 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3232 max_cost))
3233 {
3234 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3235 return expand_mult_const (mode, op0, coeff, target,
3236 &algorithm, variant);
3237 }
3238 }
3239 return expand_binop (mode, this_optab, op0, op1, target,
3240 unsignedp, OPTAB_LIB_WIDEN);
3241 }
3242 \f
3243 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3244 replace division by D, and put the least significant N bits of the result
3245 in *MULTIPLIER_PTR and return the most significant bit.
3246
3247 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3248 needed precision is in PRECISION (should be <= N).
3249
3250 PRECISION should be as small as possible so this function can choose
3251 multiplier more freely.
3252
3253 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3254 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3255
3256 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3257 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
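/* As an illustration (with N == PRECISION == 32), D == 3 yields
   *MULTIPLIER_PTR == 0xaaaaaaab, *POST_SHIFT_PTR == 1 and a zero
   return value, i.e. for unsigned 32-bit x,
   x / 3 == ((x * 0xaaaaaaab) >> 32) >> 1.  */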
3258
3259 unsigned HOST_WIDE_INT
3260 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3261 unsigned HOST_WIDE_INT *multiplier_ptr,
3262 int *post_shift_ptr, int *lgup_ptr)
3263 {
3264 double_int mhigh, mlow;
3265 int lgup, post_shift;
3266 int pow, pow2;
3267
3268 /* lgup = ceil(log2(divisor)); */
3269 lgup = ceil_log2 (d);
3270
3271 gcc_assert (lgup <= n);
3272
3273 pow = n + lgup;
3274 pow2 = n + lgup - precision;
3275
3276 /* We could handle this with some effort, but this case is much
3277 better handled directly with a scc insn, so rely on caller using
3278 that. */
3279 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3280
3281 /* mlow = 2^(N + lgup)/d */
3282 double_int val = double_int_zero.set_bit (pow);
3283 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3284
3285 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3286 val |= double_int_zero.set_bit (pow2);
3287 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3288
3289 gcc_assert (!mhigh.high || val.high - d < d);
3290 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3291 /* Assert that mlow < mhigh. */
3292 gcc_assert (mlow.ult (mhigh));
3293
3294 /* If precision == N, then mlow, mhigh exceed 2^N
3295 (but they do not exceed 2^(N+1)). */
3296
3297 /* Reduce to lowest terms. */
3298 for (post_shift = lgup; post_shift > 0; post_shift--)
3299 {
3300 int shft = HOST_BITS_PER_WIDE_INT - 1;
3301 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3302 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3303 if (ml_lo >= mh_lo)
3304 break;
3305
3306 mlow = double_int::from_uhwi (ml_lo);
3307 mhigh = double_int::from_uhwi (mh_lo);
3308 }
3309
3310 *post_shift_ptr = post_shift;
3311 *lgup_ptr = lgup;
3312 if (n < HOST_BITS_PER_WIDE_INT)
3313 {
3314 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3315 *multiplier_ptr = mhigh.low & mask;
3316 return mhigh.low >= mask;
3317 }
3318 else
3319 {
3320 *multiplier_ptr = mhigh.low;
3321 return mhigh.high;
3322 }
3323 }
3324
3325 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3326 congruent to 1 (mod 2**N). */
3327
3328 static unsigned HOST_WIDE_INT
3329 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3330 {
3331 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3332
3333 /* The algorithm notes that the choice y = x satisfies
3334 x*y == 1 mod 2^3, since x is assumed odd.
3335 Each iteration doubles the number of bits of significance in y. */
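  /* For example, x == 3 and n == 8 converges to y == 171, and indeed
     3 * 171 == 513 == 1 (mod 2**8).  */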
3336
3337 unsigned HOST_WIDE_INT mask;
3338 unsigned HOST_WIDE_INT y = x;
3339 int nbit = 3;
3340
3341 mask = (n == HOST_BITS_PER_WIDE_INT
3342 ? ~(unsigned HOST_WIDE_INT) 0
3343 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3344
3345 while (nbit < n)
3346 {
3347 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3348 nbit *= 2;
3349 }
3350 return y;
3351 }
3352
3353 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3354 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3355 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3356 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3357 become signed.
3358
3359 The result is put in TARGET if that is convenient.
3360
3361 MODE is the mode of operation. */
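/* Sketch of the identity used below (illustrative; N-bit mode, all
   arithmetic mod 2^N):

     highpart_signed (x, y) = highpart_unsigned (x, y)
                              - (x < 0 ? y : 0) - (y < 0 ? x : 0)

   so the unsigned high part is obtained from the signed one by adding the
   two correction terms, and vice versa by subtracting them; each term is
   computed branch-free as (OP >> (N - 1)) & OTHER_OP.  */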
3362
3363 rtx
3364 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3365 rtx op1, rtx target, int unsignedp)
3366 {
3367 rtx tem;
3368 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3369
3370 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3371 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3372 tem = expand_and (mode, tem, op1, NULL_RTX);
3373 adj_operand
3374 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3375 adj_operand);
3376
3377 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3378 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3379 tem = expand_and (mode, tem, op0, NULL_RTX);
3380 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3381 target);
3382
3383 return target;
3384 }
3385
3386 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3387
3388 static rtx
3389 extract_high_half (enum machine_mode mode, rtx op)
3390 {
3391 enum machine_mode wider_mode;
3392
3393 if (mode == word_mode)
3394 return gen_highpart (mode, op);
3395
3396 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3397
3398 wider_mode = GET_MODE_WIDER_MODE (mode);
3399 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3400 GET_MODE_BITSIZE (mode), 0, 1);
3401 return convert_modes (mode, wider_mode, op, 0);
3402 }
3403
3404 /* Like expmed_mult_highpart, but only consider using a multiplication
3405 optab. OP1 is an rtx for the constant operand. */
3406
3407 static rtx
3408 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3409 rtx target, int unsignedp, int max_cost)
3410 {
3411 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3412 enum machine_mode wider_mode;
3413 optab moptab;
3414 rtx tem;
3415 int size;
3416 bool speed = optimize_insn_for_speed_p ();
3417
3418 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3419
3420 wider_mode = GET_MODE_WIDER_MODE (mode);
3421 size = GET_MODE_BITSIZE (mode);
3422
3423 /* Firstly, try using a multiplication insn that only generates the needed
3424 high part of the product, and in the sign flavor of unsignedp. */
3425 if (mul_highpart_cost (speed, mode) < max_cost)
3426 {
3427 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3428 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3429 unsignedp, OPTAB_DIRECT);
3430 if (tem)
3431 return tem;
3432 }
3433
3434 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3435 Need to adjust the result after the multiplication. */
3436 if (size - 1 < BITS_PER_WORD
3437 && (mul_highpart_cost (speed, mode)
3438 + 2 * shift_cost (speed, mode, size-1)
3439 + 4 * add_cost (speed, mode) < max_cost))
3440 {
3441 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3442 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3443 unsignedp, OPTAB_DIRECT);
3444 if (tem)
3445 /* We used the wrong signedness. Adjust the result. */
3446 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3447 tem, unsignedp);
3448 }
3449
3450 /* Try widening multiplication. */
3451 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3452 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3453 && mul_widen_cost (speed, wider_mode) < max_cost)
3454 {
3455 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3456 unsignedp, OPTAB_WIDEN);
3457 if (tem)
3458 return extract_high_half (mode, tem);
3459 }
3460
3461 /* Try widening the mode and performing a non-widening multiplication. */
3462 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3463 && size - 1 < BITS_PER_WORD
3464 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3465 < max_cost))
3466 {
3467 rtx insns, wop0, wop1;
3468
3469 /* We need to widen the operands, for example to ensure the
3470 constant multiplier is correctly sign or zero extended.
3471 Use a sequence to clean up any instructions emitted by
3472 the conversions if things don't work out. */
3473 start_sequence ();
3474 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3475 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3476 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3477 unsignedp, OPTAB_WIDEN);
3478 insns = get_insns ();
3479 end_sequence ();
3480
3481 if (tem)
3482 {
3483 emit_insn (insns);
3484 return extract_high_half (mode, tem);
3485 }
3486 }
3487
3488 /* Try widening multiplication of opposite signedness, and adjust. */
3489 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3490 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3491 && size - 1 < BITS_PER_WORD
3492 && (mul_widen_cost (speed, wider_mode)
3493 + 2 * shift_cost (speed, mode, size-1)
3494 + 4 * add_cost (speed, mode) < max_cost))
3495 {
3496 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3497 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3498 if (tem != 0)
3499 {
3500 tem = extract_high_half (mode, tem);
3501 /* We used the wrong signedness. Adjust the result. */
3502 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3503 target, unsignedp);
3504 }
3505 }
3506
3507 return 0;
3508 }
3509
3510 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3511 putting the high half of the result in TARGET if that is convenient,
3512 and return where the result is. If the operation can not be performed,
3513 0 is returned.
3514
3515 MODE is the mode of operation and result.
3516
3517 UNSIGNEDP nonzero means unsigned multiply.
3518
3519 MAX_COST is the total allowed cost for the expanded RTL. */
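/* Note on the sign adjustment performed below (illustrative): the shift/add
   path always multiplies by CNST1 = OP1 mod 2^N interpreted as unsigned.
   If OP1 is a negative signed constant, CNST1 = OP1 + 2^N, so the high half
   of the product is too large by exactly OP0, and the code compensates by
   subtracting OP0 from the result.  */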
3520
3521 static rtx
3522 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3523 rtx target, int unsignedp, int max_cost)
3524 {
3525 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3526 unsigned HOST_WIDE_INT cnst1;
3527 int extra_cost;
3528 bool sign_adjust = false;
3529 enum mult_variant variant;
3530 struct algorithm alg;
3531 rtx tem;
3532 bool speed = optimize_insn_for_speed_p ();
3533
3534 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3535 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3536 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3537
3538 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3539
3540 /* We can't optimize modes wider than BITS_PER_WORD.
3541 ??? We might be able to perform double-word arithmetic if
3542 mode == word_mode, however all the cost calculations in
3543 synth_mult etc. assume single-word operations. */
3544 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3545 return expmed_mult_highpart_optab (mode, op0, op1, target,
3546 unsignedp, max_cost);
3547
3548 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3549
3550 /* Check whether we try to multiply by a negative constant. */
3551 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3552 {
3553 sign_adjust = true;
3554 extra_cost += add_cost (speed, mode);
3555 }
3556
3557 /* See whether shift/add multiplication is cheap enough. */
3558 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3559 max_cost - extra_cost))
3560 {
3561 /* See whether the specialized multiplication optabs are
3562 cheaper than the shift/add version. */
3563 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3564 alg.cost.cost + extra_cost);
3565 if (tem)
3566 return tem;
3567
3568 tem = convert_to_mode (wider_mode, op0, unsignedp);
3569 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3570 tem = extract_high_half (mode, tem);
3571
3572 /* Adjust result for signedness. */
3573 if (sign_adjust)
3574 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3575
3576 return tem;
3577 }
3578 return expmed_mult_highpart_optab (mode, op0, op1, target,
3579 unsignedp, max_cost);
3580 }
3581
3582
3583 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3584
3585 static rtx
3586 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3587 {
3588 unsigned HOST_WIDE_INT masklow, maskhigh;
3589 rtx result, temp, shift, label;
3590 int logd;
3591
3592 logd = floor_log2 (d);
3593 result = gen_reg_rtx (mode);
3594
3595 /* Avoid conditional branches when they're expensive. */
3596 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3597 && optimize_insn_for_speed_p ())
3598 {
3599 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3600 mode, 0, -1);
3601 if (signmask)
3602 {
3603 signmask = force_reg (mode, signmask);
3604 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3605 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3606
3607 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3608 which instruction sequence to use. If logical right shifts
3609 are expensive, then use 2 XORs, 2 SUBs and an AND; otherwise
3610 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
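/* Illustrative sketch of the two sequences, writing S for SIGNMASK
   (-1 when OP0 < 0, else 0): the first computes
   ((((OP0 ^ S) - S) & MASKLOW) ^ S) - S, i.e. |OP0| mod D with the sign of
   OP0 restored; the second adds the bias S >>u (mode width - logd), which
   is D - 1 or 0, masks with MASKLOW and subtracts the bias again -- the
   usual branch-free signed remainder by a power of two.  */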
3611
3612 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3613 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3614 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3615 > COSTS_N_INSNS (2)))
3616 {
3617 temp = expand_binop (mode, xor_optab, op0, signmask,
3618 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3619 temp = expand_binop (mode, sub_optab, temp, signmask,
3620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3622 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3623 temp = expand_binop (mode, xor_optab, temp, signmask,
3624 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625 temp = expand_binop (mode, sub_optab, temp, signmask,
3626 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3627 }
3628 else
3629 {
3630 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 signmask = force_reg (mode, signmask);
3633
3634 temp = expand_binop (mode, add_optab, op0, signmask,
3635 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3636 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3638 temp = expand_binop (mode, sub_optab, temp, signmask,
3639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640 }
3641 return temp;
3642 }
3643 }
3644
3645 /* Mask contains the mode's signbit and the significant bits of the
3646 modulus. By including the signbit in the operation, many targets
3647 can avoid an explicit compare operation in the following comparison
3648 against zero. */
3649
3650 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3651 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3652 {
3653 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3654 maskhigh = -1;
3655 }
3656 else
3657 maskhigh = (HOST_WIDE_INT) -1
3658 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3659
3660 temp = expand_binop (mode, and_optab, op0,
3661 immed_double_const (masklow, maskhigh, mode),
3662 result, 1, OPTAB_LIB_WIDEN);
3663 if (temp != result)
3664 emit_move_insn (result, temp);
3665
3666 label = gen_label_rtx ();
3667 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3668
3669 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3670 0, OPTAB_LIB_WIDEN);
3671 masklow = (HOST_WIDE_INT) -1 << logd;
3672 maskhigh = -1;
3673 temp = expand_binop (mode, ior_optab, temp,
3674 immed_double_const (masklow, maskhigh, mode),
3675 result, 1, OPTAB_LIB_WIDEN);
3676 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3677 0, OPTAB_LIB_WIDEN);
3678 if (temp != result)
3679 emit_move_insn (result, temp);
3680 emit_label (label);
3681 return result;
3682 }
3683
3684 /* Expand signed division of OP0 by a power of two D in mode MODE.
3685 This routine is only called for positive values of D. */
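/* Illustrative sketch of the branch-free path below, for D = 4 in a 32-bit
   mode (not part of the original comment):

     t = (op0 < 0) ? -1 : 0;         -- emit_store_flag with value -1
     t = t & 3;                      -- or t >>u 30, whichever is cheaper
     quotient = (op0 + t) >> 2;      -- arithmetic shift

   i.e. D - 1 is added before the shift only when OP0 is negative, so the
   quotient is truncated toward zero rather than toward -infinity.  */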
3686
3687 static rtx
3688 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3689 {
3690 rtx temp, label;
3691 int logd;
3692
3693 logd = floor_log2 (d);
3694
3695 if (d == 2
3696 && BRANCH_COST (optimize_insn_for_speed_p (),
3697 false) >= 1)
3698 {
3699 temp = gen_reg_rtx (mode);
3700 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3701 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3702 0, OPTAB_LIB_WIDEN);
3703 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3704 }
3705
3706 #ifdef HAVE_conditional_move
3707 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3708 >= 2)
3709 {
3710 rtx temp2;
3711
3712 /* ??? emit_conditional_move forces a stack adjustment via
3713 compare_from_rtx so, if the sequence is discarded, it will
3714 be lost. Do it now instead. */
3715 do_pending_stack_adjust ();
3716
3717 start_sequence ();
3718 temp2 = copy_to_mode_reg (mode, op0);
3719 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3720 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3721 temp = force_reg (mode, temp);
3722
3723 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3724 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3725 mode, temp, temp2, mode, 0);
3726 if (temp2)
3727 {
3728 rtx seq = get_insns ();
3729 end_sequence ();
3730 emit_insn (seq);
3731 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3732 }
3733 end_sequence ();
3734 }
3735 #endif
3736
3737 if (BRANCH_COST (optimize_insn_for_speed_p (),
3738 false) >= 2)
3739 {
3740 int ushift = GET_MODE_BITSIZE (mode) - logd;
3741
3742 temp = gen_reg_rtx (mode);
3743 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3744 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3745 > COSTS_N_INSNS (1))
3746 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3747 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3748 else
3749 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3750 ushift, NULL_RTX, 1);
3751 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3752 0, OPTAB_LIB_WIDEN);
3753 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3754 }
3755
3756 label = gen_label_rtx ();
3757 temp = copy_to_mode_reg (mode, op0);
3758 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3759 expand_inc (temp, GEN_INT (d - 1));
3760 emit_label (label);
3761 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3762 }
3763 \f
3764 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3765 if that is convenient, and returning where the result is.
3766 You may request either the quotient or the remainder as the result;
3767 specify REM_FLAG nonzero to get the remainder.
3768
3769 CODE is the expression code for which kind of division this is;
3770 it controls how rounding is done. MODE is the machine mode to use.
3771 UNSIGNEDP nonzero means do unsigned division. */
3772
3773 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3774 and then correct it by or'ing in missing high bits
3775 if result of ANDI is nonzero.
3776 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3777 This could optimize to a bfexts instruction.
3778 But C doesn't use these operations, so their optimizations are
3779 left for later. */
3780 /* ??? For modulo, we don't actually need the highpart of the first product,
3781 the low part will do nicely. And for small divisors, the second multiply
3782 can also be a low-part only multiply or even be completely left out.
3783 E.g. to calculate the remainder of a division by 3 with a 32 bit
3784 multiply, multiply with 0x55555556 and extract the upper two bits;
3785 the result is exact for inputs up to 0x1fffffff.
3786 The input range can be reduced by using cross-sum rules.
3787 For odd divisors >= 3, the following table gives right shift counts
3788 so that if a number is shifted by an integer multiple of the given
3789 amount, the remainder stays the same:
3790 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3791 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3792 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3793 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3794 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3795
3796 Cross-sum rules for even numbers can be derived by leaving as many bits
3797 to the right alone as the divisor has zeros to the right.
3798 E.g. if x is an unsigned 32 bit number:
3799 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3800 */
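/* (Illustrative aside: each nonzero shift count above is the multiplicative
   order of 2 modulo the divisor, e.g. 2^3 == 1 (mod 7); splitting
   x = hi * 2^3 + lo therefore gives x == hi + lo (mod 7), which is what
   makes the cross-sum reduction valid.)  */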
3801
3802 rtx
3803 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3804 rtx op0, rtx op1, rtx target, int unsignedp)
3805 {
3806 enum machine_mode compute_mode;
3807 rtx tquotient;
3808 rtx quotient = 0, remainder = 0;
3809 rtx last;
3810 int size;
3811 rtx insn;
3812 optab optab1, optab2;
3813 int op1_is_constant, op1_is_pow2 = 0;
3814 int max_cost, extra_cost;
3815 static HOST_WIDE_INT last_div_const = 0;
3816 static HOST_WIDE_INT ext_op1;
3817 bool speed = optimize_insn_for_speed_p ();
3818
3819 op1_is_constant = CONST_INT_P (op1);
3820 if (op1_is_constant)
3821 {
3822 ext_op1 = INTVAL (op1);
3823 if (unsignedp)
3824 ext_op1 &= GET_MODE_MASK (mode);
3825 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3826 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3827 }
3828
3829 /*
3830 This is the structure of expand_divmod:
3831
3832 First comes code to fix up the operands so we can perform the operations
3833 correctly and efficiently.
3834
3835 Second comes a switch statement with code specific for each rounding mode.
3836 For some special operands this code emits all RTL for the desired
3837 operation, for other cases, it generates only a quotient and stores it in
3838 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3839 to indicate that it has not done anything.
3840
3841 Last comes code that finishes the operation. If QUOTIENT is set and
3842 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3843 QUOTIENT is not set, it is computed using trunc rounding.
3844
3845 We try to generate special code for division and remainder when OP1 is a
3846 constant. If |OP1| = 2**n we can use shifts and some other fast
3847 operations. For other values of OP1, we compute a carefully selected
3848 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3849 by m.
3850
3851 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3852 half of the product. Different strategies for generating the product are
3853 implemented in expmed_mult_highpart.
3854
3855 If what we actually want is the remainder, we generate that by another
3856 by-constant multiplication and a subtraction. */
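/* Sketch of the identities the constant-divisor paths below rely on
   (illustrative; N is the width of COMPUTE_MODE, m and POST_SHIFT come
   from choose_multiplier, and high_N denotes the upper N bits of the
   2N-bit product):

     unsigned, m fits in N bits:   q = high_N (x * m) >> POST_SHIFT
     unsigned, m needs N+1 bits:   t1 = high_N (x * m);
                                   q = (t1 + ((x - t1) >> 1)) >> (POST_SHIFT - 1)
     signed, positive divisor:     q = (high_N (x * m) >> POST_SHIFT) - (x >> (N-1))

   (the signed result is negated for a negative divisor, and even unsigned
   divisors may get an initial right shift of x first).  The remainder,
   when not produced directly, is recovered afterwards as x - q * OP1.  */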
3857
3858 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3859 code below will malfunction if we are, so check here and handle
3860 the special case if so. */
3861 if (op1 == const1_rtx)
3862 return rem_flag ? const0_rtx : op0;
3863
3864 /* When dividing by -1, we could get an overflow.
3865 negv_optab can handle overflows. */
3866 if (! unsignedp && op1 == constm1_rtx)
3867 {
3868 if (rem_flag)
3869 return const0_rtx;
3870 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3871 ? negv_optab : neg_optab, op0, target, 0);
3872 }
3873
3874 if (target
3875 /* Don't use the function value register as a target
3876 since we have to read it as well as write it,
3877 and function-inlining gets confused by this. */
3878 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3879 /* Don't clobber an operand while doing a multi-step calculation. */
3880 || ((rem_flag || op1_is_constant)
3881 && (reg_mentioned_p (target, op0)
3882 || (MEM_P (op0) && MEM_P (target))))
3883 || reg_mentioned_p (target, op1)
3884 || (MEM_P (op1) && MEM_P (target))))
3885 target = 0;
3886
3887 /* Get the mode in which to perform this computation. Normally it will
3888 be MODE, but sometimes we can't do the desired operation in MODE.
3889 If so, pick a wider mode in which we can do the operation. Convert
3890 to that mode at the start to avoid repeated conversions.
3891
3892 First see what operations we need. These depend on the expression
3893 we are evaluating. (We assume that divxx3 insns exist under the
3894 same conditions that modxx3 insns and that these insns don't normally
3895 fail. If these assumptions are not correct, we may generate less
3896 efficient code in some cases.)
3897
3898 Then see if we find a mode in which we can open-code that operation
3899 (either a division, modulus, or shift). Finally, check for the smallest
3900 mode for which we can do the operation with a library call. */
3901
3902 /* We might want to refine this now that we have division-by-constant
3903 optimization. Since expmed_mult_highpart tries so many variants, it is
3904 not straightforward to generalize this. Maybe we should make an array
3905 of possible modes in init_expmed? Save this for GCC 2.7. */
3906
3907 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3908 ? (unsignedp ? lshr_optab : ashr_optab)
3909 : (unsignedp ? udiv_optab : sdiv_optab));
3910 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3911 ? optab1
3912 : (unsignedp ? udivmod_optab : sdivmod_optab));
3913
3914 for (compute_mode = mode; compute_mode != VOIDmode;
3915 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3916 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3917 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3918 break;
3919
3920 if (compute_mode == VOIDmode)
3921 for (compute_mode = mode; compute_mode != VOIDmode;
3922 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3923 if (optab_libfunc (optab1, compute_mode)
3924 || optab_libfunc (optab2, compute_mode))
3925 break;
3926
3927 /* If we still couldn't find a mode, use MODE, but expand_binop will
3928 probably die. */
3929 if (compute_mode == VOIDmode)
3930 compute_mode = mode;
3931
3932 if (target && GET_MODE (target) == compute_mode)
3933 tquotient = target;
3934 else
3935 tquotient = gen_reg_rtx (compute_mode);
3936
3937 size = GET_MODE_BITSIZE (compute_mode);
3938 #if 0
3939 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3940 (mode), and thereby get better code when OP1 is a constant. Do that
3941 later. It will require going over all usages of SIZE below. */
3942 size = GET_MODE_BITSIZE (mode);
3943 #endif
3944
3945 /* Only deduct something for a REM if the last divide done was
3946 for a different constant. Then set the constant of the last
3947 divide. */
3948 max_cost = (unsignedp
3949 ? udiv_cost (speed, compute_mode)
3950 : sdiv_cost (speed, compute_mode));
3951 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3952 && INTVAL (op1) == last_div_const))
3953 max_cost -= (mul_cost (speed, compute_mode)
3954 + add_cost (speed, compute_mode));
3955
3956 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3957
3958 /* Now convert to the best mode to use. */
3959 if (compute_mode != mode)
3960 {
3961 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3962 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3963
3964 /* convert_modes may have placed op1 into a register, so we
3965 must recompute the following. */
3966 op1_is_constant = CONST_INT_P (op1);
3967 op1_is_pow2 = (op1_is_constant
3968 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3969 || (! unsignedp
3970 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3971 }
3972
3973 /* If one of the operands is a volatile MEM, copy it into a register. */
3974
3975 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3976 op0 = force_reg (compute_mode, op0);
3977 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3978 op1 = force_reg (compute_mode, op1);
3979
3980 /* If we need the remainder or if OP1 is constant, we need to
3981 put OP0 in a register in case it has any queued subexpressions. */
3982 if (rem_flag || op1_is_constant)
3983 op0 = force_reg (compute_mode, op0);
3984
3985 last = get_last_insn ();
3986
3987 /* Promote floor rounding to trunc rounding for unsigned operations. */
3988 if (unsignedp)
3989 {
3990 if (code == FLOOR_DIV_EXPR)
3991 code = TRUNC_DIV_EXPR;
3992 if (code == FLOOR_MOD_EXPR)
3993 code = TRUNC_MOD_EXPR;
3994 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3995 code = TRUNC_DIV_EXPR;
3996 }
3997
3998 if (op1 != const0_rtx)
3999 switch (code)
4000 {
4001 case TRUNC_MOD_EXPR:
4002 case TRUNC_DIV_EXPR:
4003 if (op1_is_constant)
4004 {
4005 if (unsignedp)
4006 {
4007 unsigned HOST_WIDE_INT mh, ml;
4008 int pre_shift, post_shift;
4009 int dummy;
4010 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4011 & GET_MODE_MASK (compute_mode));
4012
4013 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4014 {
4015 pre_shift = floor_log2 (d);
4016 if (rem_flag)
4017 {
4018 remainder
4019 = expand_binop (compute_mode, and_optab, op0,
4020 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4021 remainder, 1,
4022 OPTAB_LIB_WIDEN);
4023 if (remainder)
4024 return gen_lowpart (mode, remainder);
4025 }
4026 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4027 pre_shift, tquotient, 1);
4028 }
4029 else if (size <= HOST_BITS_PER_WIDE_INT)
4030 {
4031 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4032 {
4033 /* Most significant bit of divisor is set; emit an scc
4034 insn. */
4035 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4036 compute_mode, 1, 1);
4037 }
4038 else
4039 {
4040 /* Find a suitable multiplier and right shift count
4041 instead of multiplying with D. */
4042
4043 mh = choose_multiplier (d, size, size,
4044 &ml, &post_shift, &dummy);
4045
4046 /* If the suggested multiplier is more than SIZE bits,
4047 we can do better for even divisors, using an
4048 initial right shift. */
4049 if (mh != 0 && (d & 1) == 0)
4050 {
4051 pre_shift = floor_log2 (d & -d);
4052 mh = choose_multiplier (d >> pre_shift, size,
4053 size - pre_shift,
4054 &ml, &post_shift, &dummy);
4055 gcc_assert (!mh);
4056 }
4057 else
4058 pre_shift = 0;
4059
4060 if (mh != 0)
4061 {
4062 rtx t1, t2, t3, t4;
4063
4064 if (post_shift - 1 >= BITS_PER_WORD)
4065 goto fail1;
4066
4067 extra_cost
4068 = (shift_cost (speed, compute_mode, post_shift - 1)
4069 + shift_cost (speed, compute_mode, 1)
4070 + 2 * add_cost (speed, compute_mode));
4071 t1 = expmed_mult_highpart (compute_mode, op0,
4072 GEN_INT (ml),
4073 NULL_RTX, 1,
4074 max_cost - extra_cost);
4075 if (t1 == 0)
4076 goto fail1;
4077 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4078 op0, t1),
4079 NULL_RTX);
4080 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4081 t2, 1, NULL_RTX, 1);
4082 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4083 t1, t3),
4084 NULL_RTX);
4085 quotient = expand_shift
4086 (RSHIFT_EXPR, compute_mode, t4,
4087 post_shift - 1, tquotient, 1);
4088 }
4089 else
4090 {
4091 rtx t1, t2;
4092
4093 if (pre_shift >= BITS_PER_WORD
4094 || post_shift >= BITS_PER_WORD)
4095 goto fail1;
4096
4097 t1 = expand_shift
4098 (RSHIFT_EXPR, compute_mode, op0,
4099 pre_shift, NULL_RTX, 1);
4100 extra_cost
4101 = (shift_cost (speed, compute_mode, pre_shift)
4102 + shift_cost (speed, compute_mode, post_shift));
4103 t2 = expmed_mult_highpart (compute_mode, t1,
4104 GEN_INT (ml),
4105 NULL_RTX, 1,
4106 max_cost - extra_cost);
4107 if (t2 == 0)
4108 goto fail1;
4109 quotient = expand_shift
4110 (RSHIFT_EXPR, compute_mode, t2,
4111 post_shift, tquotient, 1);
4112 }
4113 }
4114 }
4115 else /* Too wide mode to use tricky code */
4116 break;
4117
4118 insn = get_last_insn ();
4119 if (insn != last)
4120 set_dst_reg_note (insn, REG_EQUAL,
4121 gen_rtx_UDIV (compute_mode, op0, op1),
4122 quotient);
4123 }
4124 else /* TRUNC_DIV, signed */
4125 {
4126 unsigned HOST_WIDE_INT ml;
4127 int lgup, post_shift;
4128 rtx mlr;
4129 HOST_WIDE_INT d = INTVAL (op1);
4130 unsigned HOST_WIDE_INT abs_d;
4131
4132 /* Since d might be INT_MIN, we have to cast to
4133 unsigned HOST_WIDE_INT before negating to avoid
4134 undefined signed overflow. */
4135 abs_d = (d >= 0
4136 ? (unsigned HOST_WIDE_INT) d
4137 : - (unsigned HOST_WIDE_INT) d);
4138
4139 /* n rem d = n rem -d */
4140 if (rem_flag && d < 0)
4141 {
4142 d = abs_d;
4143 op1 = gen_int_mode (abs_d, compute_mode);
4144 }
4145
4146 if (d == 1)
4147 quotient = op0;
4148 else if (d == -1)
4149 quotient = expand_unop (compute_mode, neg_optab, op0,
4150 tquotient, 0);
4151 else if (HOST_BITS_PER_WIDE_INT >= size
4152 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4153 {
4154 /* This case is not handled correctly below. */
4155 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4156 compute_mode, 1, 1);
4157 if (quotient == 0)
4158 goto fail1;
4159 }
4160 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4161 && (rem_flag
4162 ? smod_pow2_cheap (speed, compute_mode)
4163 : sdiv_pow2_cheap (speed, compute_mode))
4164 /* We assume that the cheap metric is true if the
4165 optab has an expander for this mode. */
4166 && ((optab_handler ((rem_flag ? smod_optab
4167 : sdiv_optab),
4168 compute_mode)
4169 != CODE_FOR_nothing)
4170 || (optab_handler (sdivmod_optab,
4171 compute_mode)
4172 != CODE_FOR_nothing)))
4173 ;
4174 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4175 {
4176 if (rem_flag)
4177 {
4178 remainder = expand_smod_pow2 (compute_mode, op0, d);
4179 if (remainder)
4180 return gen_lowpart (mode, remainder);
4181 }
4182
4183 if (sdiv_pow2_cheap (speed, compute_mode)
4184 && ((optab_handler (sdiv_optab, compute_mode)
4185 != CODE_FOR_nothing)
4186 || (optab_handler (sdivmod_optab, compute_mode)
4187 != CODE_FOR_nothing)))
4188 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4189 compute_mode, op0,
4190 gen_int_mode (abs_d,
4191 compute_mode),
4192 NULL_RTX, 0);
4193 else
4194 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4195
4196 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4197 negate the quotient. */
4198 if (d < 0)
4199 {
4200 insn = get_last_insn ();
4201 if (insn != last
4202 && abs_d < ((unsigned HOST_WIDE_INT) 1
4203 << (HOST_BITS_PER_WIDE_INT - 1)))
4204 set_dst_reg_note (insn, REG_EQUAL,
4205 gen_rtx_DIV (compute_mode, op0,
4206 gen_int_mode
4207 (abs_d,
4208 compute_mode)),
4209 quotient);
4210
4211 quotient = expand_unop (compute_mode, neg_optab,
4212 quotient, quotient, 0);
4213 }
4214 }
4215 else if (size <= HOST_BITS_PER_WIDE_INT)
4216 {
4217 choose_multiplier (abs_d, size, size - 1,
4218 &ml, &post_shift, &lgup);
4219 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4220 {
4221 rtx t1, t2, t3;
4222
4223 if (post_shift >= BITS_PER_WORD
4224 || size - 1 >= BITS_PER_WORD)
4225 goto fail1;
4226
4227 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4228 + shift_cost (speed, compute_mode, size - 1)
4229 + add_cost (speed, compute_mode));
4230 t1 = expmed_mult_highpart (compute_mode, op0,
4231 GEN_INT (ml), NULL_RTX, 0,
4232 max_cost - extra_cost);
4233 if (t1 == 0)
4234 goto fail1;
4235 t2 = expand_shift
4236 (RSHIFT_EXPR, compute_mode, t1,
4237 post_shift, NULL_RTX, 0);
4238 t3 = expand_shift
4239 (RSHIFT_EXPR, compute_mode, op0,
4240 size - 1, NULL_RTX, 0);
4241 if (d < 0)
4242 quotient
4243 = force_operand (gen_rtx_MINUS (compute_mode,
4244 t3, t2),
4245 tquotient);
4246 else
4247 quotient
4248 = force_operand (gen_rtx_MINUS (compute_mode,
4249 t2, t3),
4250 tquotient);
4251 }
4252 else
4253 {
4254 rtx t1, t2, t3, t4;
4255
4256 if (post_shift >= BITS_PER_WORD
4257 || size - 1 >= BITS_PER_WORD)
4258 goto fail1;
4259
4260 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4261 mlr = gen_int_mode (ml, compute_mode);
4262 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4263 + shift_cost (speed, compute_mode, size - 1)
4264 + 2 * add_cost (speed, compute_mode));
4265 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4266 NULL_RTX, 0,
4267 max_cost - extra_cost);
4268 if (t1 == 0)
4269 goto fail1;
4270 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4271 t1, op0),
4272 NULL_RTX);
4273 t3 = expand_shift
4274 (RSHIFT_EXPR, compute_mode, t2,
4275 post_shift, NULL_RTX, 0);
4276 t4 = expand_shift
4277 (RSHIFT_EXPR, compute_mode, op0,
4278 size - 1, NULL_RTX, 0);
4279 if (d < 0)
4280 quotient
4281 = force_operand (gen_rtx_MINUS (compute_mode,
4282 t4, t3),
4283 tquotient);
4284 else
4285 quotient
4286 = force_operand (gen_rtx_MINUS (compute_mode,
4287 t3, t4),
4288 tquotient);
4289 }
4290 }
4291 else /* Too wide mode to use tricky code */
4292 break;
4293
4294 insn = get_last_insn ();
4295 if (insn != last)
4296 set_dst_reg_note (insn, REG_EQUAL,
4297 gen_rtx_DIV (compute_mode, op0, op1),
4298 quotient);
4299 }
4300 break;
4301 }
4302 fail1:
4303 delete_insns_since (last);
4304 break;
4305
4306 case FLOOR_DIV_EXPR:
4307 case FLOOR_MOD_EXPR:
4308 /* We will come here only for signed operations. */
4309 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4310 {
4311 unsigned HOST_WIDE_INT mh, ml;
4312 int pre_shift, lgup, post_shift;
4313 HOST_WIDE_INT d = INTVAL (op1);
4314
4315 if (d > 0)
4316 {
4317 /* We could just as easily deal with negative constants here,
4318 but it does not seem worth the trouble for GCC 2.6. */
4319 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4320 {
4321 pre_shift = floor_log2 (d);
4322 if (rem_flag)
4323 {
4324 remainder = expand_binop (compute_mode, and_optab, op0,
4325 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4326 remainder, 0, OPTAB_LIB_WIDEN);
4327 if (remainder)
4328 return gen_lowpart (mode, remainder);
4329 }
4330 quotient = expand_shift
4331 (RSHIFT_EXPR, compute_mode, op0,
4332 pre_shift, tquotient, 0);
4333 }
4334 else
4335 {
4336 rtx t1, t2, t3, t4;
4337
4338 mh = choose_multiplier (d, size, size - 1,
4339 &ml, &post_shift, &lgup);
4340 gcc_assert (!mh);
4341
4342 if (post_shift < BITS_PER_WORD
4343 && size - 1 < BITS_PER_WORD)
4344 {
4345 t1 = expand_shift
4346 (RSHIFT_EXPR, compute_mode, op0,
4347 size - 1, NULL_RTX, 0);
4348 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4349 NULL_RTX, 0, OPTAB_WIDEN);
4350 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4351 + shift_cost (speed, compute_mode, size - 1)
4352 + 2 * add_cost (speed, compute_mode));
4353 t3 = expmed_mult_highpart (compute_mode, t2,
4354 GEN_INT (ml), NULL_RTX, 1,
4355 max_cost - extra_cost);
4356 if (t3 != 0)
4357 {
4358 t4 = expand_shift
4359 (RSHIFT_EXPR, compute_mode, t3,
4360 post_shift, NULL_RTX, 1);
4361 quotient = expand_binop (compute_mode, xor_optab,
4362 t4, t1, tquotient, 0,
4363 OPTAB_WIDEN);
4364 }
4365 }
4366 }
4367 }
4368 else
4369 {
4370 rtx nsign, t1, t2, t3, t4;
4371 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4372 op0, constm1_rtx), NULL_RTX);
4373 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4374 0, OPTAB_WIDEN);
4375 nsign = expand_shift
4376 (RSHIFT_EXPR, compute_mode, t2,
4377 size - 1, NULL_RTX, 0);
4378 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4379 NULL_RTX);
4380 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4381 NULL_RTX, 0);
4382 if (t4)
4383 {
4384 rtx t5;
4385 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4386 NULL_RTX, 0);
4387 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4388 t4, t5),
4389 tquotient);
4390 }
4391 }
4392 }
4393
4394 if (quotient != 0)
4395 break;
4396 delete_insns_since (last);
4397
4398 /* Try using an instruction that produces both the quotient and
4399 remainder, using truncation. We can easily compensate the quotient
4400 or remainder to get floor rounding, once we have the remainder.
4401 Notice that we also compute the final remainder value here,
4402 and return the result right away. */
4403 if (target == 0 || GET_MODE (target) != compute_mode)
4404 target = gen_reg_rtx (compute_mode);
4405
4406 if (rem_flag)
4407 {
4408 remainder
4409 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4410 quotient = gen_reg_rtx (compute_mode);
4411 }
4412 else
4413 {
4414 quotient
4415 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4416 remainder = gen_reg_rtx (compute_mode);
4417 }
4418
4419 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4420 quotient, remainder, 0))
4421 {
4422 /* This could be computed with a branch-less sequence.
4423 Save that for later. */
4424 rtx tem;
4425 rtx label = gen_label_rtx ();
4426 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4427 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4428 NULL_RTX, 0, OPTAB_WIDEN);
4429 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4430 expand_dec (quotient, const1_rtx);
4431 expand_inc (remainder, op1);
4432 emit_label (label);
4433 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4434 }
4435
4436 /* No luck with division elimination or divmod. Have to do it
4437 by conditionally adjusting op0 *and* the result. */
4438 {
4439 rtx label1, label2, label3, label4, label5;
4440 rtx adjusted_op0;
4441 rtx tem;
4442
4443 quotient = gen_reg_rtx (compute_mode);
4444 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4445 label1 = gen_label_rtx ();
4446 label2 = gen_label_rtx ();
4447 label3 = gen_label_rtx ();
4448 label4 = gen_label_rtx ();
4449 label5 = gen_label_rtx ();
4450 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4451 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4452 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4453 quotient, 0, OPTAB_LIB_WIDEN);
4454 if (tem != quotient)
4455 emit_move_insn (quotient, tem);
4456 emit_jump_insn (gen_jump (label5));
4457 emit_barrier ();
4458 emit_label (label1);
4459 expand_inc (adjusted_op0, const1_rtx);
4460 emit_jump_insn (gen_jump (label4));
4461 emit_barrier ();
4462 emit_label (label2);
4463 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4464 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4465 quotient, 0, OPTAB_LIB_WIDEN);
4466 if (tem != quotient)
4467 emit_move_insn (quotient, tem);
4468 emit_jump_insn (gen_jump (label5));
4469 emit_barrier ();
4470 emit_label (label3);
4471 expand_dec (adjusted_op0, const1_rtx);
4472 emit_label (label4);
4473 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4474 quotient, 0, OPTAB_LIB_WIDEN);
4475 if (tem != quotient)
4476 emit_move_insn (quotient, tem);
4477 expand_dec (quotient, const1_rtx);
4478 emit_label (label5);
4479 }
4480 break;
4481
4482 case CEIL_DIV_EXPR:
4483 case CEIL_MOD_EXPR:
4484 if (unsignedp)
4485 {
4486 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4487 {
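/* Illustrative summary: for D = 2^k this computes
   q = (op0 >> k) + ((op0 & (D - 1)) != 0), i.e. the unsigned quotient is
   rounded up whenever any discarded bits are nonzero.  */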
4488 rtx t1, t2, t3;
4489 unsigned HOST_WIDE_INT d = INTVAL (op1);
4490 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4491 floor_log2 (d), tquotient, 1);
4492 t2 = expand_binop (compute_mode, and_optab, op0,
4493 GEN_INT (d - 1),
4494 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4495 t3 = gen_reg_rtx (compute_mode);
4496 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4497 compute_mode, 1, 1);
4498 if (t3 == 0)
4499 {
4500 rtx lab;
4501 lab = gen_label_rtx ();
4502 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4503 expand_inc (t1, const1_rtx);
4504 emit_label (lab);
4505 quotient = t1;
4506 }
4507 else
4508 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4509 t1, t3),
4510 tquotient);
4511 break;
4512 }
4513
4514 /* Try using an instruction that produces both the quotient and
4515 remainder, using truncation. We can easily compensate the
4516 quotient or remainder to get ceiling rounding, once we have the
4517 remainder. Notice that we also compute the final remainder
4518 value here, and return the result right away. */
4519 if (target == 0 || GET_MODE (target) != compute_mode)
4520 target = gen_reg_rtx (compute_mode);
4521
4522 if (rem_flag)
4523 {
4524 remainder = (REG_P (target)
4525 ? target : gen_reg_rtx (compute_mode));
4526 quotient = gen_reg_rtx (compute_mode);
4527 }
4528 else
4529 {
4530 quotient = (REG_P (target)
4531 ? target : gen_reg_rtx (compute_mode));
4532 remainder = gen_reg_rtx (compute_mode);
4533 }
4534
4535 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4536 remainder, 1))
4537 {
4538 /* This could be computed with a branch-less sequence.
4539 Save that for later. */
4540 rtx label = gen_label_rtx ();
4541 do_cmp_and_jump (remainder, const0_rtx, EQ,
4542 compute_mode, label);
4543 expand_inc (quotient, const1_rtx);
4544 expand_dec (remainder, op1);
4545 emit_label (label);
4546 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4547 }
4548
4549 /* No luck with division elimination or divmod. Have to do it
4550 by conditionally adjusting op0 *and* the result. */
4551 {
4552 rtx label1, label2;
4553 rtx adjusted_op0, tem;
4554
4555 quotient = gen_reg_rtx (compute_mode);
4556 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4557 label1 = gen_label_rtx ();
4558 label2 = gen_label_rtx ();
4559 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4560 compute_mode, label1);
4561 emit_move_insn (quotient, const0_rtx);
4562 emit_jump_insn (gen_jump (label2));
4563 emit_barrier ();
4564 emit_label (label1);
4565 expand_dec (adjusted_op0, const1_rtx);
4566 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4567 quotient, 1, OPTAB_LIB_WIDEN);
4568 if (tem != quotient)
4569 emit_move_insn (quotient, tem);
4570 expand_inc (quotient, const1_rtx);
4571 emit_label (label2);
4572 }
4573 }
4574 else /* signed */
4575 {
4576 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4577 && INTVAL (op1) >= 0)
4578 {
4579 /* This is extremely similar to the code for the unsigned case
4580 above. For 2.7 we should merge these variants, but for
4581 2.6.1 I don't want to touch the code for unsigned since that
4582 get used in C. The signed case will only be used by other
4583 languages (Ada). */
4584
4585 rtx t1, t2, t3;
4586 unsigned HOST_WIDE_INT d = INTVAL (op1);
4587 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4588 floor_log2 (d), tquotient, 0);
4589 t2 = expand_binop (compute_mode, and_optab, op0,
4590 GEN_INT (d - 1),
4591 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4592 t3 = gen_reg_rtx (compute_mode);
4593 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4594 compute_mode, 1, 1);
4595 if (t3 == 0)
4596 {
4597 rtx lab;
4598 lab = gen_label_rtx ();
4599 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4600 expand_inc (t1, const1_rtx);
4601 emit_label (lab);
4602 quotient = t1;
4603 }
4604 else
4605 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4606 t1, t3),
4607 tquotient);
4608 break;
4609 }
4610
4611 /* Try using an instruction that produces both the quotient and
4612 remainder, using truncation. We can easily compensate the
4613 quotient or remainder to get ceiling rounding, once we have the
4614 remainder. Notice that we also compute the final remainder
4615 value here, and return the result right away. */
4616 if (target == 0 || GET_MODE (target) != compute_mode)
4617 target = gen_reg_rtx (compute_mode);
4618 if (rem_flag)
4619 {
4620 remainder = (REG_P (target)
4621 ? target : gen_reg_rtx (compute_mode));
4622 quotient = gen_reg_rtx (compute_mode);
4623 }
4624 else
4625 {
4626 quotient = (REG_P (target)
4627 ? target : gen_reg_rtx (compute_mode));
4628 remainder = gen_reg_rtx (compute_mode);
4629 }
4630
4631 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4632 remainder, 0))
4633 {
4634 /* This could be computed with a branch-less sequence.
4635 Save that for later. */
4636 rtx tem;
4637 rtx label = gen_label_rtx ();
4638 do_cmp_and_jump (remainder, const0_rtx, EQ,
4639 compute_mode, label);
4640 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4641 NULL_RTX, 0, OPTAB_WIDEN);
4642 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4643 expand_inc (quotient, const1_rtx);
4644 expand_dec (remainder, op1);
4645 emit_label (label);
4646 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4647 }
4648
4649 /* No luck with division elimination or divmod. Have to do it
4650 by conditionally adjusting op0 *and* the result. */
4651 {
4652 rtx label1, label2, label3, label4, label5;
4653 rtx adjusted_op0;
4654 rtx tem;
4655
4656 quotient = gen_reg_rtx (compute_mode);
4657 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4658 label1 = gen_label_rtx ();
4659 label2 = gen_label_rtx ();
4660 label3 = gen_label_rtx ();
4661 label4 = gen_label_rtx ();
4662 label5 = gen_label_rtx ();
4663 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4664 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4665 compute_mode, label1);
4666 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4667 quotient, 0, OPTAB_LIB_WIDEN);
4668 if (tem != quotient)
4669 emit_move_insn (quotient, tem);
4670 emit_jump_insn (gen_jump (label5));
4671 emit_barrier ();
4672 emit_label (label1);
4673 expand_dec (adjusted_op0, const1_rtx);
4674 emit_jump_insn (gen_jump (label4));
4675 emit_barrier ();
4676 emit_label (label2);
4677 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4678 compute_mode, label3);
4679 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4680 quotient, 0, OPTAB_LIB_WIDEN);
4681 if (tem != quotient)
4682 emit_move_insn (quotient, tem);
4683 emit_jump_insn (gen_jump (label5));
4684 emit_barrier ();
4685 emit_label (label3);
4686 expand_inc (adjusted_op0, const1_rtx);
4687 emit_label (label4);
4688 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4689 quotient, 0, OPTAB_LIB_WIDEN);
4690 if (tem != quotient)
4691 emit_move_insn (quotient, tem);
4692 expand_inc (quotient, const1_rtx);
4693 emit_label (label5);
4694 }
4695 }
4696 break;
4697
4698 case EXACT_DIV_EXPR:
4699 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4700 {
4701 HOST_WIDE_INT d = INTVAL (op1);
4702 unsigned HOST_WIDE_INT ml;
4703 int pre_shift;
4704 rtx t1;
4705
4706 pre_shift = floor_log2 (d & -d);
4707 ml = invert_mod2n (d >> pre_shift, size);
4708 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4709 pre_shift, NULL_RTX, unsignedp);
4710 quotient = expand_mult (compute_mode, t1,
4711 gen_int_mode (ml, compute_mode),
4712 NULL_RTX, 1);
4713
4714 insn = get_last_insn ();
4715 set_dst_reg_note (insn, REG_EQUAL,
4716 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4717 compute_mode, op0, op1),
4718 quotient);
4719 }
4720 break;
4721
4722 case ROUND_DIV_EXPR:
4723 case ROUND_MOD_EXPR:
4724 if (unsignedp)
4725 {
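/* Illustrative summary: compute the truncating quotient and remainder
   first, then round to nearest by incrementing the quotient (and
   subtracting OP1 from the remainder) whenever
   remainder > floor ((OP1 - 1) / 2).  */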
4726 rtx tem;
4727 rtx label;
4728 label = gen_label_rtx ();
4729 quotient = gen_reg_rtx (compute_mode);
4730 remainder = gen_reg_rtx (compute_mode);
4731 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4732 {
4733 rtx tem;
4734 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4735 quotient, 1, OPTAB_LIB_WIDEN);
4736 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4737 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4738 remainder, 1, OPTAB_LIB_WIDEN);
4739 }
4740 tem = plus_constant (compute_mode, op1, -1);
4741 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4742 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4743 expand_inc (quotient, const1_rtx);
4744 expand_dec (remainder, op1);
4745 emit_label (label);
4746 }
4747 else
4748 {
4749 rtx abs_rem, abs_op1, tem, mask;
4750 rtx label;
4751 label = gen_label_rtx ();
4752 quotient = gen_reg_rtx (compute_mode);
4753 remainder = gen_reg_rtx (compute_mode);
4754 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4755 {
4756 rtx tem;
4757 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4758 quotient, 0, OPTAB_LIB_WIDEN);
4759 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4760 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4761 remainder, 0, OPTAB_LIB_WIDEN);
4762 }
4763 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4764 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4765 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4766 1, NULL_RTX, 1);
4767 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4768 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4769 NULL_RTX, 0, OPTAB_WIDEN);
4770 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4771 size - 1, NULL_RTX, 0);
4772 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4773 NULL_RTX, 0, OPTAB_WIDEN);
4774 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4775 NULL_RTX, 0, OPTAB_WIDEN);
4776 expand_inc (quotient, tem);
4777 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4778 NULL_RTX, 0, OPTAB_WIDEN);
4779 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4780 NULL_RTX, 0, OPTAB_WIDEN);
4781 expand_dec (remainder, tem);
4782 emit_label (label);
4783 }
4784 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4785
4786 default:
4787 gcc_unreachable ();
4788 }
4789
4790 if (quotient == 0)
4791 {
4792 if (target && GET_MODE (target) != compute_mode)
4793 target = 0;
4794
4795 if (rem_flag)
4796 {
4797 /* Try to produce the remainder without producing the quotient.
4798 If we seem to have a divmod pattern that does not require widening,
4799 don't try widening here. We should really have a WIDEN argument
4800 to expand_twoval_binop, since what we'd really like to do here is
4801 1) try a mod insn in compute_mode
4802 2) try a divmod insn in compute_mode
4803 3) try a div insn in compute_mode and multiply-subtract to get
4804 remainder
4805 4) try the same things with widening allowed. */
4806 remainder
4807 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4808 op0, op1, target,
4809 unsignedp,
4810 ((optab_handler (optab2, compute_mode)
4811 != CODE_FOR_nothing)
4812 ? OPTAB_DIRECT : OPTAB_WIDEN));
4813 if (remainder == 0)
4814 {
4815 /* No luck there. Can we do remainder and divide at once
4816 without a library call? */
4817 remainder = gen_reg_rtx (compute_mode);
4818 if (! expand_twoval_binop ((unsignedp
4819 ? udivmod_optab
4820 : sdivmod_optab),
4821 op0, op1,
4822 NULL_RTX, remainder, unsignedp))
4823 remainder = 0;
4824 }
4825
4826 if (remainder)
4827 return gen_lowpart (mode, remainder);
4828 }
4829
4830 /* Produce the quotient. Try a quotient insn, but not a library call.
4831 If we have a divmod in this mode, use it in preference to widening
4832 the div (for this test we assume it will not fail). Note that optab2
4833 is set to the one of the two optabs that the call below will use. */
4834 quotient
4835 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4836 op0, op1, rem_flag ? NULL_RTX : target,
4837 unsignedp,
4838 ((optab_handler (optab2, compute_mode)
4839 != CODE_FOR_nothing)
4840 ? OPTAB_DIRECT : OPTAB_WIDEN));
4841
4842 if (quotient == 0)
4843 {
4844 /* No luck there. Try a quotient-and-remainder insn,
4845 keeping the quotient alone. */
4846 quotient = gen_reg_rtx (compute_mode);
4847 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4848 op0, op1,
4849 quotient, NULL_RTX, unsignedp))
4850 {
4851 quotient = 0;
4852 if (! rem_flag)
4853 /* Still no luck. If we are not computing the remainder,
4854 use a library call for the quotient. */
4855 quotient = sign_expand_binop (compute_mode,
4856 udiv_optab, sdiv_optab,
4857 op0, op1, target,
4858 unsignedp, OPTAB_LIB_WIDEN);
4859 }
4860 }
4861 }
4862
4863 if (rem_flag)
4864 {
4865 if (target && GET_MODE (target) != compute_mode)
4866 target = 0;
4867
4868 if (quotient == 0)
4869 {
4870 /* No divide instruction either. Use library for remainder. */
4871 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4872 op0, op1, target,
4873 unsignedp, OPTAB_LIB_WIDEN);
4874 /* No remainder function. Try a quotient-and-remainder
4875 function, keeping the remainder. */
4876 if (!remainder)
4877 {
4878 remainder = gen_reg_rtx (compute_mode);
4879 if (!expand_twoval_binop_libfunc
4880 (unsignedp ? udivmod_optab : sdivmod_optab,
4881 op0, op1,
4882 NULL_RTX, remainder,
4883 unsignedp ? UMOD : MOD))
4884 remainder = NULL_RTX;
4885 }
4886 }
4887 else
4888 {
4889 /* We divided. Now finish doing X - Y * (X / Y). */
4890 remainder = expand_mult (compute_mode, quotient, op1,
4891 NULL_RTX, unsignedp);
4892 remainder = expand_binop (compute_mode, sub_optab, op0,
4893 remainder, target, unsignedp,
4894 OPTAB_LIB_WIDEN);
4895 }
4896 }
4897
4898 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4899 }
4900 \f
4901 /* Return a tree node with data type TYPE, describing the value of X.
4902 Usually this is a VAR_DECL, if there is no obvious better choice.
4903 X may be an expression; however, we only support those expressions
4904 generated by loop.c. */
4905
4906 tree
4907 make_tree (tree type, rtx x)
4908 {
4909 tree t;
4910
4911 switch (GET_CODE (x))
4912 {
4913 case CONST_INT:
4914 {
4915 HOST_WIDE_INT hi = 0;
4916
4917 if (INTVAL (x) < 0
4918 && !(TYPE_UNSIGNED (type)
4919 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4920 < HOST_BITS_PER_WIDE_INT)))
4921 hi = -1;
4922
4923 t = build_int_cst_wide (type, INTVAL (x), hi);
4924
4925 return t;
4926 }
4927
4928 case CONST_DOUBLE:
4929 if (GET_MODE (x) == VOIDmode)
4930 t = build_int_cst_wide (type,
4931 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4932 else
4933 {
4934 REAL_VALUE_TYPE d;
4935
4936 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4937 t = build_real (type, d);
4938 }
4939
4940 return t;
4941
4942 case CONST_VECTOR:
4943 {
4944 int units = CONST_VECTOR_NUNITS (x);
4945 tree itype = TREE_TYPE (type);
4946 tree *elts;
4947 int i;
4948
4949 /* Build a tree with vector elements. */
4950 elts = XALLOCAVEC (tree, units);
4951 for (i = units - 1; i >= 0; --i)
4952 {
4953 rtx elt = CONST_VECTOR_ELT (x, i);
4954 elts[i] = make_tree (itype, elt);
4955 }
4956
4957 return build_vector (type, elts);
4958 }
4959
4960 case PLUS:
4961 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4962 make_tree (type, XEXP (x, 1)));
4963
4964 case MINUS:
4965 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4966 make_tree (type, XEXP (x, 1)));
4967
4968 case NEG:
4969 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4970
4971 case MULT:
4972 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4973 make_tree (type, XEXP (x, 1)));
4974
4975 case ASHIFT:
4976 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4977 make_tree (type, XEXP (x, 1)));
4978
4979 case LSHIFTRT:
4980 t = unsigned_type_for (type);
4981 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4982 make_tree (t, XEXP (x, 0)),
4983 make_tree (type, XEXP (x, 1))));
4984
4985 case ASHIFTRT:
4986 t = signed_type_for (type);
4987 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4988 make_tree (t, XEXP (x, 0)),
4989 make_tree (type, XEXP (x, 1))));
4990
4991 case DIV:
4992 if (TREE_CODE (type) != REAL_TYPE)
4993 t = signed_type_for (type);
4994 else
4995 t = type;
4996
4997 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4998 make_tree (t, XEXP (x, 0)),
4999 make_tree (t, XEXP (x, 1))));
5000 case UDIV:
5001 t = unsigned_type_for (type);
5002 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5003 make_tree (t, XEXP (x, 0)),
5004 make_tree (t, XEXP (x, 1))));
5005
5006 case SIGN_EXTEND:
5007 case ZERO_EXTEND:
5008 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5009 GET_CODE (x) == ZERO_EXTEND);
5010 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5011
5012 case CONST:
5013 return make_tree (type, XEXP (x, 0));
5014
5015 case SYMBOL_REF:
5016 t = SYMBOL_REF_DECL (x);
5017 if (t)
5018 return fold_convert (type, build_fold_addr_expr (t));
5019 /* else fall through. */
5020
5021 default:
5022 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5023
5024 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5025 address mode to pointer mode. */
5026 if (POINTER_TYPE_P (type))
5027 x = convert_memory_address_addr_space
5028 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5029
5030 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5031 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5032 t->decl_with_rtl.rtl = x;
5033
5034 return t;
5035 }
5036 }
5037 \f
5038 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5039 and returning TARGET.
5040
5041 If TARGET is 0, a pseudo-register or constant is returned. */
5042
5043 rtx
5044 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5045 {
5046 rtx tem = 0;
5047
5048 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5049 tem = simplify_binary_operation (AND, mode, op0, op1);
5050 if (tem == 0)
5051 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5052
5053 if (target == 0)
5054 target = tem;
5055 else if (tem != target)
5056 emit_move_insn (target, tem);
5057 return target;
5058 }
5059
5060 /* Helper function for emit_store_flag. */
5061 static rtx
5062 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5063 enum machine_mode mode, enum machine_mode compare_mode,
5064 int unsignedp, rtx x, rtx y, int normalizep,
5065 enum machine_mode target_mode)
5066 {
5067 struct expand_operand ops[4];
5068 rtx op0, last, comparison, subtarget;
5069 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5070
5071 last = get_last_insn ();
5072 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5073 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5074 if (!x || !y)
5075 {
5076 delete_insns_since (last);
5077 return NULL_RTX;
5078 }
5079
5080 if (target_mode == VOIDmode)
5081 target_mode = result_mode;
5082 if (!target)
5083 target = gen_reg_rtx (target_mode);
5084
5085 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5086
5087 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5088 create_fixed_operand (&ops[1], comparison);
5089 create_fixed_operand (&ops[2], x);
5090 create_fixed_operand (&ops[3], y);
5091 if (!maybe_expand_insn (icode, 4, ops))
5092 {
5093 delete_insns_since (last);
5094 return NULL_RTX;
5095 }
5096 subtarget = ops[0].value;
5097
5098 /* If we are converting to a wider mode, first convert to
5099 TARGET_MODE, then normalize. This produces better combining
5100 opportunities on machines that have a SIGN_EXTRACT when we are
5101 testing a single bit. This mostly benefits the 68k.
5102
5103 If STORE_FLAG_VALUE does not have the sign bit set when
5104 interpreted in MODE, we can do this conversion as unsigned, which
5105 is usually more efficient. */
5106 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5107 {
5108 convert_move (target, subtarget,
5109 val_signbit_known_clear_p (result_mode,
5110 STORE_FLAG_VALUE));
5111 op0 = target;
5112 result_mode = target_mode;
5113 }
5114 else
5115 op0 = subtarget;
5116
5117 /* If we want to keep subexpressions around, don't reuse our last
5118 target. */
5119 if (optimize)
5120 subtarget = 0;
5121
5122 /* Now normalize to the proper value in MODE. Sometimes we don't
5123 have to do anything. */
5124 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5125 ;
5126 /* STORE_FLAG_VALUE might be the most negative number, so write
5127 the comparison this way to avoid a compile-time warning. */
5128 else if (- normalizep == STORE_FLAG_VALUE)
5129 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5130
5131 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5132 it hard to use a value of just the sign bit due to ANSI integer
5133 constant typing rules. */
5134 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5135 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5136 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5137 normalizep == 1);
5138 else
5139 {
5140 gcc_assert (STORE_FLAG_VALUE & 1);
5141
5142 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5143 if (normalizep == -1)
5144 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5145 }
5146
5147 /* If we were converting to a smaller mode, do the conversion now. */
5148 if (target_mode != result_mode)
5149 {
5150 convert_move (target, op0, 0);
5151 return target;
5152 }
5153 else
5154 return op0;
5155 }
5156
5157
5158 /* A subroutine of emit_store_flag only including "tricks" that do not
5159 need a recursive call. These are kept separate to avoid infinite
5160 loops. */
5161
5162 static rtx
5163 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5164 enum machine_mode mode, int unsignedp, int normalizep,
5165 enum machine_mode target_mode)
5166 {
5167 rtx subtarget;
5168 enum insn_code icode;
5169 enum machine_mode compare_mode;
5170 enum mode_class mclass;
5171 enum rtx_code scode;
5172 rtx tem;
5173
5174 if (unsignedp)
5175 code = unsigned_condition (code);
5176 scode = swap_condition (code);
5177
5178 /* If one operand is constant, make it the second one. Only do this
5179 if the other operand is not constant as well. */
5180
5181 if (swap_commutative_operands_p (op0, op1))
5182 {
5183 tem = op0;
5184 op0 = op1;
5185 op1 = tem;
5186 code = swap_condition (code);
5187 }
5188
5189 if (mode == VOIDmode)
5190 mode = GET_MODE (op0);
5191
5192 /* For some comparisons with 1 and -1, we can convert this to
5193 comparisons with zero. This will often produce more opportunities for
5194 store-flag insns. */
5195
5196 switch (code)
5197 {
5198 case LT:
5199 if (op1 == const1_rtx)
5200 op1 = const0_rtx, code = LE;
5201 break;
5202 case LE:
5203 if (op1 == constm1_rtx)
5204 op1 = const0_rtx, code = LT;
5205 break;
5206 case GE:
5207 if (op1 == const1_rtx)
5208 op1 = const0_rtx, code = GT;
5209 break;
5210 case GT:
5211 if (op1 == constm1_rtx)
5212 op1 = const0_rtx, code = GE;
5213 break;
5214 case GEU:
5215 if (op1 == const1_rtx)
5216 op1 = const0_rtx, code = NE;
5217 break;
5218 case LTU:
5219 if (op1 == const1_rtx)
5220 op1 = const0_rtx, code = EQ;
5221 break;
5222 default:
5223 break;
5224 }
5225
5226 /* If we are comparing a double-word integer with zero or -1, we can
5227 convert the comparison into one involving a single word. */
5228 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5229 && GET_MODE_CLASS (mode) == MODE_INT
5230 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5231 {
5232 if ((code == EQ || code == NE)
5233 && (op1 == const0_rtx || op1 == constm1_rtx))
5234 {
5235 rtx op00, op01;
5236
5237 /* Do a logical OR or AND of the two words and compare the
5238 result. */
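 /* Concretely (calling the two words LO and HI, an illustrative naming
 only): the value is zero iff (LO | HI) == 0 and is -1 iff
 (LO & HI) == -1, so either test collapses to a single word_mode
 comparison against OP1. */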
5239 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5240 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5241 tem = expand_binop (word_mode,
5242 op1 == const0_rtx ? ior_optab : and_optab,
5243 op00, op01, NULL_RTX, unsignedp,
5244 OPTAB_DIRECT);
5245
5246 if (tem != 0)
5247 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5248 unsignedp, normalizep);
5249 }
5250 else if ((code == LT || code == GE) && op1 == const0_rtx)
5251 {
5252 rtx op0h;
5253
5254 /* If testing the sign bit, we can just test the high word. */
5255 op0h = simplify_gen_subreg (word_mode, op0, mode,
5256 subreg_highpart_offset (word_mode,
5257 mode));
5258 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5259 unsignedp, normalizep);
5260 }
5261 else
5262 tem = NULL_RTX;
5263
5264 if (tem)
5265 {
5266 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5267 return tem;
5268 if (!target)
5269 target = gen_reg_rtx (target_mode);
5270
5271 convert_move (target, tem,
5272 !val_signbit_known_set_p (word_mode,
5273 (normalizep ? normalizep
5274 : STORE_FLAG_VALUE)));
5275 return target;
5276 }
5277 }
5278
5279 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5280 complement of A (for GE) and shifting the sign bit to the low bit. */
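 /* As a small 8-bit illustration: for A < 0 and a 0/1 result, a logical
 shift of A right by 7 leaves just the sign bit, so A = -3 (0xfd)
 yields 1 and A = 5 yields 0; for A >= 0 we take ~A first, so A = 5
 gives ~5 = 0xfa and then 1. */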
5281 if (op1 == const0_rtx && (code == LT || code == GE)
5282 && GET_MODE_CLASS (mode) == MODE_INT
5283 && (normalizep || STORE_FLAG_VALUE == 1
5284 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5285 {
5286 subtarget = target;
5287
5288 if (!target)
5289 target_mode = mode;
5290
5291 /* If the result is to be wider than OP0, it is best to convert it
5292 first. If it is to be narrower, it is *incorrect* to convert it
5293 first. */
5294 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5295 {
5296 op0 = convert_modes (target_mode, mode, op0, 0);
5297 mode = target_mode;
5298 }
5299
5300 if (target_mode != mode)
5301 subtarget = 0;
5302
5303 if (code == GE)
5304 op0 = expand_unop (mode, one_cmpl_optab, op0,
5305 ((STORE_FLAG_VALUE == 1 || normalizep)
5306 ? 0 : subtarget), 0);
5307
5308 if (STORE_FLAG_VALUE == 1 || normalizep)
5309 /* If we are supposed to produce a 0/1 value, we want to do
5310 a logical shift from the sign bit to the low-order bit; for
5311 a -1/0 value, we do an arithmetic shift. */
5312 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5313 GET_MODE_BITSIZE (mode) - 1,
5314 subtarget, normalizep != -1);
5315
5316 if (mode != target_mode)
5317 op0 = convert_modes (target_mode, mode, op0, 0);
5318
5319 return op0;
5320 }
5321
5322 mclass = GET_MODE_CLASS (mode);
5323 for (compare_mode = mode; compare_mode != VOIDmode;
5324 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5325 {
5326 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5327 icode = optab_handler (cstore_optab, optab_mode);
5328 if (icode != CODE_FOR_nothing)
5329 {
5330 do_pending_stack_adjust ();
5331 tem = emit_cstore (target, icode, code, mode, compare_mode,
5332 unsignedp, op0, op1, normalizep, target_mode);
5333 if (tem)
5334 return tem;
5335
5336 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5337 {
5338 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5339 unsignedp, op1, op0, normalizep, target_mode);
5340 if (tem)
5341 return tem;
5342 }
5343 break;
5344 }
5345 }
5346
5347 return 0;
5348 }
5349
5350 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5351 and storing in TARGET. Normally return TARGET.
5352 Return 0 if that cannot be done.
5353
5354 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5355 it is VOIDmode, they cannot both be CONST_INT.
5356
5357 UNSIGNEDP is for the case where we have to widen the operands
5358 to perform the operation. It says to use zero-extension.
5359
5360 NORMALIZEP is 1 if we should convert the result to be either zero
5361 or one. NORMALIZEP is -1 if we should convert the result to be
5362 either zero or -1. If NORMALIZEP is zero, the result will be left
5363 "raw" out of the scc insn. */
5364
5365 rtx
5366 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5367 enum machine_mode mode, int unsignedp, int normalizep)
5368 {
5369 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5370 enum rtx_code rcode;
5371 rtx subtarget;
5372 rtx tem, last, trueval;
5373
5374 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5375 target_mode);
5376 if (tem)
5377 return tem;
5378
5379 /* If we reached here, we can't do this with a scc insn; however, there
5380 are some comparisons that can be done in other ways. Don't do any
5381 of these cases if branches are very cheap. */
5382 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5383 return 0;
5384
5385 /* See what we need to return. We can only return a 1, -1, or the
5386 sign bit. */
5387
5388 if (normalizep == 0)
5389 {
5390 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5391 normalizep = STORE_FLAG_VALUE;
5392
5393 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5394 ;
5395 else
5396 return 0;
5397 }
5398
5399 last = get_last_insn ();
5400
5401 /* If optimizing, use different pseudo registers for each insn, instead
5402 of reusing the same pseudo. This leads to better CSE, but slows
5403 down the compiler, since there are more pseudos. */
5404 subtarget = (!optimize
5405 && (target_mode == mode)) ? target : NULL_RTX;
5406 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5407
5408 /* For floating-point comparisons, try the reverse comparison or try
5409 changing the "orderedness" of the comparison. */
5410 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5411 {
5412 enum rtx_code first_code;
5413 bool and_them;
5414
5415 rcode = reverse_condition_maybe_unordered (code);
5416 if (can_compare_p (rcode, mode, ccp_store_flag)
5417 && (code == ORDERED || code == UNORDERED
5418 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5419 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5420 {
5421 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5422 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5423
5424 /* For the reverse comparison, use either an addition or an XOR. */
5425 if (want_add
5426 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5427 optimize_insn_for_speed_p ()) == 0)
5428 {
5429 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5430 STORE_FLAG_VALUE, target_mode);
5431 if (tem)
5432 return expand_binop (target_mode, add_optab, tem,
5433 GEN_INT (normalizep),
5434 target, 0, OPTAB_WIDEN);
5435 }
5436 else if (!want_add
5437 && rtx_cost (trueval, XOR, 1,
5438 optimize_insn_for_speed_p ()) == 0)
5439 {
5440 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5441 normalizep, target_mode);
5442 if (tem)
5443 return expand_binop (target_mode, xor_optab, tem, trueval,
5444 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5445 }
5446 }
5447
5448 delete_insns_since (last);
5449
5450 /* Cannot split ORDERED and UNORDERED; only try the above trick. */
5451 if (code == ORDERED || code == UNORDERED)
5452 return 0;
5453
5454 and_them = split_comparison (code, mode, &first_code, &code);
5455
5456 /* If there are no NaNs, the first comparison should always fall through.
5457 Effectively change the comparison to the other one. */
5458 if (!HONOR_NANS (mode))
5459 {
5460 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5461 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5462 target_mode);
5463 }
5464
5465 #ifdef HAVE_conditional_move
5466 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5467 conditional move. */
5468 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5469 normalizep, target_mode);
5470 if (tem == 0)
5471 return 0;
5472
5473 if (and_them)
5474 tem = emit_conditional_move (target, code, op0, op1, mode,
5475 tem, const0_rtx, GET_MODE (tem), 0);
5476 else
5477 tem = emit_conditional_move (target, code, op0, op1, mode,
5478 trueval, tem, GET_MODE (tem), 0);
5479
5480 if (tem == 0)
5481 delete_insns_since (last);
5482 return tem;
5483 #else
5484 return 0;
5485 #endif
5486 }
5487
5488 /* The remaining tricks only apply to integer comparisons. */
5489
5490 if (GET_MODE_CLASS (mode) != MODE_INT)
5491 return 0;
5492
5493 /* If this is an equality comparison of integers, we can try to exclusive-or
5494 (or subtract) the two operands and use a recursive call to try the
5495 comparison with zero. Don't do any of these cases if branches are
5496 very cheap. */
5497
5498 if ((code == EQ || code == NE) && op1 != const0_rtx)
5499 {
5500 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5501 OPTAB_WIDEN);
5502
5503 if (tem == 0)
5504 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5505 OPTAB_WIDEN);
5506 if (tem != 0)
5507 tem = emit_store_flag (target, code, tem, const0_rtx,
5508 mode, unsignedp, normalizep);
5509 if (tem != 0)
5510 return tem;
5511
5512 delete_insns_since (last);
5513 }
5514
5515 /* For integer comparisons, try the reverse comparison. However, for
5516 small X and if we'd have to extend anyway, implementing "X != 0"
5517 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5518 rcode = reverse_condition (code);
5519 if (can_compare_p (rcode, mode, ccp_store_flag)
5520 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5521 && code == NE
5522 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5523 && op1 == const0_rtx))
5524 {
5525 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5526 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5527
5528 /* Again, for the reverse comparison, use either an addition or an XOR. */
5529 if (want_add
5530 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5531 optimize_insn_for_speed_p ()) == 0)
5532 {
5533 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5534 STORE_FLAG_VALUE, target_mode);
5535 if (tem != 0)
5536 tem = expand_binop (target_mode, add_optab, tem,
5537 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5538 }
5539 else if (!want_add
5540 && rtx_cost (trueval, XOR, 1,
5541 optimize_insn_for_speed_p ()) == 0)
5542 {
5543 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5544 normalizep, target_mode);
5545 if (tem != 0)
5546 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5547 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5548 }
5549
5550 if (tem != 0)
5551 return tem;
5552 delete_insns_since (last);
5553 }
5554
5555 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5556 the constant zero. Reject all other comparisons at this point. Only
5557 do LE and GT if branches are expensive since they are expensive on
5558 2-operand machines. */
5559
5560 if (op1 != const0_rtx
5561 || (code != EQ && code != NE
5562 && (BRANCH_COST (optimize_insn_for_speed_p (),
5563 false) <= 1 || (code != LE && code != GT))))
5564 return 0;
5565
5566 /* Try to put the result of the comparison in the sign bit. Assume we can't
5567 do the necessary operation below. */
5568
5569 tem = 0;
5570
5571 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5572 the sign bit set. */
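 /* A few 8-bit instances of the identity: A = 0 gives 0 | 0xff = 0xff
 (sign bit set), A = 5 gives 5 | 4 = 5 (clear), and A = -3 gives
 0xfd | 0xfc = 0xfd (set). */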
5573
5574 if (code == LE)
5575 {
5576 /* This is destructive, so SUBTARGET can't be OP0. */
5577 if (rtx_equal_p (subtarget, op0))
5578 subtarget = 0;
5579
5580 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5581 OPTAB_WIDEN);
5582 if (tem)
5583 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5584 OPTAB_WIDEN);
5585 }
5586
5587 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5588 number of bits in the mode of OP0, minus one. */
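 /* In 8-bit terms: A = 5 gives (5 >> 7) - 5 = -5 (sign bit set),
 A = 0 gives 0 (clear), and A = -3 gives -1 - (-3) = 2 (clear),
 so the sign bit ends up set exactly when A > 0. */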
5589
5590 if (code == GT)
5591 {
5592 if (rtx_equal_p (subtarget, op0))
5593 subtarget = 0;
5594
5595 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5596 GET_MODE_BITSIZE (mode) - 1,
5597 subtarget, 0);
5598 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5599 OPTAB_WIDEN);
5600 }
5601
5602 if (code == EQ || code == NE)
5603 {
5604 /* For EQ or NE, one way to do the comparison is to apply an operation
5605 that converts the operand into a positive number if it is nonzero
5606 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5607 for NE we negate. This puts the result in the sign bit. Then we
5608 normalize with a shift, if needed.
5609
5610 Two operations that can do the above actions are ABS and FFS, so try
5611 them. If that doesn't work, and MODE is smaller than a full word,
5612 we can use zero-extension to the wider mode (an unsigned conversion)
5613 as the operation. */
5614
5615 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5616 that is compensated by the subsequent overflow when subtracting
5617 one / negating. */
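 /* For instance, with abs and 8-bit values: A = 3 gives abs = 3, then
 3 - 1 = 2 (sign clear) for EQ or -3 (sign set) for NE; A = 0 gives
 -1 (sign set) for EQ and 0 (clear) for NE, which is exactly the
 sign-bit encoding normalized below. */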
5618
5619 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5620 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5621 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5622 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5623 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5624 {
5625 tem = convert_modes (word_mode, mode, op0, 1);
5626 mode = word_mode;
5627 }
5628
5629 if (tem != 0)
5630 {
5631 if (code == EQ)
5632 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5633 0, OPTAB_WIDEN);
5634 else
5635 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5636 }
5637
5638 /* If we couldn't do it that way, for NE we can "or" the two's complement
5639 of the value with itself. For EQ, we take the one's complement of
5640 that "or", which is an extra insn, so we only handle EQ if branches
5641 are expensive. */
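 /* Concretely, (-A | A) has the sign bit set for every nonzero A (at
 least one of A and -A is negative, and for the most negative value
 both are) and is zero when A is zero; the one's complement then
 flips the sign bit, which yields the EQ form. */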
5642
5643 if (tem == 0
5644 && (code == NE
5645 || BRANCH_COST (optimize_insn_for_speed_p (),
5646 false) > 1))
5647 {
5648 if (rtx_equal_p (subtarget, op0))
5649 subtarget = 0;
5650
5651 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5652 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5653 OPTAB_WIDEN);
5654
5655 if (tem && code == EQ)
5656 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5657 }
5658 }
5659
5660 if (tem && normalizep)
5661 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5662 GET_MODE_BITSIZE (mode) - 1,
5663 subtarget, normalizep == 1);
5664
5665 if (tem)
5666 {
5667 if (!target)
5668 ;
5669 else if (GET_MODE (tem) != target_mode)
5670 {
5671 convert_move (target, tem, 0);
5672 tem = target;
5673 }
5674 else if (!subtarget)
5675 {
5676 emit_move_insn (target, tem);
5677 tem = target;
5678 }
5679 }
5680 else
5681 delete_insns_since (last);
5682
5683 return tem;
5684 }
5685
5686 /* Like emit_store_flag, but always succeeds. */
5687
5688 rtx
5689 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5690 enum machine_mode mode, int unsignedp, int normalizep)
5691 {
5692 rtx tem, label;
5693 rtx trueval, falseval;
5694
5695 /* First see if emit_store_flag can do the job. */
5696 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5697 if (tem != 0)
5698 return tem;
5699
5700 if (!target)
5701 target = gen_reg_rtx (word_mode);
5702
5703 /* If this failed, we have to do this with set/compare/jump/set code.
5704 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5705 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5706 if (code == NE
5707 && GET_MODE_CLASS (mode) == MODE_INT
5708 && REG_P (target)
5709 && op0 == target
5710 && op1 == const0_rtx)
5711 {
5712 label = gen_label_rtx ();
5713 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5714 mode, NULL_RTX, NULL_RTX, label, -1);
5715 emit_move_insn (target, trueval);
5716 emit_label (label);
5717 return target;
5718 }
5719
5720 if (!REG_P (target)
5721 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5722 target = gen_reg_rtx (GET_MODE (target));
5723
5724 /* Jump in the right direction if the target cannot implement CODE
5725 but can jump on its reverse condition. */
5726 falseval = const0_rtx;
5727 if (! can_compare_p (code, mode, ccp_jump)
5728 && (! FLOAT_MODE_P (mode)
5729 || code == ORDERED || code == UNORDERED
5730 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5731 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5732 {
5733 enum rtx_code rcode;
5734 if (FLOAT_MODE_P (mode))
5735 rcode = reverse_condition_maybe_unordered (code);
5736 else
5737 rcode = reverse_condition (code);
5738
5739 /* Canonicalize to UNORDERED for the libcall. */
5740 if (can_compare_p (rcode, mode, ccp_jump)
5741 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5742 {
5743 falseval = trueval;
5744 trueval = const0_rtx;
5745 code = rcode;
5746 }
5747 }
5748
5749 emit_move_insn (target, trueval);
5750 label = gen_label_rtx ();
5751 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5752 NULL_RTX, label, -1);
5753
5754 emit_move_insn (target, falseval);
5755 emit_label (label);
5756
5757 return target;
5758 }
5759 \f
5760 /* Perform possibly multi-word comparison and conditional jump to LABEL
5761 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5762 now a thin wrapper around do_compare_rtx_and_jump. */
5763
5764 static void
5765 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5766 rtx label)
5767 {
5768 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5769 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5770 NULL_RTX, NULL_RTX, label, -1);
5771 }