gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
 71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
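/* For illustration: x == 8 is 1000b, x - 1 is 0111b, and their AND is 0,
   so the macro yields true; x == 12 is 1100b, x - 1 is 1011b, and their
   AND is 1000b, so it yields false.  x == 0 also yields true, since
   0 & -1 == 0 -- hence the "or zero" in the name.  */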
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 void
97 init_expmed (void)
98 {
99 struct
100 {
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
119
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
125
126
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
128 {
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
131 }
132 memset (&all, 0, sizeof all);
133
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
137
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
141
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
144
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
148
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
152
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
156
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
160
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
164
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
167
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
171
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
174
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
177
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
180
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
183
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
187
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
191
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
195
196 for (speed = 0; speed < 2; speed++)
197 {
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
219
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
225
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
230
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
233 {
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
238
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
243 }
244
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
248
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
251 {
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
254
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
262 }
263 }
264 }
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
270 }
271
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
275
276 rtx
277 negate_rtx (enum machine_mode mode, rtx x)
278 {
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
280
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
283
284 return result;
285 }
286
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
293 {
294 const struct insn_data_d *data;
295
296 switch (pattern)
297 {
298 case EP_insv:
299 if (HAVE_insv)
300 {
301 data = &insn_data[CODE_FOR_insv];
302 break;
303 }
304 return MAX_MACHINE_MODE;
305
306 case EP_extv:
307 if (HAVE_extv)
308 {
309 data = &insn_data[CODE_FOR_extv];
310 break;
311 }
312 return MAX_MACHINE_MODE;
313
314 case EP_extzv:
315 if (HAVE_extzv)
316 {
317 data = &insn_data[CODE_FOR_extzv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 default:
323 gcc_unreachable ();
324 }
325
326 if (opno == -1)
327 return VOIDmode;
328
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
334 }
335 \f
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
338
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
342
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
350 {
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
357
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
359
360 while (GET_CODE (op0) == SUBREG)
361 {
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
369
370 byte_offset = 0;
371
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
374 {
375 int difference = inner_mode_size - outer_mode_size;
376
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
381 }
382 else
383 byte_offset = SUBREG_BYTE (op0);
384
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
387 }
388
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
394
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403 {
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
409
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
415 }
416
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
419
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
423
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
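
  /* Purely as an illustration, assuming 32-bit words (UNITS_PER_WORD == 4)
     and a REG operand so that UNIT == BITS_PER_WORD: bitnum == 40 gives
     offset == 1, bitpos == 8, and byte_offset == (40 % 32) / 8 + 1 * 4 == 5,
     i.e. the field starts one byte into the second word.  */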
428
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
440 {
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
448 }
449
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
454 {
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
457 {
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
461 {
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
464 }
465 }
466 }
467
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
472 {
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
476 }
477
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
485
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
488
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
493 {
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
498
499 if (GET_CODE (arg0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
508 }
509
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
513 {
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
515
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
521 }
522 }
523
524 /* Handle fields bigger than a word. */
525
526 if (bitsize > BITS_PER_WORD)
527 {
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
533
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
538
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
547
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
550 {
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards
554 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
555 - i - 1
556 : i);
557 unsigned int bit_offset = (backwards
558 ? MAX ((int) bitsize - ((int) i + 1)
559 * BITS_PER_WORD,
560 0)
561 : (int) i * BITS_PER_WORD);
562 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
563 unsigned HOST_WIDE_INT new_bitsize =
564 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
565
566 /* If the remaining chunk doesn't have full wordsize we have
567 to make sure that for big endian machines the higher order
568 bits are used. */
569 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
570 value_word = simplify_expand_binop (word_mode, lshr_optab,
571 value_word,
572 GEN_INT (BITS_PER_WORD
573 - new_bitsize),
574 NULL_RTX, true,
575 OPTAB_LIB_WIDEN);
576
577 if (!store_bit_field_1 (op0, new_bitsize,
578 bitnum + bit_offset,
579 bitregion_start, bitregion_end,
580 word_mode,
581 value_word, fallback_p))
582 {
583 delete_insns_since (last);
584 return false;
585 }
586 }
587 return true;
588 }
589
 590   /* From here on we can assume that the field to be stored fits within
 591      a single word (whatever mode that is), since it is shorter than a word.  */
592
593 /* OFFSET is the number of words or bytes (UNIT says which)
594 from STR_RTX to the first word or byte containing part of the field. */
595
596 if (!MEM_P (op0))
597 {
598 if (offset != 0
599 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600 {
601 if (!REG_P (op0))
602 {
603 /* Since this is a destination (lvalue), we can't copy
604 it to a pseudo. We can remove a SUBREG that does not
605 change the size of the operand. Such a SUBREG may
606 have been added above. */
607 gcc_assert (GET_CODE (op0) == SUBREG
608 && (GET_MODE_SIZE (GET_MODE (op0))
609 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610 op0 = SUBREG_REG (op0);
611 }
612 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613 op0, (offset * UNITS_PER_WORD));
614 }
615 offset = 0;
616 }
617
618 /* If VALUE has a floating-point or complex mode, access it as an
619 integer of the corresponding size. This can occur on a machine
620 with 64 bit registers that uses SFmode for float. It can also
621 occur for unaligned float or complex fields. */
622 orig_value = value;
623 if (GET_MODE (value) != VOIDmode
624 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626 {
627 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629 }
630
631 /* Now OFFSET is nonzero only if OP0 is memory
632 and is therefore always measured in bytes. */
633
634 if (HAVE_insv
635 && GET_MODE (value) != BLKmode
636 && bitsize > 0
637 && GET_MODE_BITSIZE (op_mode) >= bitsize
638 /* Do not use insv for volatile bitfields when
639 -fstrict-volatile-bitfields is in effect. */
640 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
641 && flag_strict_volatile_bitfields > 0)
642 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
643 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
644 /* Do not use insv if the bit region is restricted and
645 op_mode integer at offset doesn't fit into the
646 restricted region. */
647 && !(MEM_P (op0) && bitregion_end
648 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
649 > bitregion_end + 1))
650 {
651 struct expand_operand ops[4];
652 int xbitpos = bitpos;
653 rtx value1;
654 rtx xop0 = op0;
655 rtx last = get_last_insn ();
656 bool copy_back = false;
657
658 /* Add OFFSET into OP0's address. */
659 if (MEM_P (xop0))
660 xop0 = adjust_address (xop0, byte_mode, offset);
661
662 /* If xop0 is a register, we need it in OP_MODE
663 to make it acceptable to the format of insv. */
664 if (GET_CODE (xop0) == SUBREG)
665 /* We can't just change the mode, because this might clobber op0,
666 and we will need the original value of op0 if insv fails. */
667 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
668 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
669 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
670
671 /* If the destination is a paradoxical subreg such that we need a
672 truncate to the inner mode, perform the insertion on a temporary and
673 truncate the result to the original destination. Note that we can't
674 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
675 X) 0)) is (reg:N X). */
676 if (GET_CODE (xop0) == SUBREG
677 && REG_P (SUBREG_REG (xop0))
678 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
679 op_mode)))
680 {
681 rtx tem = gen_reg_rtx (op_mode);
682 emit_move_insn (tem, xop0);
683 xop0 = tem;
684 copy_back = true;
685 }
686
687 /* We have been counting XBITPOS within UNIT.
688 Count instead within the size of the register. */
689 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
690 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
691
692 unit = GET_MODE_BITSIZE (op_mode);
693
694 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
695 "backwards" from the size of the unit we are inserting into.
696 Otherwise, we count bits from the most significant on a
697 BYTES/BITS_BIG_ENDIAN machine. */
698
699 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
700 xbitpos = unit - bitsize - xbitpos;
701
702 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
703 value1 = value;
704 if (GET_MODE (value) != op_mode)
705 {
706 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
707 {
708 /* Optimization: Don't bother really extending VALUE
709 if it has all the bits we will actually use. However,
710 if we must narrow it, be sure we do it correctly. */
711
712 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
713 {
714 rtx tmp;
715
716 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
717 if (! tmp)
718 tmp = simplify_gen_subreg (op_mode,
719 force_reg (GET_MODE (value),
720 value1),
721 GET_MODE (value), 0);
722 value1 = tmp;
723 }
724 else
725 value1 = gen_lowpart (op_mode, value1);
726 }
727 else if (CONST_INT_P (value))
728 value1 = gen_int_mode (INTVAL (value), op_mode);
729 else
730 /* Parse phase is supposed to make VALUE's data type
731 match that of the component reference, which is a type
732 at least as wide as the field; so VALUE should have
733 a mode that corresponds to that type. */
734 gcc_assert (CONSTANT_P (value));
735 }
736
737 create_fixed_operand (&ops[0], xop0);
738 create_integer_operand (&ops[1], bitsize);
739 create_integer_operand (&ops[2], xbitpos);
740 create_input_operand (&ops[3], value1, op_mode);
741 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
742 {
743 if (copy_back)
744 convert_move (op0, xop0, true);
745 return true;
746 }
747 delete_insns_since (last);
748 }
749
750 /* If OP0 is a memory, try copying it to a register and seeing if a
751 cheap register alternative is available. */
752 if (HAVE_insv && MEM_P (op0))
753 {
754 enum machine_mode bestmode;
755 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
756
757 if (bitregion_end)
758 maxbits = bitregion_end - bitregion_start + 1;
759
760 /* Get the mode to use for inserting into this field. If OP0 is
761 BLKmode, get the smallest mode consistent with the alignment. If
762 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
763 mode. Otherwise, use the smallest mode containing the field. */
764
765 if (GET_MODE (op0) == BLKmode
766 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
767 || (op_mode != MAX_MACHINE_MODE
768 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
769 bestmode = get_best_mode (bitsize, bitnum,
770 bitregion_start, bitregion_end,
771 MEM_ALIGN (op0),
772 (op_mode == MAX_MACHINE_MODE
773 ? VOIDmode : op_mode),
774 MEM_VOLATILE_P (op0));
775 else
776 bestmode = GET_MODE (op0);
777
778 if (bestmode != VOIDmode
779 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
780 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
781 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
782 {
783 rtx last, tempreg, xop0;
784 unsigned HOST_WIDE_INT xoffset, xbitpos;
785
786 last = get_last_insn ();
787
788 /* Adjust address to point to the containing unit of
789 that mode. Compute the offset as a multiple of this unit,
790 counting in bytes. */
791 unit = GET_MODE_BITSIZE (bestmode);
792 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
793 xbitpos = bitnum % unit;
794 xop0 = adjust_address (op0, bestmode, xoffset);
795
796 /* Fetch that unit, store the bitfield in it, then store
797 the unit. */
798 tempreg = copy_to_reg (xop0);
799 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
800 bitregion_start, bitregion_end,
801 fieldmode, orig_value, false))
802 {
803 emit_move_insn (xop0, tempreg);
804 return true;
805 }
806 delete_insns_since (last);
807 }
808 }
809
810 if (!fallback_p)
811 return false;
812
813 store_fixed_bit_field (op0, offset, bitsize, bitpos,
814 bitregion_start, bitregion_end, value);
815 return true;
816 }
817
818 /* Generate code to store value from rtx VALUE
819 into a bit-field within structure STR_RTX
820 containing BITSIZE bits starting at bit BITNUM.
821
822 BITREGION_START is bitpos of the first bitfield in this region.
823 BITREGION_END is the bitpos of the ending bitfield in this region.
 824    These two fields are 0 if the C++ memory model does not apply,
825 or we are not interested in keeping track of bitfield regions.
826
827 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
828
829 void
830 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
831 unsigned HOST_WIDE_INT bitnum,
832 unsigned HOST_WIDE_INT bitregion_start,
833 unsigned HOST_WIDE_INT bitregion_end,
834 enum machine_mode fieldmode,
835 rtx value)
836 {
837 /* Under the C++0x memory model, we must not touch bits outside the
838 bit region. Adjust the address to start at the beginning of the
839 bit region. */
840 if (MEM_P (str_rtx) && bitregion_start > 0)
841 {
842 enum machine_mode bestmode;
843 enum machine_mode op_mode;
844 unsigned HOST_WIDE_INT offset;
845
846 op_mode = mode_for_extraction (EP_insv, 3);
847 if (op_mode == MAX_MACHINE_MODE)
848 op_mode = VOIDmode;
849
850 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
851
852 offset = bitregion_start / BITS_PER_UNIT;
853 bitnum -= bitregion_start;
854 bitregion_end -= bitregion_start;
855 bitregion_start = 0;
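
      /* As a worked example (illustrative values only): with
	 bitregion_start == 64, bitregion_end == 95 and bitnum == 71,
	 we get offset == 8 bytes, bitnum becomes 7 and bitregion_end
	 becomes 31, so the access below is confined to the 32-bit
	 region starting 8 bytes into STR_RTX.  */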
856 bestmode = get_best_mode (bitsize, bitnum,
857 bitregion_start, bitregion_end,
858 MEM_ALIGN (str_rtx),
859 op_mode,
860 MEM_VOLATILE_P (str_rtx));
861 str_rtx = adjust_address (str_rtx, bestmode, offset);
862 }
863
864 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
865 bitregion_start, bitregion_end,
866 fieldmode, value, true))
867 gcc_unreachable ();
868 }
869 \f
870 /* Use shifts and boolean operations to store VALUE
871 into a bit field of width BITSIZE
872 in a memory location specified by OP0 except offset by OFFSET bytes.
873 (OFFSET must be 0 if OP0 is a register.)
874 The field starts at position BITPOS within the byte.
875 (If OP0 is a register, it may be a full word or a narrower mode,
876 but BITPOS still counts within a full word,
877 which is significant on bigendian machines.) */
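
/* As a sketch of the approach used below (illustrative numbers only,
   little-endian byte order assumed): to store the 3-bit value 101b at
   BITPOS 4 of a byte currently holding 11111111b, we AND the destination
   with ~(111b << 4), i.e. 10001111b, shift the value left by 4 to get
   01010000b, and OR the two together, producing 11011111b.  */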
878
879 static void
880 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
881 unsigned HOST_WIDE_INT bitsize,
882 unsigned HOST_WIDE_INT bitpos,
883 unsigned HOST_WIDE_INT bitregion_start,
884 unsigned HOST_WIDE_INT bitregion_end,
885 rtx value)
886 {
887 enum machine_mode mode;
888 unsigned int total_bits = BITS_PER_WORD;
889 rtx temp;
890 int all_zero = 0;
891 int all_one = 0;
892
893 /* There is a case not handled here:
894 a structure with a known alignment of just a halfword
895 and a field split across two aligned halfwords within the structure.
896 Or likewise a structure with a known alignment of just a byte
897 and a field split across two bytes.
898 Such cases are not supposed to be able to occur. */
899
900 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
901 {
902 gcc_assert (!offset);
903 /* Special treatment for a bit field split across two registers. */
904 if (bitsize + bitpos > BITS_PER_WORD)
905 {
906 store_split_bit_field (op0, bitsize, bitpos,
907 bitregion_start, bitregion_end,
908 value);
909 return;
910 }
911 }
912 else
913 {
914 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
915
916 if (bitregion_end)
917 maxbits = bitregion_end - bitregion_start + 1;
918
919 /* Get the proper mode to use for this field. We want a mode that
920 includes the entire field. If such a mode would be larger than
921 a word, we won't be doing the extraction the normal way.
922 We don't want a mode bigger than the destination. */
923
924 mode = GET_MODE (op0);
925 if (GET_MODE_BITSIZE (mode) == 0
926 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
927 mode = word_mode;
928
929 if (MEM_VOLATILE_P (op0)
930 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
931 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
932 && flag_strict_volatile_bitfields > 0)
933 mode = GET_MODE (op0);
934 else
935 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
936 bitregion_start, bitregion_end,
937 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
938
939 if (mode == VOIDmode)
940 {
941 /* The only way this should occur is if the field spans word
942 boundaries. */
943 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
944 bitregion_start, bitregion_end, value);
945 return;
946 }
947
948 total_bits = GET_MODE_BITSIZE (mode);
949
950 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
951 be in the range 0 to total_bits-1, and put any excess bytes in
952 OFFSET. */
953 if (bitpos >= total_bits)
954 {
955 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
956 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
957 * BITS_PER_UNIT);
958 }
959
960 /* Get ref to an aligned byte, halfword, or word containing the field.
961 Adjust BITPOS to be position within a word,
962 and OFFSET to be the offset of that word.
963 Then alter OP0 to refer to that word. */
964 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
965 offset -= (offset % (total_bits / BITS_PER_UNIT));
966 op0 = adjust_address (op0, mode, offset);
967 }
968
969 mode = GET_MODE (op0);
970
971 /* Now MODE is either some integral mode for a MEM as OP0,
972 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
973 The bit field is contained entirely within OP0.
974 BITPOS is the starting bit number within OP0.
975 (OP0's mode may actually be narrower than MODE.) */
976
977 if (BYTES_BIG_ENDIAN)
978 /* BITPOS is the distance between our msb
979 and that of the containing datum.
980 Convert it to the distance from the lsb. */
981 bitpos = total_bits - bitsize - bitpos;
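
  /* E.g. with total_bits == 32, bitsize == 5 and an msb-relative bitpos
     of 3, the lsb-relative position is 32 - 5 - 3 == 24: the field then
     occupies bits 24 through 28 counting from the lsb.  */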
982
983 /* Now BITPOS is always the distance between our lsb
984 and that of OP0. */
985
986 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
987 we must first convert its mode to MODE. */
988
989 if (CONST_INT_P (value))
990 {
991 HOST_WIDE_INT v = INTVAL (value);
992
993 if (bitsize < HOST_BITS_PER_WIDE_INT)
994 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
995
996 if (v == 0)
997 all_zero = 1;
998 else if ((bitsize < HOST_BITS_PER_WIDE_INT
999 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1000 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1001 all_one = 1;
1002
1003 value = lshift_value (mode, value, bitpos, bitsize);
1004 }
1005 else
1006 {
1007 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1008 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1009
1010 if (GET_MODE (value) != mode)
1011 value = convert_to_mode (mode, value, 1);
1012
1013 if (must_and)
1014 value = expand_binop (mode, and_optab, value,
1015 mask_rtx (mode, 0, bitsize, 0),
1016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1017 if (bitpos > 0)
1018 value = expand_shift (LSHIFT_EXPR, mode, value,
1019 bitpos, NULL_RTX, 1);
1020 }
1021
1022 /* Now clear the chosen bits in OP0,
1023 except that if VALUE is -1 we need not bother. */
1024 /* We keep the intermediates in registers to allow CSE to combine
1025 consecutive bitfield assignments. */
1026
1027 temp = force_reg (mode, op0);
1028
1029 if (! all_one)
1030 {
1031 temp = expand_binop (mode, and_optab, temp,
1032 mask_rtx (mode, bitpos, bitsize, 1),
1033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034 temp = force_reg (mode, temp);
1035 }
1036
1037 /* Now logical-or VALUE into OP0, unless it is zero. */
1038
1039 if (! all_zero)
1040 {
1041 temp = expand_binop (mode, ior_optab, temp, value,
1042 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1043 temp = force_reg (mode, temp);
1044 }
1045
1046 if (op0 != temp)
1047 {
1048 op0 = copy_rtx (op0);
1049 emit_move_insn (op0, temp);
1050 }
1051 }
1052 \f
1053 /* Store a bit field that is split across multiple accessible memory objects.
1054
1055 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1056 BITSIZE is the field width; BITPOS the position of its first bit
1057 (within the word).
1058 VALUE is the value to store.
1059
1060 This does not yet handle fields wider than BITS_PER_WORD. */
1061
1062 static void
1063 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1064 unsigned HOST_WIDE_INT bitpos,
1065 unsigned HOST_WIDE_INT bitregion_start,
1066 unsigned HOST_WIDE_INT bitregion_end,
1067 rtx value)
1068 {
1069 unsigned int unit;
1070 unsigned int bitsdone = 0;
1071
1072 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1073 much at a time. */
1074 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1075 unit = BITS_PER_WORD;
1076 else
1077 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1078
1079 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1080 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1081 that VALUE might be a floating-point constant. */
1082 if (CONSTANT_P (value) && !CONST_INT_P (value))
1083 {
1084 rtx word = gen_lowpart_common (word_mode, value);
1085
1086 if (word && (value != word))
1087 value = word;
1088 else
1089 value = gen_lowpart_common (word_mode,
1090 force_reg (GET_MODE (value) != VOIDmode
1091 ? GET_MODE (value)
1092 : word_mode, value));
1093 }
1094
1095 while (bitsdone < bitsize)
1096 {
1097 unsigned HOST_WIDE_INT thissize;
1098 rtx part, word;
1099 unsigned HOST_WIDE_INT thispos;
1100 unsigned HOST_WIDE_INT offset;
1101
1102 offset = (bitpos + bitsdone) / unit;
1103 thispos = (bitpos + bitsdone) % unit;
1104
 1105       /* When the region of bytes we can touch is restricted, decrease
 1106 	 UNIT close to the end of the region as needed.  */
1107 if (bitregion_end
1108 && unit > BITS_PER_UNIT
1109 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1110 {
1111 unit = unit / 2;
1112 continue;
1113 }
1114
1115 /* THISSIZE must not overrun a word boundary. Otherwise,
1116 store_fixed_bit_field will call us again, and we will mutually
1117 recurse forever. */
1118 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1119 thissize = MIN (thissize, unit - thispos);
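
      /* For example (illustrative values, UNIT == 32): a 20-bit field at
	 BITPOS 28 is stored in two pieces; the first iteration has
	 thispos == 28 and thissize == MIN (20, 32 - 28) == 4, and the
	 second has thispos == 0, offset == 1 and thissize == 16.  */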
1120
1121 if (BYTES_BIG_ENDIAN)
1122 {
1123 int total_bits;
1124
1125 /* We must do an endian conversion exactly the same way as it is
1126 done in extract_bit_field, so that the two calls to
1127 extract_fixed_bit_field will have comparable arguments. */
1128 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1129 total_bits = BITS_PER_WORD;
1130 else
1131 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1132
1133 /* Fetch successively less significant portions. */
1134 if (CONST_INT_P (value))
1135 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1136 >> (bitsize - bitsdone - thissize))
1137 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1138 else
1139 /* The args are chosen so that the last part includes the
1140 lsb. Give extract_bit_field the value it needs (with
1141 endianness compensation) to fetch the piece we want. */
1142 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1143 total_bits - bitsize + bitsdone,
1144 NULL_RTX, 1, false);
1145 }
1146 else
1147 {
1148 /* Fetch successively more significant portions. */
1149 if (CONST_INT_P (value))
1150 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1151 >> bitsdone)
1152 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1153 else
1154 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1155 bitsdone, NULL_RTX, 1, false);
1156 }
1157
1158 /* If OP0 is a register, then handle OFFSET here.
1159
1160 When handling multiword bitfields, extract_bit_field may pass
1161 down a word_mode SUBREG of a larger REG for a bitfield that actually
1162 crosses a word boundary. Thus, for a SUBREG, we must find
1163 the current word starting from the base register. */
1164 if (GET_CODE (op0) == SUBREG)
1165 {
1166 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1167 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1168 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1169 word = word_offset ? const0_rtx : op0;
1170 else
1171 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1172 GET_MODE (SUBREG_REG (op0)));
1173 offset = 0;
1174 }
1175 else if (REG_P (op0))
1176 {
1177 enum machine_mode op0_mode = GET_MODE (op0);
1178 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1179 word = offset ? const0_rtx : op0;
1180 else
1181 word = operand_subword_force (op0, offset, GET_MODE (op0));
1182 offset = 0;
1183 }
1184 else
1185 word = op0;
1186
1187 /* OFFSET is in UNITs, and UNIT is in bits.
1188 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1189 it is just an out-of-bounds access. Ignore it. */
1190 if (word != const0_rtx)
1191 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1192 thispos, bitregion_start, bitregion_end, part);
1193 bitsdone += thissize;
1194 }
1195 }
1196 \f
1197 /* A subroutine of extract_bit_field_1 that converts return value X
1198 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1199 to extract_bit_field. */
1200
1201 static rtx
1202 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1203 enum machine_mode tmode, bool unsignedp)
1204 {
1205 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1206 return x;
1207
1208 /* If the x mode is not a scalar integral, first convert to the
1209 integer mode of that size and then access it as a floating-point
1210 value via a SUBREG. */
1211 if (!SCALAR_INT_MODE_P (tmode))
1212 {
1213 enum machine_mode smode;
1214
1215 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1216 x = convert_to_mode (smode, x, unsignedp);
1217 x = force_reg (smode, x);
1218 return gen_lowpart (tmode, x);
1219 }
1220
1221 return convert_to_mode (tmode, x, unsignedp);
1222 }
1223
1224 /* A subroutine of extract_bit_field, with the same arguments.
1225 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1226 if we can find no other means of implementing the operation.
1227 if FALLBACK_P is false, return NULL instead. */
1228
1229 static rtx
1230 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1231 unsigned HOST_WIDE_INT bitnum,
1232 int unsignedp, bool packedp, rtx target,
1233 enum machine_mode mode, enum machine_mode tmode,
1234 bool fallback_p)
1235 {
1236 unsigned int unit
1237 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1238 unsigned HOST_WIDE_INT offset, bitpos;
1239 rtx op0 = str_rtx;
1240 enum machine_mode int_mode;
1241 enum machine_mode ext_mode;
1242 enum machine_mode mode1;
1243 int byte_offset;
1244
1245 if (tmode == VOIDmode)
1246 tmode = mode;
1247
1248 while (GET_CODE (op0) == SUBREG)
1249 {
1250 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1251 op0 = SUBREG_REG (op0);
1252 }
1253
1254 /* If we have an out-of-bounds access to a register, just return an
1255 uninitialized register of the required mode. This can occur if the
1256 source code contains an out-of-bounds access to a small array. */
1257 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1258 return gen_reg_rtx (tmode);
1259
1260 if (REG_P (op0)
1261 && mode == GET_MODE (op0)
1262 && bitnum == 0
1263 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1264 {
1265 /* We're trying to extract a full register from itself. */
1266 return op0;
1267 }
1268
1269 /* See if we can get a better vector mode before extracting. */
1270 if (VECTOR_MODE_P (GET_MODE (op0))
1271 && !MEM_P (op0)
1272 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1273 {
1274 enum machine_mode new_mode;
1275
1276 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1277 new_mode = MIN_MODE_VECTOR_FLOAT;
1278 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1279 new_mode = MIN_MODE_VECTOR_FRACT;
1280 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1281 new_mode = MIN_MODE_VECTOR_UFRACT;
1282 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1283 new_mode = MIN_MODE_VECTOR_ACCUM;
1284 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1285 new_mode = MIN_MODE_VECTOR_UACCUM;
1286 else
1287 new_mode = MIN_MODE_VECTOR_INT;
1288
1289 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1290 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1291 && targetm.vector_mode_supported_p (new_mode))
1292 break;
1293 if (new_mode != VOIDmode)
1294 op0 = gen_lowpart (new_mode, op0);
1295 }
1296
1297 /* Use vec_extract patterns for extracting parts of vectors whenever
1298 available. */
1299 if (VECTOR_MODE_P (GET_MODE (op0))
1300 && !MEM_P (op0)
1301 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1302 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1303 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1304 {
1305 struct expand_operand ops[3];
1306 enum machine_mode outermode = GET_MODE (op0);
1307 enum machine_mode innermode = GET_MODE_INNER (outermode);
1308 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1309 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1310
1311 create_output_operand (&ops[0], target, innermode);
1312 create_input_operand (&ops[1], op0, outermode);
1313 create_integer_operand (&ops[2], pos);
1314 if (maybe_expand_insn (icode, 3, ops))
1315 {
1316 target = ops[0].value;
1317 if (GET_MODE (target) != mode)
1318 return gen_lowpart (tmode, target);
1319 return target;
1320 }
1321 }
1322
1323 /* Make sure we are playing with integral modes. Pun with subregs
1324 if we aren't. */
1325 {
1326 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1327 if (imode != GET_MODE (op0))
1328 {
1329 if (MEM_P (op0))
1330 op0 = adjust_address (op0, imode, 0);
1331 else if (imode != BLKmode)
1332 {
1333 op0 = gen_lowpart (imode, op0);
1334
1335 /* If we got a SUBREG, force it into a register since we
1336 aren't going to be able to do another SUBREG on it. */
1337 if (GET_CODE (op0) == SUBREG)
1338 op0 = force_reg (imode, op0);
1339 }
1340 else if (REG_P (op0))
1341 {
1342 rtx reg, subreg;
1343 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1344 MODE_INT);
1345 reg = gen_reg_rtx (imode);
1346 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1347 emit_move_insn (subreg, op0);
1348 op0 = reg;
1349 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1350 }
1351 else
1352 {
1353 rtx mem = assign_stack_temp (GET_MODE (op0),
1354 GET_MODE_SIZE (GET_MODE (op0)));
1355 emit_move_insn (mem, op0);
1356 op0 = adjust_address (mem, BLKmode, 0);
1357 }
1358 }
1359 }
1360
1361 /* We may be accessing data outside the field, which means
1362 we can alias adjacent data. */
1363 if (MEM_P (op0))
1364 {
1365 op0 = shallow_copy_rtx (op0);
1366 set_mem_alias_set (op0, 0);
1367 set_mem_expr (op0, 0);
1368 }
1369
1370 /* Extraction of a full-word or multi-word value from a structure
1371 in a register or aligned memory can be done with just a SUBREG.
1372 A subword value in the least significant part of a register
1373 can also be extracted with a SUBREG. For this, we need the
1374 byte offset of the value in op0. */
1375
1376 bitpos = bitnum % unit;
1377 offset = bitnum / unit;
1378 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1379
1380 /* If OP0 is a register, BITPOS must count within a word.
1381 But as we have it, it counts within whatever size OP0 now has.
1382 On a bigendian machine, these are not the same, so convert. */
1383 if (BYTES_BIG_ENDIAN
1384 && !MEM_P (op0)
1385 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1386 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1387
1388 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1389 If that's wrong, the solution is to test for it and set TARGET to 0
1390 if needed. */
1391
1392 /* Only scalar integer modes can be converted via subregs. There is an
1393 additional problem for FP modes here in that they can have a precision
1394 which is different from the size. mode_for_size uses precision, but
1395 we want a mode based on the size, so we must avoid calling it for FP
1396 modes. */
1397 mode1 = (SCALAR_INT_MODE_P (tmode)
1398 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1399 : mode);
1400
1401 /* If the bitfield is volatile, we need to make sure the access
1402 remains on a type-aligned boundary. */
1403 if (GET_CODE (op0) == MEM
1404 && MEM_VOLATILE_P (op0)
1405 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1406 && flag_strict_volatile_bitfields > 0)
1407 goto no_subreg_mode_swap;
1408
1409 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1410 && bitpos % BITS_PER_WORD == 0)
1411 || (mode1 != BLKmode
1412 /* ??? The big endian test here is wrong. This is correct
1413 if the value is in a register, and if mode_for_size is not
1414 the same mode as op0. This causes us to get unnecessarily
1415 inefficient code from the Thumb port when -mbig-endian. */
1416 && (BYTES_BIG_ENDIAN
1417 ? bitpos + bitsize == BITS_PER_WORD
1418 : bitpos == 0)))
1419 && ((!MEM_P (op0)
1420 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1421 && GET_MODE_SIZE (mode1) != 0
1422 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1423 || (MEM_P (op0)
1424 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1425 || (offset * BITS_PER_UNIT % bitsize == 0
1426 && MEM_ALIGN (op0) % bitsize == 0)))))
1427 {
1428 if (MEM_P (op0))
1429 op0 = adjust_address (op0, mode1, offset);
1430 else if (mode1 != GET_MODE (op0))
1431 {
1432 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1433 byte_offset);
1434 if (sub == NULL)
1435 goto no_subreg_mode_swap;
1436 op0 = sub;
1437 }
1438 if (mode1 != mode)
1439 return convert_to_mode (tmode, op0, unsignedp);
1440 return op0;
1441 }
1442 no_subreg_mode_swap:
1443
1444 /* Handle fields bigger than a word. */
1445
1446 if (bitsize > BITS_PER_WORD)
1447 {
1448 /* Here we transfer the words of the field
1449 in the order least significant first.
1450 This is because the most significant word is the one which may
1451 be less than full. */
1452
1453 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1454 unsigned int i;
1455
1456 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1457 target = gen_reg_rtx (mode);
1458
1459 /* Indicate for flow that the entire target reg is being set. */
1460 emit_clobber (target);
1461
1462 for (i = 0; i < nwords; i++)
1463 {
1464 /* If I is 0, use the low-order word in both field and target;
1465 if I is 1, use the next to lowest word; and so on. */
1466 /* Word number in TARGET to use. */
1467 unsigned int wordnum
1468 = (WORDS_BIG_ENDIAN
1469 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1470 : i);
1471 /* Offset from start of field in OP0. */
1472 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1473 ? MAX (0, ((int) bitsize - ((int) i + 1)
1474 * (int) BITS_PER_WORD))
1475 : (int) i * BITS_PER_WORD);
1476 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1477 rtx result_part
1478 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1479 bitsize - i * BITS_PER_WORD),
1480 bitnum + bit_offset, 1, false, target_part, mode,
1481 word_mode);
1482
1483 gcc_assert (target_part);
1484
1485 if (result_part != target_part)
1486 emit_move_insn (target_part, result_part);
1487 }
1488
1489 if (unsignedp)
1490 {
1491 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1492 need to be zero'd out. */
1493 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1494 {
1495 unsigned int i, total_words;
1496
1497 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1498 for (i = nwords; i < total_words; i++)
1499 emit_move_insn
1500 (operand_subword (target,
1501 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1502 1, VOIDmode),
1503 const0_rtx);
1504 }
1505 return target;
1506 }
1507
1508 /* Signed bit field: sign-extend with two arithmetic shifts. */
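      /* E.g. for a 32-bit MODE and bitsize == 8, an extracted low byte of
	 0x80 is shifted left by 24 to 0x80000000 and then arithmetically
	 right by 24, yielding 0xffffff80, i.e. -128 sign-extended.  */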
1509 target = expand_shift (LSHIFT_EXPR, mode, target,
1510 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1511 return expand_shift (RSHIFT_EXPR, mode, target,
1512 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1513 }
1514
1515 /* From here on we know the desired field is smaller than a word. */
1516
1517 /* Check if there is a correspondingly-sized integer field, so we can
1518 safely extract it as one size of integer, if necessary; then
1519 truncate or extend to the size that is wanted; then use SUBREGs or
1520 convert_to_mode to get one of the modes we really wanted. */
1521
1522 int_mode = int_mode_for_mode (tmode);
1523 if (int_mode == BLKmode)
1524 int_mode = int_mode_for_mode (mode);
1525 /* Should probably push op0 out to memory and then do a load. */
1526 gcc_assert (int_mode != BLKmode);
1527
1528 /* OFFSET is the number of words or bytes (UNIT says which)
1529 from STR_RTX to the first word or byte containing part of the field. */
1530 if (!MEM_P (op0))
1531 {
1532 if (offset != 0
1533 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1534 {
1535 if (!REG_P (op0))
1536 op0 = copy_to_reg (op0);
1537 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1538 op0, (offset * UNITS_PER_WORD));
1539 }
1540 offset = 0;
1541 }
1542
1543 /* Now OFFSET is nonzero only for memory operands. */
1544 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1545 if (ext_mode != MAX_MACHINE_MODE
1546 && bitsize > 0
1547 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1548 /* Do not use extv/extzv for volatile bitfields when
1549 -fstrict-volatile-bitfields is in effect. */
1550 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1551 && flag_strict_volatile_bitfields > 0)
1552 /* If op0 is a register, we need it in EXT_MODE to make it
1553 acceptable to the format of ext(z)v. */
1554 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1555 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1556 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1557 {
1558 struct expand_operand ops[4];
1559 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1560 rtx xop0 = op0;
1561 rtx xtarget = target;
1562 rtx xspec_target = target;
1563 rtx xspec_target_subreg = 0;
1564
1565 /* If op0 is a register, we need it in EXT_MODE to make it
1566 acceptable to the format of ext(z)v. */
1567 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1568 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1569 if (MEM_P (xop0))
1570 /* Get ref to first byte containing part of the field. */
1571 xop0 = adjust_address (xop0, byte_mode, xoffset);
1572
1573 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1574 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1575 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1576
1577 unit = GET_MODE_BITSIZE (ext_mode);
1578
1579 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1580 "backwards" from the size of the unit we are extracting from.
1581 Otherwise, we count bits from the most significant on a
1582 BYTES/BITS_BIG_ENDIAN machine. */
1583
1584 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1585 xbitpos = unit - bitsize - xbitpos;
1586
1587 if (xtarget == 0)
1588 xtarget = xspec_target = gen_reg_rtx (tmode);
1589
1590 if (GET_MODE (xtarget) != ext_mode)
1591 {
1592 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1593 between the mode of the extraction (word_mode) and the target
1594 mode. Instead, create a temporary and use convert_move to set
1595 the target. */
1596 if (REG_P (xtarget)
1597 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1598 {
1599 xtarget = gen_lowpart (ext_mode, xtarget);
1600 if (GET_MODE_PRECISION (ext_mode)
1601 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1602 xspec_target_subreg = xtarget;
1603 }
1604 else
1605 xtarget = gen_reg_rtx (ext_mode);
1606 }
1607
1608 create_output_operand (&ops[0], xtarget, ext_mode);
1609 create_fixed_operand (&ops[1], xop0);
1610 create_integer_operand (&ops[2], bitsize);
1611 create_integer_operand (&ops[3], xbitpos);
1612 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1613 4, ops))
1614 {
1615 xtarget = ops[0].value;
1616 if (xtarget == xspec_target)
1617 return xtarget;
1618 if (xtarget == xspec_target_subreg)
1619 return xspec_target;
1620 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1621 }
1622 }
1623
1624 /* If OP0 is a memory, try copying it to a register and seeing if a
1625 cheap register alternative is available. */
1626 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1627 {
1628 enum machine_mode bestmode;
1629
1630 /* Get the mode to use for inserting into this field. If
1631 OP0 is BLKmode, get the smallest mode consistent with the
1632 alignment. If OP0 is a non-BLKmode object that is no
1633 wider than EXT_MODE, use its mode. Otherwise, use the
1634 smallest mode containing the field. */
1635
1636 if (GET_MODE (op0) == BLKmode
1637 || (ext_mode != MAX_MACHINE_MODE
1638 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1639 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1640 (ext_mode == MAX_MACHINE_MODE
1641 ? VOIDmode : ext_mode),
1642 MEM_VOLATILE_P (op0));
1643 else
1644 bestmode = GET_MODE (op0);
1645
1646 if (bestmode != VOIDmode
1647 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1648 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1649 {
1650 unsigned HOST_WIDE_INT xoffset, xbitpos;
1651
1652 /* Compute the offset as a multiple of this unit,
1653 counting in bytes. */
1654 unit = GET_MODE_BITSIZE (bestmode);
1655 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1656 xbitpos = bitnum % unit;
1657
1658 /* Make sure the register is big enough for the whole field. */
1659 if (xoffset * BITS_PER_UNIT + unit
1660 >= offset * BITS_PER_UNIT + bitsize)
1661 {
1662 rtx last, result, xop0;
1663
1664 last = get_last_insn ();
1665
1666 /* Fetch it to a register in that size. */
1667 xop0 = adjust_address (op0, bestmode, xoffset);
1668 xop0 = force_reg (bestmode, xop0);
1669 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1670 unsignedp, packedp, target,
1671 mode, tmode, false);
1672 if (result)
1673 return result;
1674
1675 delete_insns_since (last);
1676 }
1677 }
1678 }
1679
1680 if (!fallback_p)
1681 return NULL;
1682
1683 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1684 bitpos, target, unsignedp, packedp);
1685 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687
1688 /* Generate code to extract a byte-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1699
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1704
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1712 }
1713 \f
1714 /* Extract a bit field using shifts and boolean operations.
1715 Returns an rtx to represent the value.
1716 OP0 addresses a register (word) or memory (byte).
1717 BITPOS says which bit within the word or byte the bit field starts in.
1718 OFFSET says how many bytes farther the bit field starts;
1719 it is 0 if OP0 is a register.
1720 BITSIZE says how many bits long the bit field is.
1721 (If OP0 is a register, it may be narrower than a full word,
1722 but BITPOS still counts within a full word,
1723 which is significant on bigendian machines.)
1724
1725 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1726 PACKEDP is true if the field has the packed attribute.
1727
1728 If TARGET is nonzero, attempts to store the value there
1729 and return TARGET, but this is not guaranteed.
1730 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1731
1732 static rtx
1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1734 unsigned HOST_WIDE_INT offset,
1735 unsigned HOST_WIDE_INT bitsize,
1736 unsigned HOST_WIDE_INT bitpos, rtx target,
1737 int unsignedp, bool packedp)
1738 {
1739 unsigned int total_bits = BITS_PER_WORD;
1740 enum machine_mode mode;
1741
1742 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1743 {
1744 /* Special treatment for a bit field split across two registers. */
1745 if (bitsize + bitpos > BITS_PER_WORD)
1746 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1747 }
1748 else
1749 {
1750 /* Get the proper mode to use for this field. We want a mode that
1751 includes the entire field. If such a mode would be larger than
1752 a word, we won't be doing the extraction the normal way. */
1753
1754 if (MEM_VOLATILE_P (op0)
1755 && flag_strict_volatile_bitfields > 0)
1756 {
1757 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1758 mode = GET_MODE (op0);
1759 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1760 mode = GET_MODE (target);
1761 else
1762 mode = tmode;
1763 }
1764 else
1765 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1766 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1767
1768 if (mode == VOIDmode)
1769 /* The only way this should occur is if the field spans word
1770 boundaries. */
1771 return extract_split_bit_field (op0, bitsize,
1772 bitpos + offset * BITS_PER_UNIT,
1773 unsignedp);
1774
1775 total_bits = GET_MODE_BITSIZE (mode);
1776
1777 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1778 be in the range 0 to total_bits-1, and put any excess bytes in
1779 OFFSET. */
1780 if (bitpos >= total_bits)
1781 {
1782 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1783 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1784 * BITS_PER_UNIT);
1785 }
1786
1787 /* If we're accessing a volatile MEM, we can't do the next
1788 alignment step if it results in a multi-word access where we
1789 otherwise wouldn't have one. So, check for that case
1790 here. */
1791 if (MEM_P (op0)
1792 && MEM_VOLATILE_P (op0)
1793 && flag_strict_volatile_bitfields > 0
1794 && bitpos + bitsize <= total_bits
1795 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1796 {
1797 if (STRICT_ALIGNMENT)
1798 {
1799 static bool informed_about_misalignment = false;
1800 bool warned;
1801
1802 if (packedp)
1803 {
1804 if (bitsize == total_bits)
1805 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1806 "multiple accesses to volatile structure member"
1807 " because of packed attribute");
1808 else
1809 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810 "multiple accesses to volatile structure bitfield"
1811 " because of packed attribute");
1812
1813 return extract_split_bit_field (op0, bitsize,
1814 bitpos + offset * BITS_PER_UNIT,
1815 unsignedp);
1816 }
1817
1818 if (bitsize == total_bits)
1819 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1820 "mis-aligned access used for structure member");
1821 else
1822 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1823 "mis-aligned access used for structure bitfield");
1824
1825 if (! informed_about_misalignment && warned)
1826 {
1827 informed_about_misalignment = true;
1828 inform (input_location,
1829 "when a volatile object spans multiple type-sized locations,"
1830 " the compiler must choose between using a single mis-aligned access to"
1831 " preserve the volatility, or using multiple aligned accesses to avoid"
1832 " runtime faults; this code may fail at runtime if the hardware does"
1833 " not allow this access");
1834 }
1835 }
1836 }
1837 else
1838 {
1839
1840 /* Get ref to an aligned byte, halfword, or word containing the field.
1841 Adjust BITPOS to be position within a word,
1842 and OFFSET to be the offset of that word.
1843 Then alter OP0 to refer to that word. */
1844 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1845 offset -= (offset % (total_bits / BITS_PER_UNIT));
1846 }
1847
1848 op0 = adjust_address (op0, mode, offset);
1849 }
1850
1851 mode = GET_MODE (op0);
1852
1853 if (BYTES_BIG_ENDIAN)
1854 /* BITPOS is the distance between our msb and that of OP0.
1855 Convert it to the distance from the lsb. */
1856 bitpos = total_bits - bitsize - bitpos;
1857
1858 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1859 We have reduced the big-endian case to the little-endian case. */
1860
1861 if (unsignedp)
1862 {
1863 if (bitpos)
1864 {
1865 /* If the field does not already start at the lsb,
1866 shift it so it does. */
1867 /* Maybe propagate the target for the shift. */
1868 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1869 if (tmode != mode)
1870 subtarget = 0;
1871 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1872 }
1873 /* Convert the value to the desired mode. */
1874 if (mode != tmode)
1875 op0 = convert_to_mode (tmode, op0, 1);
1876
1877 /* Unless the msb of the field used to be the msb when we shifted,
1878 mask out the upper bits. */
1879
1880 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1881 return expand_binop (GET_MODE (op0), and_optab, op0,
1882 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1883 target, 1, OPTAB_LIB_WIDEN);
1884 return op0;
1885 }
1886
1887 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888 then arithmetic-shift its lsb to the lsb of the word. */
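/* For example, on a typical 32-bit target a signed 5-bit field whose lsb
   sits at bit 20 is handled below in SImode (the narrowest integer mode
   covering bits 0..24): it is shifted left by 32 - (5 + 20) == 7 so that
   its msb lands in bit 31, and then shifted right arithmetically by
   32 - 5 == 27, which sign-extends the field over the whole word.  */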
1889 op0 = force_reg (mode, op0);
1890
1891 /* Find the narrowest integer mode that contains the field. */
1892
1893 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894 mode = GET_MODE_WIDER_MODE (mode))
1895 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1896 {
1897 op0 = convert_to_mode (mode, op0, 0);
1898 break;
1899 }
1900
1901 if (mode != tmode)
1902 target = 0;
1903
1904 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1905 {
1906 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907 /* Maybe propagate the target for the shift. */
1908 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1910 }
1911
1912 return expand_shift (RSHIFT_EXPR, mode, op0,
1913 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1914 }
1915 \f
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918 complement of that if COMPLEMENT. The mask is truncated if
1919 necessary to the width of mode MODE. The mask is zero-extended if
1920 BITSIZE+BITPOS is too small for MODE. */
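/* For instance, mask_rtx (SImode, 3, 4, 0) yields the constant 0x78
   (four one bits shifted past three zero bits), and with COMPLEMENT
   nonzero it yields 0xffffff87 instead.  */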
1921
1922 static rtx
1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1924 {
1925 double_int mask;
1926
1927 mask = double_int_mask (bitsize);
1928 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1929
1930 if (complement)
1931 mask = double_int_not (mask);
1932
1933 return immed_double_int_const (mask, mode);
1934 }
1935
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1938
1939 static rtx
1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1941 {
1942 double_int val;
1943
1944 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946
1947 return immed_double_int_const (val, mode);
1948 }
1949 \f
1950 /* Extract a bit field that is split across two words
1951 and return an RTX for the result.
1952
1953 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1954 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1955 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
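/* For example, with 32-bit words a 12-bit field whose first bit is at
   position 26 of the first word is assembled from a 6-bit piece of word 0
   and a 6-bit piece of word 1; each piece is extracted unsigned with
   extract_fixed_bit_field, shifted into place and IORed into the result,
   which is finally sign-extended if UNSIGNEDP is zero.  */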
1956
1957 static rtx
1958 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1959 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1960 {
1961 unsigned int unit;
1962 unsigned int bitsdone = 0;
1963 rtx result = NULL_RTX;
1964 int first = 1;
1965
1966 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1967 much at a time. */
1968 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1969 unit = BITS_PER_WORD;
1970 else
1971 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1972
1973 while (bitsdone < bitsize)
1974 {
1975 unsigned HOST_WIDE_INT thissize;
1976 rtx part, word;
1977 unsigned HOST_WIDE_INT thispos;
1978 unsigned HOST_WIDE_INT offset;
1979
1980 offset = (bitpos + bitsdone) / unit;
1981 thispos = (bitpos + bitsdone) % unit;
1982
1983 /* THISSIZE must not overrun a word boundary. Otherwise,
1984 extract_fixed_bit_field will call us again, and we will mutually
1985 recurse forever. */
1986 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1987 thissize = MIN (thissize, unit - thispos);
1988
1989 /* If OP0 is a register, then handle OFFSET here.
1990
1991 When handling multiword bitfields, extract_bit_field may pass
1992 down a word_mode SUBREG of a larger REG for a bitfield that actually
1993 crosses a word boundary. Thus, for a SUBREG, we must find
1994 the current word starting from the base register. */
1995 if (GET_CODE (op0) == SUBREG)
1996 {
1997 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1998 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1999 GET_MODE (SUBREG_REG (op0)));
2000 offset = 0;
2001 }
2002 else if (REG_P (op0))
2003 {
2004 word = operand_subword_force (op0, offset, GET_MODE (op0));
2005 offset = 0;
2006 }
2007 else
2008 word = op0;
2009
2010 /* Extract the parts in bit-counting order,
2011 whose meaning is determined by BYTES_PER_UNIT.
2012 OFFSET is in UNITs, and UNIT is in bits.
2013 extract_fixed_bit_field wants offset in bytes. */
2014 part = extract_fixed_bit_field (word_mode, word,
2015 offset * unit / BITS_PER_UNIT,
2016 thissize, thispos, 0, 1, false);
2017 bitsdone += thissize;
2018
2019 /* Shift this part into place for the result. */
2020 if (BYTES_BIG_ENDIAN)
2021 {
2022 if (bitsize != bitsdone)
2023 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2024 bitsize - bitsdone, 0, 1);
2025 }
2026 else
2027 {
2028 if (bitsdone != thissize)
2029 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2030 bitsdone - thissize, 0, 1);
2031 }
2032
2033 if (first)
2034 result = part;
2035 else
2036 /* Combine the parts with bitwise or. This works
2037 because we extracted each part as an unsigned bit field. */
2038 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2039 OPTAB_LIB_WIDEN);
2040
2041 first = 0;
2042 }
2043
2044 /* Unsigned bit field: we are done. */
2045 if (unsignedp)
2046 return result;
2047 /* Signed bit field: sign-extend with two arithmetic shifts. */
2048 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2049 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2050 return expand_shift (RSHIFT_EXPR, word_mode, result,
2051 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2052 }
2053 \f
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2056 MODE, fill the upper bits with zeros. Fail if the layout of either
2057 mode is unknown (as for CC modes) or if the extraction would involve
2058 unprofitable mode punning. Return the value on success, otherwise
2059 return null.
2060
2061 This is different from gen_lowpart* in these respects:
2062
2063 - the returned value must always be considered an rvalue
2064
2065 - when MODE is wider than SRC_MODE, the extraction involves
2066 a zero extension
2067
2068 - when MODE is smaller than SRC_MODE, the extraction involves
2069 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2070
2071 In other words, this routine performs a computation, whereas the
2072 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2073 operations. */
2074
2075 rtx
2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2077 {
2078 enum machine_mode int_mode, src_int_mode;
2079
2080 if (mode == src_mode)
2081 return src;
2082
2083 if (CONSTANT_P (src))
2084 {
2085 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086 fails, it will happily create (subreg (symbol_ref)) or similar
2087 invalid SUBREGs. */
2088 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2090 if (ret)
2091 return ret;
2092
2093 if (GET_MODE (src) == VOIDmode
2094 || !validate_subreg (mode, src_mode, src, byte))
2095 return NULL_RTX;
2096
2097 src = force_reg (GET_MODE (src), src);
2098 return gen_rtx_SUBREG (mode, src, byte);
2099 }
2100
2101 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2102 return NULL_RTX;
2103
2104 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105 && MODES_TIEABLE_P (mode, src_mode))
2106 {
2107 rtx x = gen_lowpart_common (mode, src);
2108 if (x)
2109 return x;
2110 }
2111
2112 src_int_mode = int_mode_for_mode (src_mode);
2113 int_mode = int_mode_for_mode (mode);
2114 if (src_int_mode == BLKmode || int_mode == BLKmode)
2115 return NULL_RTX;
2116
2117 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2118 return NULL_RTX;
2119 if (!MODES_TIEABLE_P (int_mode, mode))
2120 return NULL_RTX;
2121
2122 src = gen_lowpart (src_int_mode, src);
2123 src = convert_modes (int_mode, src_int_mode, src, true);
2124 src = gen_lowpart (mode, src);
2125 return src;
2126 }
2127 \f
2128 /* Add INC into TARGET. */
2129
2130 void
2131 expand_inc (rtx target, rtx inc)
2132 {
2133 rtx value = expand_binop (GET_MODE (target), add_optab,
2134 target, inc,
2135 target, 0, OPTAB_LIB_WIDEN);
2136 if (value != target)
2137 emit_move_insn (target, value);
2138 }
2139
2140 /* Subtract DEC from TARGET. */
2141
2142 void
2143 expand_dec (rtx target, rtx dec)
2144 {
2145 rtx value = expand_binop (GET_MODE (target), sub_optab,
2146 target, dec,
2147 target, 0, OPTAB_LIB_WIDEN);
2148 if (value != target)
2149 emit_move_insn (target, value);
2150 }
2151 \f
2152 /* Output a shift instruction for expression code CODE,
2153 with SHIFTED being the rtx for the value to shift,
2154 and AMOUNT the rtx for the amount to shift by.
2155 Store the result in the rtx TARGET, if that is convenient.
2156 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157 Return the rtx for where the value is. */
2158
2159 static rtx
2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161 rtx amount, rtx target, int unsignedp)
2162 {
2163 rtx op1, temp = 0;
2164 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166 optab lshift_optab = ashl_optab;
2167 optab rshift_arith_optab = ashr_optab;
2168 optab rshift_uns_optab = lshr_optab;
2169 optab lrotate_optab = rotl_optab;
2170 optab rrotate_optab = rotr_optab;
2171 enum machine_mode op1_mode;
2172 int attempt;
2173 bool speed = optimize_insn_for_speed_p ();
2174
2175 op1 = amount;
2176 op1_mode = GET_MODE (op1);
2177
2178 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2179 shift amount is a vector, use the vector/vector shift patterns. */
2180 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2181 {
2182 lshift_optab = vashl_optab;
2183 rshift_arith_optab = vashr_optab;
2184 rshift_uns_optab = vlshr_optab;
2185 lrotate_optab = vrotl_optab;
2186 rrotate_optab = vrotr_optab;
2187 }
2188
2189 /* We used to detect shift counts computed by NEGATE_EXPR and shift
2190 in the other direction, but that does not work on all machines. */
2192
2193 if (SHIFT_COUNT_TRUNCATED)
2194 {
2195 if (CONST_INT_P (op1)
2196 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199 % GET_MODE_BITSIZE (mode));
2200 else if (GET_CODE (op1) == SUBREG
2201 && subreg_lowpart_p (op1)
2202 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2203 op1 = SUBREG_REG (op1);
2204 }
2205
2206 if (op1 == const0_rtx)
2207 return shifted;
2208
2209 /* Check whether it's cheaper to implement a left shift by a constant
2210 bit count by a sequence of additions. */
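/* For instance, a left shift by 2 can be carried out as two
   self-additions: x + x gives 2*x and adding that to itself gives 4*x,
   which is what the loop below emits when the add costs win.  */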
2211 if (code == LSHIFT_EXPR
2212 && CONST_INT_P (op1)
2213 && INTVAL (op1) > 0
2214 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2215 && INTVAL (op1) < MAX_BITS_PER_WORD
2216 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2217 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2218 {
2219 int i;
2220 for (i = 0; i < INTVAL (op1); i++)
2221 {
2222 temp = force_reg (mode, shifted);
2223 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2224 unsignedp, OPTAB_LIB_WIDEN);
2225 }
2226 return shifted;
2227 }
2228
2229 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2230 {
2231 enum optab_methods methods;
2232
2233 if (attempt == 0)
2234 methods = OPTAB_DIRECT;
2235 else if (attempt == 1)
2236 methods = OPTAB_WIDEN;
2237 else
2238 methods = OPTAB_LIB_WIDEN;
2239
2240 if (rotate)
2241 {
2242 /* Widening does not work for rotation. */
2243 if (methods == OPTAB_WIDEN)
2244 continue;
2245 else if (methods == OPTAB_LIB_WIDEN)
2246 {
2247 /* If we have been unable to open-code this by a rotation,
2248 do it as the IOR of two shifts. I.e., to rotate A
2249 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2250 where C is the bitsize of A.
2251
2252 It is theoretically possible that the target machine might
2253 not be able to perform either shift and hence we would
2254 be making two libcalls rather than just the one for the
2255 shift (similarly if IOR could not be done). We will allow
2256 this extremely unlikely lossage to avoid complicating the
2257 code below. */
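/* As a concrete instance, rotating an 8-bit value A left by 3 becomes
   (A << 3) | ((unsigned) A >> 5); for A == 0xb4 that is
   0xa0 | 0x05 == 0xa5, i.e. 10110100 rotated to 10100101.  */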
2258
2259 rtx subtarget = target == shifted ? 0 : target;
2260 rtx new_amount, other_amount;
2261 rtx temp1;
2262
2263 new_amount = op1;
2264 if (CONST_INT_P (op1))
2265 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2266 - INTVAL (op1));
2267 else
2268 other_amount
2269 = simplify_gen_binary (MINUS, GET_MODE (op1),
2270 GEN_INT (GET_MODE_PRECISION (mode)),
2271 op1);
2272
2273 shifted = force_reg (mode, shifted);
2274
2275 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2276 mode, shifted, new_amount, 0, 1);
2277 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2278 mode, shifted, other_amount,
2279 subtarget, 1);
2280 return expand_binop (mode, ior_optab, temp, temp1, target,
2281 unsignedp, methods);
2282 }
2283
2284 temp = expand_binop (mode,
2285 left ? lrotate_optab : rrotate_optab,
2286 shifted, op1, target, unsignedp, methods);
2287 }
2288 else if (unsignedp)
2289 temp = expand_binop (mode,
2290 left ? lshift_optab : rshift_uns_optab,
2291 shifted, op1, target, unsignedp, methods);
2292
2293 /* Do arithmetic shifts.
2294 Also, if we are going to widen the operand, we can just as well
2295 use an arithmetic right-shift instead of a logical one. */
2296 if (temp == 0 && ! rotate
2297 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2298 {
2299 enum optab_methods methods1 = methods;
2300
2301 /* If trying to widen a log shift to an arithmetic shift,
2302 don't accept an arithmetic shift of the same size. */
2303 if (unsignedp)
2304 methods1 = OPTAB_MUST_WIDEN;
2305
2306 /* Arithmetic shift */
2307
2308 temp = expand_binop (mode,
2309 left ? lshift_optab : rshift_arith_optab,
2310 shifted, op1, target, unsignedp, methods1);
2311 }
2312
2313 /* We used to try extzv here for logical right shifts, but that was
2314 only useful for one machine, the VAX, and caused poor code
2315 generation there for lshrdi3, so the code was deleted and a
2316 define_expand for lshrsi3 was added to vax.md. */
2317 }
2318
2319 gcc_assert (temp);
2320 return temp;
2321 }
2322
2323 /* Output a shift instruction for expression code CODE,
2324 with SHIFTED being the rtx for the value to shift,
2325 and AMOUNT the amount to shift by.
2326 Store the result in the rtx TARGET, if that is convenient.
2327 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2328 Return the rtx for where the value is. */
2329
2330 rtx
2331 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2332 int amount, rtx target, int unsignedp)
2333 {
2334 return expand_shift_1 (code, mode,
2335 shifted, GEN_INT (amount), target, unsignedp);
2336 }
2337
2338 /* Output a shift instruction for expression code CODE,
2339 with SHIFTED being the rtx for the value to shift,
2340 and AMOUNT the tree for the amount to shift by.
2341 Store the result in the rtx TARGET, if that is convenient.
2342 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2343 Return the rtx for where the value is. */
2344
2345 rtx
2346 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2347 tree amount, rtx target, int unsignedp)
2348 {
2349 return expand_shift_1 (code, mode,
2350 shifted, expand_normal (amount), target, unsignedp);
2351 }
2352
2353 \f
2354 /* Indicates the type of fixup needed after a constant multiplication.
2355 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2356 the result should be negated, and ADD_VARIANT means that the
2357 multiplicand should be added to the result. */
2358 enum mult_variant {basic_variant, negate_variant, add_variant};
2359
2360 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2361 const struct mult_cost *, enum machine_mode mode);
2362 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2363 struct algorithm *, enum mult_variant *, int);
2364 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2365 const struct algorithm *, enum mult_variant);
2366 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2367 int, rtx *, int *, int *);
2368 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2369 static rtx extract_high_half (enum machine_mode, rtx);
2370 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2371 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2372 int, int);
2373 /* Compute and return the best algorithm for multiplying by T.
2374 The algorithm must cost less than COST_LIMIT.
2375 If retval.cost >= COST_LIMIT, no algorithm was found and all
2376 other fields of the returned struct are undefined.
2377 MODE is the machine mode of the multiplication. */
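/* As an illustration, a multiplication by 10 can be synthesized as
   ((x << 2) + x) << 1 -- multiply by 5 with one shift-and-add, then
   shift the result left by one.  synth_mult explores such decompositions
   recursively (shifts, +/-1 steps, factors of the form 2**m +- 1, and
   shift-and-add steps) and keeps the cheapest sequence it finds within
   COST_LIMIT.  */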
2378
2379 static void
2380 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2381 const struct mult_cost *cost_limit, enum machine_mode mode)
2382 {
2383 int m;
2384 struct algorithm *alg_in, *best_alg;
2385 struct mult_cost best_cost;
2386 struct mult_cost new_limit;
2387 int op_cost, op_latency;
2388 unsigned HOST_WIDE_INT orig_t = t;
2389 unsigned HOST_WIDE_INT q;
2390 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2391 int hash_index;
2392 bool cache_hit = false;
2393 enum alg_code cache_alg = alg_zero;
2394 bool speed = optimize_insn_for_speed_p ();
2395
2396 /* Indicate that no algorithm is yet found. If no algorithm
2397 is found, this value will be returned and indicate failure. */
2398 alg_out->cost.cost = cost_limit->cost + 1;
2399 alg_out->cost.latency = cost_limit->latency + 1;
2400
2401 if (cost_limit->cost < 0
2402 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2403 return;
2404
2405 /* Restrict the bits of "t" to the multiplication's mode. */
2406 t &= GET_MODE_MASK (mode);
2407
2408 /* t == 1 can be done in zero cost. */
2409 if (t == 1)
2410 {
2411 alg_out->ops = 1;
2412 alg_out->cost.cost = 0;
2413 alg_out->cost.latency = 0;
2414 alg_out->op[0] = alg_m;
2415 return;
2416 }
2417
2418 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2419 fail now. */
2420 if (t == 0)
2421 {
2422 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2423 return;
2424 else
2425 {
2426 alg_out->ops = 1;
2427 alg_out->cost.cost = zero_cost[speed];
2428 alg_out->cost.latency = zero_cost[speed];
2429 alg_out->op[0] = alg_zero;
2430 return;
2431 }
2432 }
2433
2434 /* We'll be needing a couple extra algorithm structures now. */
2435
2436 alg_in = XALLOCA (struct algorithm);
2437 best_alg = XALLOCA (struct algorithm);
2438 best_cost = *cost_limit;
2439
2440 /* Compute the hash index. */
2441 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2442
2443 /* See if we already know what to do for T. */
2444 if (alg_hash[hash_index].t == t
2445 && alg_hash[hash_index].mode == mode
2447 && alg_hash[hash_index].speed == speed
2448 && alg_hash[hash_index].alg != alg_unknown)
2449 {
2450 cache_alg = alg_hash[hash_index].alg;
2451
2452 if (cache_alg == alg_impossible)
2453 {
2454 /* The cache tells us that it's impossible to synthesize
2455 multiplication by T within alg_hash[hash_index].cost. */
2456 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2457 /* COST_LIMIT is at least as restrictive as the one
2458 recorded in the hash table, in which case we have no
2459 hope of synthesizing a multiplication. Just
2460 return. */
2461 return;
2462
2463 /* If we get here, COST_LIMIT is less restrictive than the
2464 one recorded in the hash table, so we may be able to
2465 synthesize a multiplication. Proceed as if we didn't
2466 have the cache entry. */
2467 }
2468 else
2469 {
2470 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2471 /* The cached algorithm shows that this multiplication
2472 requires more cost than COST_LIMIT. Just return. This
2473 way, we don't clobber this cache entry with
2474 alg_impossible but retain useful information. */
2475 return;
2476
2477 cache_hit = true;
2478
2479 switch (cache_alg)
2480 {
2481 case alg_shift:
2482 goto do_alg_shift;
2483
2484 case alg_add_t_m2:
2485 case alg_sub_t_m2:
2486 goto do_alg_addsub_t_m2;
2487
2488 case alg_add_factor:
2489 case alg_sub_factor:
2490 goto do_alg_addsub_factor;
2491
2492 case alg_add_t2_m:
2493 goto do_alg_add_t2_m;
2494
2495 case alg_sub_t2_m:
2496 goto do_alg_sub_t2_m;
2497
2498 default:
2499 gcc_unreachable ();
2500 }
2501 }
2502 }
2503
2504 /* If we have a group of zero bits at the low-order part of T, try
2505 multiplying by the remaining bits and then doing a shift. */
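/* For T == 24 (binary 11000), for example, this tries a multiplication
   by 3 followed by a left shift of 3.  */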
2506
2507 if ((t & 1) == 0)
2508 {
2509 do_alg_shift:
2510 m = floor_log2 (t & -t); /* m = number of low zero bits */
2511 if (m < maxm)
2512 {
2513 q = t >> m;
2514 /* The function expand_shift will choose between a shift and
2515 a sequence of additions, so the observed cost is given as
2516 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2517 op_cost = m * add_cost[speed][mode];
2518 if (shift_cost[speed][mode][m] < op_cost)
2519 op_cost = shift_cost[speed][mode][m];
2520 new_limit.cost = best_cost.cost - op_cost;
2521 new_limit.latency = best_cost.latency - op_cost;
2522 synth_mult (alg_in, q, &new_limit, mode);
2523
2524 alg_in->cost.cost += op_cost;
2525 alg_in->cost.latency += op_cost;
2526 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2527 {
2528 struct algorithm *x;
2529 best_cost = alg_in->cost;
2530 x = alg_in, alg_in = best_alg, best_alg = x;
2531 best_alg->log[best_alg->ops] = m;
2532 best_alg->op[best_alg->ops] = alg_shift;
2533 }
2534
2535 /* See if treating ORIG_T as a signed number yields a better
2536 sequence. Try this sequence only for a negative ORIG_T
2537 as it would be useless for a non-negative ORIG_T. */
2538 if ((HOST_WIDE_INT) orig_t < 0)
2539 {
2540 /* Shift ORIG_T as follows because a right shift of a
2541 negative-valued signed type is implementation
2542 defined. */
2543 q = ~(~orig_t >> m);
2544 /* The function expand_shift will choose between a shift
2545 and a sequence of additions, so the observed cost is
2546 given as MIN (m * add_cost[speed][mode],
2547 shift_cost[speed][mode][m]). */
2548 op_cost = m * add_cost[speed][mode];
2549 if (shift_cost[speed][mode][m] < op_cost)
2550 op_cost = shift_cost[speed][mode][m];
2551 new_limit.cost = best_cost.cost - op_cost;
2552 new_limit.latency = best_cost.latency - op_cost;
2553 synth_mult (alg_in, q, &new_limit, mode);
2554
2555 alg_in->cost.cost += op_cost;
2556 alg_in->cost.latency += op_cost;
2557 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2558 {
2559 struct algorithm *x;
2560 best_cost = alg_in->cost;
2561 x = alg_in, alg_in = best_alg, best_alg = x;
2562 best_alg->log[best_alg->ops] = m;
2563 best_alg->op[best_alg->ops] = alg_shift;
2564 }
2565 }
2566 }
2567 if (cache_hit)
2568 goto done;
2569 }
2570
2571 /* If we have an odd number, add or subtract one. */
2572 if ((t & 1) != 0)
2573 {
2574 unsigned HOST_WIDE_INT w;
2575
2576 do_alg_addsub_t_m2:
2577 for (w = 1; (w & t) != 0; w <<= 1)
2578 ;
2579 /* If T was -1, then W will be zero after the loop.  This is another
2580 case where T ends with ...111.  Handling it by multiplying by
2581 (T + 1) and subtracting 1 produces slightly better code and makes
2582 algorithm selection much faster than treating it like the
2583 below. */
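/* For T == 15 (binary 1111) the loop leaves W == 16, so we synthesize
   T + 1 == 16 (a single shift) and subtract the operand once:
   15*x == (x << 4) - x.  For T == 5 (binary 101) we instead synthesize
   T - 1 == 4 and add: 5*x == (x << 2) + x.  */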
2584 if (w == 0
2585 || (w > 2
2586 /* Reject the case where t is 3.
2587 Thus we prefer addition in that case. */
2588 && t != 3))
2589 {
2590 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2591
2592 op_cost = add_cost[speed][mode];
2593 new_limit.cost = best_cost.cost - op_cost;
2594 new_limit.latency = best_cost.latency - op_cost;
2595 synth_mult (alg_in, t + 1, &new_limit, mode);
2596
2597 alg_in->cost.cost += op_cost;
2598 alg_in->cost.latency += op_cost;
2599 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2600 {
2601 struct algorithm *x;
2602 best_cost = alg_in->cost;
2603 x = alg_in, alg_in = best_alg, best_alg = x;
2604 best_alg->log[best_alg->ops] = 0;
2605 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2606 }
2607 }
2608 else
2609 {
2610 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2611
2612 op_cost = add_cost[speed][mode];
2613 new_limit.cost = best_cost.cost - op_cost;
2614 new_limit.latency = best_cost.latency - op_cost;
2615 synth_mult (alg_in, t - 1, &new_limit, mode);
2616
2617 alg_in->cost.cost += op_cost;
2618 alg_in->cost.latency += op_cost;
2619 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2620 {
2621 struct algorithm *x;
2622 best_cost = alg_in->cost;
2623 x = alg_in, alg_in = best_alg, best_alg = x;
2624 best_alg->log[best_alg->ops] = 0;
2625 best_alg->op[best_alg->ops] = alg_add_t_m2;
2626 }
2627 }
2628
2629 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2630 quickly with a - a * n for some appropriate constant n. */
2631 m = exact_log2 (-orig_t + 1);
2632 if (m >= 0 && m < maxm)
2633 {
2634 op_cost = shiftsub1_cost[speed][mode][m];
2635 new_limit.cost = best_cost.cost - op_cost;
2636 new_limit.latency = best_cost.latency - op_cost;
2637 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2638
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_cost;
2641 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 {
2643 struct algorithm *x;
2644 best_cost = alg_in->cost;
2645 x = alg_in, alg_in = best_alg, best_alg = x;
2646 best_alg->log[best_alg->ops] = m;
2647 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2648 }
2649 }
2650
2651 if (cache_hit)
2652 goto done;
2653 }
2654
2655 /* Look for factors of t of the form
2656 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2657 If we find such a factor, we can multiply by t using an algorithm that
2658 multiplies by q, shift the result by m and add/subtract it to itself.
2659
2660 We search for large factors first and loop down, even if large factors
2661 are less probable than small; if we find a large factor we will find a
2662 good sequence quickly, and therefore be able to prune (by decreasing
2663 COST_LIMIT) the search. */
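/* For instance, T == 45 has the factor d == 15 == 2**4 - 1 (found first
   because we scan m from large to small), so once 3*x has been
   synthesized, 45*x is computed as (3*x << 4) - 3*x.  */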
2664
2665 do_alg_addsub_factor:
2666 for (m = floor_log2 (t - 1); m >= 2; m--)
2667 {
2668 unsigned HOST_WIDE_INT d;
2669
2670 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2671 if (t % d == 0 && t > d && m < maxm
2672 && (!cache_hit || cache_alg == alg_add_factor))
2673 {
2674 /* If the target has a cheap shift-and-add instruction use
2675 that in preference to a shift insn followed by an add insn.
2676 Assume that the shift-and-add is "atomic" with a latency
2677 equal to its cost, otherwise assume that on superscalar
2678 hardware the shift may be executed concurrently with the
2679 earlier steps in the algorithm. */
2680 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2681 if (shiftadd_cost[speed][mode][m] < op_cost)
2682 {
2683 op_cost = shiftadd_cost[speed][mode][m];
2684 op_latency = op_cost;
2685 }
2686 else
2687 op_latency = add_cost[speed][mode];
2688
2689 new_limit.cost = best_cost.cost - op_cost;
2690 new_limit.latency = best_cost.latency - op_latency;
2691 synth_mult (alg_in, t / d, &new_limit, mode);
2692
2693 alg_in->cost.cost += op_cost;
2694 alg_in->cost.latency += op_latency;
2695 if (alg_in->cost.latency < op_cost)
2696 alg_in->cost.latency = op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2698 {
2699 struct algorithm *x;
2700 best_cost = alg_in->cost;
2701 x = alg_in, alg_in = best_alg, best_alg = x;
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_add_factor;
2704 }
2705 /* Other factors will have been taken care of in the recursion. */
2706 break;
2707 }
2708
2709 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2710 if (t % d == 0 && t > d && m < maxm
2711 && (!cache_hit || cache_alg == alg_sub_factor))
2712 {
2713 /* If the target has a cheap shift-and-subtract insn use
2714 that in preference to a shift insn followed by a sub insn.
2715 Assume that the shift-and-sub is "atomic" with a latency
2716 equal to its cost, otherwise assume that on superscalar
2717 hardware the shift may be executed concurrently with the
2718 earlier steps in the algorithm. */
2719 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2720 if (shiftsub0_cost[speed][mode][m] < op_cost)
2721 {
2722 op_cost = shiftsub0_cost[speed][mode][m];
2723 op_latency = op_cost;
2724 }
2725 else
2726 op_latency = add_cost[speed][mode];
2727
2728 new_limit.cost = best_cost.cost - op_cost;
2729 new_limit.latency = best_cost.latency - op_latency;
2730 synth_mult (alg_in, t / d, &new_limit, mode);
2731
2732 alg_in->cost.cost += op_cost;
2733 alg_in->cost.latency += op_latency;
2734 if (alg_in->cost.latency < op_cost)
2735 alg_in->cost.latency = op_cost;
2736 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2737 {
2738 struct algorithm *x;
2739 best_cost = alg_in->cost;
2740 x = alg_in, alg_in = best_alg, best_alg = x;
2741 best_alg->log[best_alg->ops] = m;
2742 best_alg->op[best_alg->ops] = alg_sub_factor;
2743 }
2744 break;
2745 }
2746 }
2747 if (cache_hit)
2748 goto done;
2749
2750 /* Try shift-and-add (load effective address) instructions,
2751 i.e. do a*3, a*5, a*9. */
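/* On targets with a scaled-index addressing mode (x86's lea, for
   example) these are single instructions: a*9 == (a << 3) + a, and
   shiftadd_cost tells us when such a combined operation is cheap.  */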
2752 if ((t & 1) != 0)
2753 {
2754 do_alg_add_t2_m:
2755 q = t - 1;
2756 q = q & -q;
2757 m = exact_log2 (q);
2758 if (m >= 0 && m < maxm)
2759 {
2760 op_cost = shiftadd_cost[speed][mode][m];
2761 new_limit.cost = best_cost.cost - op_cost;
2762 new_limit.latency = best_cost.latency - op_cost;
2763 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2764
2765 alg_in->cost.cost += op_cost;
2766 alg_in->cost.latency += op_cost;
2767 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2768 {
2769 struct algorithm *x;
2770 best_cost = alg_in->cost;
2771 x = alg_in, alg_in = best_alg, best_alg = x;
2772 best_alg->log[best_alg->ops] = m;
2773 best_alg->op[best_alg->ops] = alg_add_t2_m;
2774 }
2775 }
2776 if (cache_hit)
2777 goto done;
2778
2779 do_alg_sub_t2_m:
2780 q = t + 1;
2781 q = q & -q;
2782 m = exact_log2 (q);
2783 if (m >= 0 && m < maxm)
2784 {
2785 op_cost = shiftsub0_cost[speed][mode][m];
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2789
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2793 {
2794 struct algorithm *x;
2795 best_cost = alg_in->cost;
2796 x = alg_in, alg_in = best_alg, best_alg = x;
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2799 }
2800 }
2801 if (cache_hit)
2802 goto done;
2803 }
2804
2805 done:
2806 /* If best_cost has not decreased, we have not found any algorithm. */
2807 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2808 {
2809 /* We failed to find an algorithm. Record alg_impossible for
2810 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2811 we are asked to find an algorithm for T within the same or
2812 lower COST_LIMIT, we can immediately return to the
2813 caller. */
2814 alg_hash[hash_index].t = t;
2815 alg_hash[hash_index].mode = mode;
2816 alg_hash[hash_index].speed = speed;
2817 alg_hash[hash_index].alg = alg_impossible;
2818 alg_hash[hash_index].cost = *cost_limit;
2819 return;
2820 }
2821
2822 /* Cache the result. */
2823 if (!cache_hit)
2824 {
2825 alg_hash[hash_index].t = t;
2826 alg_hash[hash_index].mode = mode;
2827 alg_hash[hash_index].speed = speed;
2828 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2829 alg_hash[hash_index].cost.cost = best_cost.cost;
2830 alg_hash[hash_index].cost.latency = best_cost.latency;
2831 }
2832
2833 /* If the sequence is too long for `struct algorithm'
2834 to record, make this search fail.
2835 if (best_alg->ops == MAX_BITS_PER_WORD)
2836 return;
2837
2838 /* Copy the algorithm from temporary space to the space at alg_out.
2839 We avoid using structure assignment because the majority of
2840 best_alg is normally undefined, and this is a critical function. */
2841 alg_out->ops = best_alg->ops + 1;
2842 alg_out->cost = best_cost;
2843 memcpy (alg_out->op, best_alg->op,
2844 alg_out->ops * sizeof *alg_out->op);
2845 memcpy (alg_out->log, best_alg->log,
2846 alg_out->ops * sizeof *alg_out->log);
2847 }
2848 \f
2849 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2850 Try three variations:
2851
2852 - a shift/add sequence based on VAL itself
2853 - a shift/add sequence based on -VAL, followed by a negation
2854 - a shift/add sequence based on VAL - 1, followed by an addition.
2855
2856 Return true if the cheapest of these costs less than MULT_COST,
2857 describing the algorithm in *ALG and final fixup in *VARIANT. */
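/* For instance, VAL == 6 is typically cheapest directly, as
   ((x << 1) + x) << 1; a negative VAL such as -3 may come out cheaper as
   the negation variant (synthesize 3*x == (x << 1) + x, then negate the
   result); and the VAL - 1 form followed by one addition is what makes
   the division-by-constant multipliers affordable.  */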
2858
2859 static bool
2860 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2861 struct algorithm *alg, enum mult_variant *variant,
2862 int mult_cost)
2863 {
2864 struct algorithm alg2;
2865 struct mult_cost limit;
2866 int op_cost;
2867 bool speed = optimize_insn_for_speed_p ();
2868
2869 /* Fail quickly for impossible bounds. */
2870 if (mult_cost < 0)
2871 return false;
2872
2873 /* Ensure that mult_cost provides a reasonable upper bound.
2874 Any constant multiplication can be performed with fewer
2875 than 2 * bits additions.
2876 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2877 if (mult_cost > op_cost)
2878 mult_cost = op_cost;
2879
2880 *variant = basic_variant;
2881 limit.cost = mult_cost;
2882 limit.latency = mult_cost;
2883 synth_mult (alg, val, &limit, mode);
2884
2885 /* This works only if the inverted value actually fits in an
2886 `unsigned int' */
2887 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2888 {
2889 op_cost = neg_cost[speed][mode];
2890 if (MULT_COST_LESS (&alg->cost, mult_cost))
2891 {
2892 limit.cost = alg->cost.cost - op_cost;
2893 limit.latency = alg->cost.latency - op_cost;
2894 }
2895 else
2896 {
2897 limit.cost = mult_cost - op_cost;
2898 limit.latency = mult_cost - op_cost;
2899 }
2900
2901 synth_mult (&alg2, -val, &limit, mode);
2902 alg2.cost.cost += op_cost;
2903 alg2.cost.latency += op_cost;
2904 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905 *alg = alg2, *variant = negate_variant;
2906 }
2907
2908 /* This proves very useful for division-by-constant. */
2909 op_cost = add_cost[speed][mode];
2910 if (MULT_COST_LESS (&alg->cost, mult_cost))
2911 {
2912 limit.cost = alg->cost.cost - op_cost;
2913 limit.latency = alg->cost.latency - op_cost;
2914 }
2915 else
2916 {
2917 limit.cost = mult_cost - op_cost;
2918 limit.latency = mult_cost - op_cost;
2919 }
2920
2921 synth_mult (&alg2, val - 1, &limit, mode);
2922 alg2.cost.cost += op_cost;
2923 alg2.cost.latency += op_cost;
2924 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925 *alg = alg2, *variant = add_variant;
2926
2927 return MULT_COST_LESS (&alg->cost, mult_cost);
2928 }
2929
2930 /* A subroutine of expand_mult, used for constant multiplications.
2931 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2932 convenient. Use the shift/add sequence described by ALG and apply
2933 the final fixup specified by VARIANT. */
2934
2935 static rtx
2936 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2937 rtx target, const struct algorithm *alg,
2938 enum mult_variant variant)
2939 {
2940 HOST_WIDE_INT val_so_far;
2941 rtx insn, accum, tem;
2942 int opno;
2943 enum machine_mode nmode;
2944
2945 /* Avoid referencing memory over and over and invalid sharing
2946 on SUBREGs. */
2947 op0 = force_reg (mode, op0);
2948
2949 /* ACCUM starts out either as OP0 or as a zero, depending on
2950 the first operation. */
2951
2952 if (alg->op[0] == alg_zero)
2953 {
2954 accum = copy_to_mode_reg (mode, const0_rtx);
2955 val_so_far = 0;
2956 }
2957 else if (alg->op[0] == alg_m)
2958 {
2959 accum = copy_to_mode_reg (mode, op0);
2960 val_so_far = 1;
2961 }
2962 else
2963 gcc_unreachable ();
2964
2965 for (opno = 1; opno < alg->ops; opno++)
2966 {
2967 int log = alg->log[opno];
2968 rtx shift_subtarget = optimize ? 0 : accum;
2969 rtx add_target
2970 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2971 && !optimize)
2972 ? target : 0;
2973 rtx accum_target = optimize ? 0 : accum;
2974 rtx accum_inner;
2975
2976 switch (alg->op[opno])
2977 {
2978 case alg_shift:
2979 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2980 /* REG_EQUAL note will be attached to the following insn. */
2981 emit_move_insn (accum, tem);
2982 val_so_far <<= log;
2983 break;
2984
2985 case alg_add_t_m2:
2986 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2987 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2988 add_target ? add_target : accum_target);
2989 val_so_far += (HOST_WIDE_INT) 1 << log;
2990 break;
2991
2992 case alg_sub_t_m2:
2993 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2995 add_target ? add_target : accum_target);
2996 val_so_far -= (HOST_WIDE_INT) 1 << log;
2997 break;
2998
2999 case alg_add_t2_m:
3000 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3001 log, shift_subtarget, 0);
3002 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3003 add_target ? add_target : accum_target);
3004 val_so_far = (val_so_far << log) + 1;
3005 break;
3006
3007 case alg_sub_t2_m:
3008 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3009 log, shift_subtarget, 0);
3010 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3011 add_target ? add_target : accum_target);
3012 val_so_far = (val_so_far << log) - 1;
3013 break;
3014
3015 case alg_add_factor:
3016 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3017 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3018 add_target ? add_target : accum_target);
3019 val_so_far += val_so_far << log;
3020 break;
3021
3022 case alg_sub_factor:
3023 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3024 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3025 (add_target
3026 ? add_target : (optimize ? 0 : tem)));
3027 val_so_far = (val_so_far << log) - val_so_far;
3028 break;
3029
3030 default:
3031 gcc_unreachable ();
3032 }
3033
3034 /* Write a REG_EQUAL note on the last insn so that we can cse
3035 multiplication sequences. Note that if ACCUM is a SUBREG,
3036 we've set the inner register and must properly indicate
3037 that. */
3038
3039 tem = op0, nmode = mode;
3040 accum_inner = accum;
3041 if (GET_CODE (accum) == SUBREG)
3042 {
3043 accum_inner = SUBREG_REG (accum);
3044 nmode = GET_MODE (accum_inner);
3045 tem = gen_lowpart (nmode, op0);
3046 }
3047
3048 insn = get_last_insn ();
3049 set_dst_reg_note (insn, REG_EQUAL,
3050 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3051 accum_inner);
3052 }
3053
3054 if (variant == negate_variant)
3055 {
3056 val_so_far = -val_so_far;
3057 accum = expand_unop (mode, neg_optab, accum, target, 0);
3058 }
3059 else if (variant == add_variant)
3060 {
3061 val_so_far = val_so_far + 1;
3062 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3063 }
3064
3065 /* Compare only the bits of val and val_so_far that are significant
3066 in the result mode, to avoid sign-/zero-extension confusion. */
3067 val &= GET_MODE_MASK (mode);
3068 val_so_far &= GET_MODE_MASK (mode);
3069 gcc_assert (val == val_so_far);
3070
3071 return accum;
3072 }
3073
3074 /* Perform a multiplication and return an rtx for the result.
3075 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3076 TARGET is a suggestion for where to store the result (an rtx).
3077
3078 We check specially for a constant integer as OP1.
3079 If you want this check for OP0 as well, then before calling
3080 you should swap the two operands if OP0 would be constant. */
3081
3082 rtx
3083 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3084 int unsignedp)
3085 {
3086 enum mult_variant variant;
3087 struct algorithm algorithm;
3088 int max_cost;
3089 bool speed = optimize_insn_for_speed_p ();
3090
3091 /* Handling const0_rtx here allows us to use zero as a rogue value for
3092 coeff below. */
3093 if (op1 == const0_rtx)
3094 return const0_rtx;
3095 if (op1 == const1_rtx)
3096 return op0;
3097 if (op1 == constm1_rtx)
3098 return expand_unop (mode,
3099 GET_MODE_CLASS (mode) == MODE_INT
3100 && !unsignedp && flag_trapv
3101 ? negv_optab : neg_optab,
3102 op0, target, 0);
3103
3104 /* These are the operations that are potentially turned into a sequence
3105 of shifts and additions. */
3106 if (SCALAR_INT_MODE_P (mode)
3107 && (unsignedp || !flag_trapv))
3108 {
3109 HOST_WIDE_INT coeff = 0;
3110 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3111
3112 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3113 less than or equal in size to `unsigned int' this doesn't matter.
3114 If the mode is larger than `unsigned int', then synth_mult works
3115 only if the constant value exactly fits in an `unsigned int' without
3116 any truncation. This means that multiplying by negative values does
3117 not work; results are off by 2^32 on a 32 bit machine. */
3118
3119 if (CONST_INT_P (op1))
3120 {
3121 /* Attempt to handle multiplication of DImode values by negative
3122 coefficients, by performing the multiplication by a positive
3123 multiplier and then inverting the result. */
3124 if (INTVAL (op1) < 0
3125 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3126 {
3127 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3128 result is interpreted as an unsigned coefficient.
3129 Exclude cost of op0 from max_cost to match the cost
3130 calculation of the synth_mult. */
3131 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3132 speed)
3133 - neg_cost[speed][mode]);
3134 if (max_cost > 0
3135 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3136 &variant, max_cost))
3137 {
3138 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3139 NULL_RTX, &algorithm,
3140 variant);
3141 return expand_unop (mode, neg_optab, temp, target, 0);
3142 }
3143 }
3144 else coeff = INTVAL (op1);
3145 }
3146 else if (GET_CODE (op1) == CONST_DOUBLE)
3147 {
3148 /* If we are multiplying in DImode, it may still be a win
3149 to try to work with shifts and adds. */
3150 if (CONST_DOUBLE_HIGH (op1) == 0
3151 && CONST_DOUBLE_LOW (op1) > 0)
3152 coeff = CONST_DOUBLE_LOW (op1);
3153 else if (CONST_DOUBLE_LOW (op1) == 0
3154 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3155 {
3156 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3157 + HOST_BITS_PER_WIDE_INT;
3158 if (shift < 2 * HOST_BITS_PER_WIDE_INT - 1
3159 || GET_MODE_BITSIZE (mode) <= 2 * HOST_BITS_PER_WIDE_INT)
3160 return expand_shift (LSHIFT_EXPR, mode, op0,
3161 shift, target, unsignedp);
3162 }
3163 }
3164
3165 /* We used to test optimize here, on the grounds that it's better to
3166 produce a smaller program when -O is not used. But this causes
3167 such a terrible slowdown sometimes that it seems better to always
3168 use synth_mult. */
3169 if (coeff != 0)
3170 {
3171 /* Special case powers of two. */
3172 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3173 return expand_shift (LSHIFT_EXPR, mode, op0,
3174 floor_log2 (coeff), target, unsignedp);
3175
3176 /* Exclude cost of op0 from max_cost to match the cost
3177 calculation of the synth_mult. */
3178 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3179 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3180 max_cost))
3181 return expand_mult_const (mode, op0, coeff, target,
3182 &algorithm, variant);
3183 }
3184 }
3185
3186 if (GET_CODE (op0) == CONST_DOUBLE)
3187 {
3188 rtx temp = op0;
3189 op0 = op1;
3190 op1 = temp;
3191 }
3192
3193 /* Expand x*2.0 as x+x. */
3194 if (GET_CODE (op1) == CONST_DOUBLE
3195 && SCALAR_FLOAT_MODE_P (mode))
3196 {
3197 REAL_VALUE_TYPE d;
3198 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3199
3200 if (REAL_VALUES_EQUAL (d, dconst2))
3201 {
3202 op0 = force_reg (GET_MODE (op0), op0);
3203 return expand_binop (mode, add_optab, op0, op0,
3204 target, unsignedp, OPTAB_LIB_WIDEN);
3205 }
3206 }
3207
3208 /* This used to use umul_optab if unsigned, but for non-widening multiply
3209 there is no difference between signed and unsigned. */
3210 op0 = expand_binop (mode,
3211 ! unsignedp
3212 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3213 ? smulv_optab : smul_optab,
3214 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3215 gcc_assert (op0);
3216 return op0;
3217 }
3218
3219 /* Perform a widening multiplication and return an rtx for the result.
3220 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3221 TARGET is a suggestion for where to store the result (an rtx).
3222 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3223 or smul_widen_optab.
3224
3225 We check specially for a constant integer as OP1, comparing the
3226 cost of a widening multiply against the cost of a sequence of shifts
3227 and adds. */
3228
3229 rtx
3230 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3231 int unsignedp, optab this_optab)
3232 {
3233 bool speed = optimize_insn_for_speed_p ();
3234 rtx cop1;
3235
3236 if (CONST_INT_P (op1)
3237 && GET_MODE (op0) != VOIDmode
3238 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3239 this_optab == umul_widen_optab))
3240 && CONST_INT_P (cop1)
3241 && (INTVAL (cop1) >= 0
3242 || HWI_COMPUTABLE_MODE_P (mode)))
3243 {
3244 HOST_WIDE_INT coeff = INTVAL (cop1);
3245 int max_cost;
3246 enum mult_variant variant;
3247 struct algorithm algorithm;
3248
3249 /* Special case powers of two. */
3250 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3251 {
3252 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3253 return expand_shift (LSHIFT_EXPR, mode, op0,
3254 floor_log2 (coeff), target, unsignedp);
3255 }
3256
3257 /* Exclude cost of op0 from max_cost to match the cost
3258 calculation of the synth_mult. */
3259 max_cost = mul_widen_cost[speed][mode];
3260 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3261 max_cost))
3262 {
3263 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3264 return expand_mult_const (mode, op0, coeff, target,
3265 &algorithm, variant);
3266 }
3267 }
3268 return expand_binop (mode, this_optab, op0, op1, target,
3269 unsignedp, OPTAB_LIB_WIDEN);
3270 }
3271 \f
3272 /* Return the smallest n such that 2**n >= X. */
3273
3274 int
3275 ceil_log2 (unsigned HOST_WIDE_INT x)
3276 {
3277 return floor_log2 (x - 1) + 1;
3278 }
3279
3280 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3281 replace division by D, and put the least significant N bits of the result
3282 in *MULTIPLIER_PTR and return the most significant bit.
3283
3284 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3285 needed precision is in PRECISION (should be <= N).
3286
3287 PRECISION should be as small as possible so this function can choose
3288 multiplier more freely.
3289
3290 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3291 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3292
3293 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3294 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
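/* As a worked example (assuming 32-bit operations on a 64-bit host):
   for D == 5 with N == PRECISION == 32 this chooses the multiplier
   0xCCCCCCCD with *POST_SHIFT_PTR == 2 and a zero most significant bit,
   so for any 32-bit x

     x / 5 == (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 34);

   i.e. take the high half of the 64-bit product and shift it right by
   the post shift.  */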
3295
3296 static
3297 unsigned HOST_WIDE_INT
3298 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3299 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3300 {
3301 HOST_WIDE_INT mhigh_hi, mlow_hi;
3302 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3303 int lgup, post_shift;
3304 int pow, pow2;
3305 unsigned HOST_WIDE_INT nl, dummy1;
3306 HOST_WIDE_INT nh, dummy2;
3307
3308 /* lgup = ceil(log2(divisor)); */
3309 lgup = ceil_log2 (d);
3310
3311 gcc_assert (lgup <= n);
3312
3313 pow = n + lgup;
3314 pow2 = n + lgup - precision;
3315
3316 /* We could handle this with some effort, but this case is much
3317 better handled directly with a scc insn, so rely on caller using
3318 that. */
3319 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3320
3321 /* mlow = 2^(N + lgup)/d */
3322 if (pow >= HOST_BITS_PER_WIDE_INT)
3323 {
3324 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3325 nl = 0;
3326 }
3327 else
3328 {
3329 nh = 0;
3330 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3331 }
3332 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3333 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3334
3335 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3336 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3337 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3338 else
3339 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3340 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3341 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3342
3343 gcc_assert (!mhigh_hi || nh - d < d);
3344 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3345 /* Assert that mlow < mhigh. */
3346 gcc_assert (mlow_hi < mhigh_hi
3347 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3348
3349 /* If precision == N, then mlow, mhigh exceed 2^N
3350 (but they do not exceed 2^(N+1)). */
3351
3352 /* Reduce to lowest terms. */
3353 for (post_shift = lgup; post_shift > 0; post_shift--)
3354 {
3355 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3356 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3357 if (ml_lo >= mh_lo)
3358 break;
3359
3360 mlow_hi = 0;
3361 mlow_lo = ml_lo;
3362 mhigh_hi = 0;
3363 mhigh_lo = mh_lo;
3364 }
3365
3366 *post_shift_ptr = post_shift;
3367 *lgup_ptr = lgup;
3368 if (n < HOST_BITS_PER_WIDE_INT)
3369 {
3370 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3371 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3372 return mhigh_lo >= mask;
3373 }
3374 else
3375 {
3376 *multiplier_ptr = GEN_INT (mhigh_lo);
3377 return mhigh_hi;
3378 }
3379 }
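
/* Illustrative sketch (editorial addition, not part of the original file):
   how a caller consumes the value computed above, for unsigned division
   by the constant 7 in a 32-bit mode.  choose_multiplier (7, 32, 32, ...)
   gives lgup = 3, post_shift = 3 and the low 32 multiplier bits
   0x24924925, and reports that the full 33-bit multiplier has its top
   bit set, so the "mh != 0" sequence in expand_divmod applies.  Plain C
   types are used so the sketch stands alone; the helper name is ours.  */

static unsigned int
sketch_udiv_by_7 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0x24924925u;
  unsigned int t1 = (unsigned int) (prod >> 32); /* high half of x * m */
  unsigned int t2 = (x - t1) >> 1;		 /* folds in the implicit 33rd bit */
  return (t1 + t2) >> 2;			 /* post_shift - 1 == 2 */
}

/* E.g. sketch_udiv_by_7 (100) == 14 and
   sketch_udiv_by_7 (0xffffffffu) == 0x24924924.  */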
3380
3381 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3382 congruent to 1 (mod 2**N). */
3383
3384 static unsigned HOST_WIDE_INT
3385 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3386 {
3387 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3388
3389 /* The algorithm notes that the choice y = x satisfies
3390 x*y == 1 mod 2^3, since x is assumed odd.
3391 Each iteration doubles the number of bits of significance in y. */
3392
3393 unsigned HOST_WIDE_INT mask;
3394 unsigned HOST_WIDE_INT y = x;
3395 int nbit = 3;
3396
3397 mask = (n == HOST_BITS_PER_WIDE_INT
3398 ? ~(unsigned HOST_WIDE_INT) 0
3399 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3400
3401 while (nbit < n)
3402 {
3403 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3404 nbit *= 2;
3405 }
3406 return y;
3407 }
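
/* Illustrative sketch (editorial addition, not part of the original file):
   the same Newton/Hensel doubling written with plain C types.  For odd X
   the initial guess Y = X is correct to three bits, and each step doubles
   the number of correct low-order bits, so five steps suffice for a
   64-bit word.  The helper name is ours.  */

static unsigned long long
sketch_invert_mod2n (unsigned long long x, int n)
{
  unsigned long long mask = (n == 64
			     ? ~(unsigned long long) 0
			     : ((unsigned long long) 1 << n) - 1);
  unsigned long long y = x;
  int bits;

  for (bits = 3; bits < n; bits *= 2)
    y = (y * (2 - x * y)) & mask;
  return y;
}

/* E.g. sketch_invert_mod2n (3, 8) == 171, and (3 * 171) % 256 == 1.  */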
3408
3409 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
3410 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half
3411 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3412 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3413 product to become signed.
3414
3415 The result is put in TARGET if that is convenient.
3416
3417 MODE is the mode of operation. */
3418
3419 rtx
3420 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3421 rtx op1, rtx target, int unsignedp)
3422 {
3423 rtx tem;
3424 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3425
3426 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3427 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3428 tem = expand_and (mode, tem, op1, NULL_RTX);
3429 adj_operand
3430 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3431 adj_operand);
3432
3433 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3434 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3435 tem = expand_and (mode, tem, op0, NULL_RTX);
3436 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3437 target);
3438
3439 return target;
3440 }
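
/* Illustrative sketch (editorial addition, not part of the original file):
   the identity behind the adjustment above, for a 32-bit mode with plain
   C types.  The signed high part differs from the unsigned high part of
   the same bit patterns by B whenever A is negative and by A whenever B
   is negative:

     high_s (a, b) = high_u (a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)

   which is exactly what the two shift/and/add-or-subtract pairs compute.
   The helper name is ours; an arithmetic right shift of negative values
   is assumed.  */

static unsigned int
sketch_smulhi_from_umulhi (unsigned int a, unsigned int b)
{
  unsigned int hi = (unsigned int) (((unsigned long long) a * b) >> 32);

  /* (int) a >> 31 is 0 or -1; ANDing it with b selects b exactly when a
     is negative when reinterpreted as signed, mirroring the RTL above.  */
  hi -= (unsigned int) ((int) a >> 31) & b;
  hi -= (unsigned int) ((int) b >> 31) & a;
  return hi;	/* the high 32 bits of (long long) (int) a * (int) b */
}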
3441
3442 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3443
3444 static rtx
3445 extract_high_half (enum machine_mode mode, rtx op)
3446 {
3447 enum machine_mode wider_mode;
3448
3449 if (mode == word_mode)
3450 return gen_highpart (mode, op);
3451
3452 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3453
3454 wider_mode = GET_MODE_WIDER_MODE (mode);
3455 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3456 GET_MODE_BITSIZE (mode), 0, 1);
3457 return convert_modes (mode, wider_mode, op, 0);
3458 }
3459
3460 /* Like expand_mult_highpart, but only consider using a multiplication
3461 optab. OP1 is an rtx for the constant operand. */
3462
3463 static rtx
3464 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3465 rtx target, int unsignedp, int max_cost)
3466 {
3467 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3468 enum machine_mode wider_mode;
3469 optab moptab;
3470 rtx tem;
3471 int size;
3472 bool speed = optimize_insn_for_speed_p ();
3473
3474 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3475
3476 wider_mode = GET_MODE_WIDER_MODE (mode);
3477 size = GET_MODE_BITSIZE (mode);
3478
3479 /* Firstly, try using a multiplication insn that only generates the needed
3480 high part of the product, and in the sign flavor of unsignedp. */
3481 if (mul_highpart_cost[speed][mode] < max_cost)
3482 {
3483 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3484 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3485 unsignedp, OPTAB_DIRECT);
3486 if (tem)
3487 return tem;
3488 }
3489
3490 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3491 Need to adjust the result after the multiplication. */
3492 if (size - 1 < BITS_PER_WORD
3493 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3494 + 4 * add_cost[speed][mode] < max_cost))
3495 {
3496 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3497 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3498 unsignedp, OPTAB_DIRECT);
3499 if (tem)
3500 /* We used the wrong signedness. Adjust the result. */
3501 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3502 tem, unsignedp);
3503 }
3504
3505 /* Try widening multiplication. */
3506 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3507 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3508 && mul_widen_cost[speed][wider_mode] < max_cost)
3509 {
3510 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3511 unsignedp, OPTAB_WIDEN);
3512 if (tem)
3513 return extract_high_half (mode, tem);
3514 }
3515
3516 /* Try widening the mode and performing a non-widening multiplication. */
3517 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3518 && size - 1 < BITS_PER_WORD
3519 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3520 {
3521 rtx insns, wop0, wop1;
3522
3523 /* We need to widen the operands, for example to ensure the
3524 constant multiplier is correctly sign or zero extended.
3525 Use a sequence to clean up any instructions emitted by
3526 the conversions if things don't work out. */
3527 start_sequence ();
3528 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3529 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3530 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3531 unsignedp, OPTAB_WIDEN);
3532 insns = get_insns ();
3533 end_sequence ();
3534
3535 if (tem)
3536 {
3537 emit_insn (insns);
3538 return extract_high_half (mode, tem);
3539 }
3540 }
3541
3542 /* Try widening multiplication of opposite signedness, and adjust. */
3543 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3544 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3545 && size - 1 < BITS_PER_WORD
3546 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3547 + 4 * add_cost[speed][mode] < max_cost))
3548 {
3549 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3550 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3551 if (tem != 0)
3552 {
3553 tem = extract_high_half (mode, tem);
3554 /* We used the wrong signedness. Adjust the result. */
3555 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3556 target, unsignedp);
3557 }
3558 }
3559
3560 return 0;
3561 }
3562
3563 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3564 putting the high half of the result in TARGET if that is convenient,
3565 and return where the result is. If the operation can not be performed,
3566 0 is returned.
3567
3568 MODE is the mode of operation and result.
3569
3570 UNSIGNEDP nonzero means unsigned multiply.
3571
3572 MAX_COST is the total allowed cost for the expanded RTL. */
3573
3574 static rtx
3575 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3576 rtx target, int unsignedp, int max_cost)
3577 {
3578 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3579 unsigned HOST_WIDE_INT cnst1;
3580 int extra_cost;
3581 bool sign_adjust = false;
3582 enum mult_variant variant;
3583 struct algorithm alg;
3584 rtx tem;
3585 bool speed = optimize_insn_for_speed_p ();
3586
3587 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3588 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3589 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3590
3591 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3592
3593 /* We can't optimize modes wider than BITS_PER_WORD.
3594 ??? We might be able to perform double-word arithmetic if
3595 mode == word_mode; however, all the cost calculations in
3596 synth_mult etc. assume single-word operations. */
3597 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3598 return expand_mult_highpart_optab (mode, op0, op1, target,
3599 unsignedp, max_cost);
3600
3601 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3602
3603 /* Check whether we try to multiply by a negative constant. */
3604 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3605 {
3606 sign_adjust = true;
3607 extra_cost += add_cost[speed][mode];
3608 }
3609
3610 /* See whether shift/add multiplication is cheap enough. */
3611 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3612 max_cost - extra_cost))
3613 {
3614 /* See whether the specialized multiplication optabs are
3615 cheaper than the shift/add version. */
3616 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3617 alg.cost.cost + extra_cost);
3618 if (tem)
3619 return tem;
3620
3621 tem = convert_to_mode (wider_mode, op0, unsignedp);
3622 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3623 tem = extract_high_half (mode, tem);
3624
3625 /* Adjust result for signedness. */
3626 if (sign_adjust)
3627 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3628
3629 return tem;
3630 }
3631 return expand_mult_highpart_optab (mode, op0, op1, target,
3632 unsignedp, max_cost);
3633 }
3634
3635
3636 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3637
3638 static rtx
3639 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3640 {
3641 unsigned HOST_WIDE_INT masklow, maskhigh;
3642 rtx result, temp, shift, label;
3643 int logd;
3644
3645 logd = floor_log2 (d);
3646 result = gen_reg_rtx (mode);
3647
3648 /* Avoid conditional branches when they're expensive. */
3649 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3650 && optimize_insn_for_speed_p ())
3651 {
3652 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3653 mode, 0, -1);
3654 if (signmask)
3655 {
3656 signmask = force_reg (mode, signmask);
3657 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3658 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3659
3660 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3661 which instruction sequence to use. If logical right shifts
3662 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3663 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3664
3665 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3666 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3667 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3668 > COSTS_N_INSNS (2)))
3669 {
3670 temp = expand_binop (mode, xor_optab, op0, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 temp = expand_binop (mode, sub_optab, temp, signmask,
3673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 temp = expand_binop (mode, xor_optab, temp, signmask,
3677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3678 temp = expand_binop (mode, sub_optab, temp, signmask,
3679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3680 }
3681 else
3682 {
3683 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3684 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3685 signmask = force_reg (mode, signmask);
3686
3687 temp = expand_binop (mode, add_optab, op0, signmask,
3688 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3690 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691 temp = expand_binop (mode, sub_optab, temp, signmask,
3692 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3693 }
3694 return temp;
3695 }
3696 }
3697
3698 /* Mask contains the mode's signbit and the significant bits of the
3699 modulus. By including the signbit in the operation, many targets
3700 can avoid an explicit compare operation in the following comparison
3701 against zero. */
3702
3703 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3704 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3705 {
3706 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3707 maskhigh = -1;
3708 }
3709 else
3710 maskhigh = (HOST_WIDE_INT) -1
3711 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3712
3713 temp = expand_binop (mode, and_optab, op0,
3714 immed_double_const (masklow, maskhigh, mode),
3715 result, 1, OPTAB_LIB_WIDEN);
3716 if (temp != result)
3717 emit_move_insn (result, temp);
3718
3719 label = gen_label_rtx ();
3720 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3721
3722 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3723 0, OPTAB_LIB_WIDEN);
3724 masklow = (HOST_WIDE_INT) -1 << logd;
3725 maskhigh = -1;
3726 temp = expand_binop (mode, ior_optab, temp,
3727 immed_double_const (masklow, maskhigh, mode),
3728 result, 1, OPTAB_LIB_WIDEN);
3729 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3730 0, OPTAB_LIB_WIDEN);
3731 if (temp != result)
3732 emit_move_insn (result, temp);
3733 emit_label (label);
3734 return result;
3735 }
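
/* Illustrative sketch (editorial addition, not part of the original file):
   the cheaper of the two branch-free sequences above, for a 32-bit mode
   and a divisor of 2**logd, written with plain C types.  SIGN is all-ones
   for a negative operand (as emit_store_flag with LT and -1 produces),
   BIAS is then d - 1, and the result matches C's truncating % for
   power-of-two divisors.  The helper name is ours; an arithmetic right
   shift of negative values is assumed.  */

static int
sketch_smod_pow2 (int x, int logd)
{
  unsigned int mask = ((unsigned int) 1 << logd) - 1;
  unsigned int sign = (unsigned int) (x >> 31);	  /* 0 or all-ones */
  unsigned int bias = sign >> (32 - logd);	  /* 0 or d - 1 */

  /* The LSHIFTRT/ADD/AND/SUB sequence: ((x + bias) & mask) - bias.  */
  return (int) ((((unsigned int) x + bias) & mask) - bias);
}

/* E.g. sketch_smod_pow2 (-7, 2) == -3 and sketch_smod_pow2 (8, 3) == 0.  */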
3736
3737 /* Expand signed division of OP0 by a power of two D in mode MODE.
3738 This routine is only called for positive values of D. */
3739
3740 static rtx
3741 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3742 {
3743 rtx temp, label;
3744 int logd;
3745
3746 logd = floor_log2 (d);
3747
3748 if (d == 2
3749 && BRANCH_COST (optimize_insn_for_speed_p (),
3750 false) >= 1)
3751 {
3752 temp = gen_reg_rtx (mode);
3753 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3754 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3755 0, OPTAB_LIB_WIDEN);
3756 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3757 }
3758
3759 #ifdef HAVE_conditional_move
3760 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3761 >= 2)
3762 {
3763 rtx temp2;
3764
3765 /* ??? emit_conditional_move forces a stack adjustment via
3766 compare_from_rtx so, if the sequence is discarded, it will
3767 be lost. Do it now instead. */
3768 do_pending_stack_adjust ();
3769
3770 start_sequence ();
3771 temp2 = copy_to_mode_reg (mode, op0);
3772 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3773 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3774 temp = force_reg (mode, temp);
3775
3776 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3777 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3778 mode, temp, temp2, mode, 0);
3779 if (temp2)
3780 {
3781 rtx seq = get_insns ();
3782 end_sequence ();
3783 emit_insn (seq);
3784 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3785 }
3786 end_sequence ();
3787 }
3788 #endif
3789
3790 if (BRANCH_COST (optimize_insn_for_speed_p (),
3791 false) >= 2)
3792 {
3793 int ushift = GET_MODE_BITSIZE (mode) - logd;
3794
3795 temp = gen_reg_rtx (mode);
3796 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3797 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3798 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3799 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3800 else
3801 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3802 ushift, NULL_RTX, 1);
3803 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3804 0, OPTAB_LIB_WIDEN);
3805 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3806 }
3807
3808 label = gen_label_rtx ();
3809 temp = copy_to_mode_reg (mode, op0);
3810 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3811 expand_inc (temp, GEN_INT (d - 1));
3812 emit_label (label);
3813 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3814 }
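
/* Illustrative sketch (editorial addition, not part of the original file):
   the BRANCH_COST >= 2 sequence above for a 32-bit mode, with plain C
   types: build d - 1 when the operand is negative (here by an unsigned
   shift of the sign mask), add it in, and arithmetic-shift right by logd,
   which rounds toward zero like C division by 2**logd.  The helper name
   is ours; an arithmetic right shift of negative values is assumed.  */

static int
sketch_sdiv_pow2 (int x, int logd)
{
  unsigned int sign = (unsigned int) (x >> 31);	  /* 0 or all-ones */
  unsigned int bias = sign >> (32 - logd);	  /* 0 or 2**logd - 1 */
  return (int) ((unsigned int) x + bias) >> logd;
}

/* E.g. sketch_sdiv_pow2 (-7, 2) == -1 and sketch_sdiv_pow2 (-8, 2) == -2.  */
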
3815 \f
3816 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3817 if that is convenient, and returning where the result is.
3818 You may request either the quotient or the remainder as the result;
3819 specify REM_FLAG nonzero to get the remainder.
3820
3821 CODE is the expression code for which kind of division this is;
3822 it controls how rounding is done. MODE is the machine mode to use.
3823 UNSIGNEDP nonzero means do unsigned division. */
3824
3825 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3826 and then correct it by or'ing in missing high bits
3827 if result of ANDI is nonzero.
3828 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3829 This could optimize to a bfexts instruction.
3830 But C doesn't use these operations, so their optimizations are
3831 left for later. */
3832 /* ??? For modulo, we don't actually need the highpart of the first product,
3833 the low part will do nicely. And for small divisors, the second multiply
3834 can also be a low-part only multiply or even be completely left out.
3835 E.g. to calculate the remainder of a division by 3 with a 32 bit
3836 multiply, multiply with 0x55555556 and extract the upper two bits;
3837 the result is exact for inputs up to 0x1fffffff.
3838 The input range can be reduced by using cross-sum rules.
3839 For odd divisors >= 3, the following table gives right shift counts
3840 so that if a number is shifted by an integer multiple of the given
3841 amount, the remainder stays the same:
3842 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3843 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3844 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3845 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3846 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3847
3848 Cross-sum rules for even numbers can be derived by leaving as many bits
3849 to the right alone as the divisor has zeros to the right.
3850 E.g. if x is an unsigned 32 bit number:
3851 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3852 */
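
/* Illustrative sketch (editorial addition, not part of the original file):
   the "remainder of a division by 3" trick from the comment above, with
   plain C types.  0x55555556 is (2**32 + 2) / 3, so for x = 3k + r the
   low 32 bits of x * 0x55555556 equal 2k + r * 0x55555556 (mod 2**32);
   for x <= 0x1fffffff the addend 2k is too small to disturb the top two
   bits, which therefore hold exactly r.  The helper name is ours.  */

static unsigned int
sketch_rem3 (unsigned int x)	/* valid for x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;
}

/* E.g. sketch_rem3 (0x1fffffff) == 1,
   since 0x1fffffff == 3 * 0x0aaaaaaa + 1.  */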
3853
3854 rtx
3855 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3856 rtx op0, rtx op1, rtx target, int unsignedp)
3857 {
3858 enum machine_mode compute_mode;
3859 rtx tquotient;
3860 rtx quotient = 0, remainder = 0;
3861 rtx last;
3862 int size;
3863 rtx insn;
3864 optab optab1, optab2;
3865 int op1_is_constant, op1_is_pow2 = 0;
3866 int max_cost, extra_cost;
3867 static HOST_WIDE_INT last_div_const = 0;
3868 static HOST_WIDE_INT ext_op1;
3869 bool speed = optimize_insn_for_speed_p ();
3870
3871 op1_is_constant = CONST_INT_P (op1);
3872 if (op1_is_constant)
3873 {
3874 ext_op1 = INTVAL (op1);
3875 if (unsignedp)
3876 ext_op1 &= GET_MODE_MASK (mode);
3877 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3878 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3879 }
3880
3881 /*
3882 This is the structure of expand_divmod:
3883
3884 First comes code to fix up the operands so we can perform the operations
3885 correctly and efficiently.
3886
3887 Second comes a switch statement with code specific for each rounding mode.
3888 For some special operands this code emits all RTL for the desired
3889 operation, for other cases, it generates only a quotient and stores it in
3890 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3891 to indicate that it has not done anything.
3892
3893 Last comes code that finishes the operation. If QUOTIENT is set and
3894 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3895 QUOTIENT is not set, it is computed using trunc rounding.
3896
3897 We try to generate special code for division and remainder when OP1 is a
3898 constant. If |OP1| = 2**n we can use shifts and some other fast
3899 operations. For other values of OP1, we compute a carefully selected
3900 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3901 by m.
3902
3903 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3904 half of the product. Different strategies for generating the product are
3905 implemented in expand_mult_highpart.
3906
3907 If what we actually want is the remainder, we generate that by another
3908 by-constant multiplication and a subtraction. */
3909
3910 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3911 code below will malfunction if we are, so check here and handle
3912 the special case if so. */
3913 if (op1 == const1_rtx)
3914 return rem_flag ? const0_rtx : op0;
3915
3916 /* When dividing by -1, we could get an overflow.
3917 negv_optab can handle overflows. */
3918 if (! unsignedp && op1 == constm1_rtx)
3919 {
3920 if (rem_flag)
3921 return const0_rtx;
3922 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3923 ? negv_optab : neg_optab, op0, target, 0);
3924 }
3925
3926 if (target
3927 /* Don't use the function value register as a target
3928 since we have to read it as well as write it,
3929 and function-inlining gets confused by this. */
3930 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3931 /* Don't clobber an operand while doing a multi-step calculation. */
3932 || ((rem_flag || op1_is_constant)
3933 && (reg_mentioned_p (target, op0)
3934 || (MEM_P (op0) && MEM_P (target))))
3935 || reg_mentioned_p (target, op1)
3936 || (MEM_P (op1) && MEM_P (target))))
3937 target = 0;
3938
3939 /* Get the mode in which to perform this computation. Normally it will
3940 be MODE, but sometimes we can't do the desired operation in MODE.
3941 If so, pick a wider mode in which we can do the operation. Convert
3942 to that mode at the start to avoid repeated conversions.
3943
3944 First see what operations we need. These depend on the expression
3945 we are evaluating. (We assume that divxx3 insns exist under the
3946 same conditions that modxx3 insns do and that these insns don't normally
3947 fail. If these assumptions are not correct, we may generate less
3948 efficient code in some cases.)
3949
3950 Then see if we find a mode in which we can open-code that operation
3951 (either a division, modulus, or shift). Finally, check for the smallest
3952 mode for which we can do the operation with a library call. */
3953
3954 /* We might want to refine this now that we have division-by-constant
3955 optimization. Since expand_mult_highpart tries so many variants, it is
3956 not straightforward to generalize this. Maybe we should make an array
3957 of possible modes in init_expmed? Save this for GCC 2.7. */
3958
3959 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3960 ? (unsignedp ? lshr_optab : ashr_optab)
3961 : (unsignedp ? udiv_optab : sdiv_optab));
3962 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3963 ? optab1
3964 : (unsignedp ? udivmod_optab : sdivmod_optab));
3965
3966 for (compute_mode = mode; compute_mode != VOIDmode;
3967 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3968 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3969 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3970 break;
3971
3972 if (compute_mode == VOIDmode)
3973 for (compute_mode = mode; compute_mode != VOIDmode;
3974 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3975 if (optab_libfunc (optab1, compute_mode)
3976 || optab_libfunc (optab2, compute_mode))
3977 break;
3978
3979 /* If we still couldn't find a mode, use MODE, but expand_binop will
3980 probably die. */
3981 if (compute_mode == VOIDmode)
3982 compute_mode = mode;
3983
3984 if (target && GET_MODE (target) == compute_mode)
3985 tquotient = target;
3986 else
3987 tquotient = gen_reg_rtx (compute_mode);
3988
3989 size = GET_MODE_BITSIZE (compute_mode);
3990 #if 0
3991 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3992 (mode), and thereby get better code when OP1 is a constant. Do that
3993 later. It will require going over all usages of SIZE below. */
3994 size = GET_MODE_BITSIZE (mode);
3995 #endif
3996
3997 /* Only deduct something for a REM if the last divide done was
3998 for a different constant. Then set the constant of the last
3999 divide. */
4000 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4001 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4002 && INTVAL (op1) == last_div_const))
4003 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4004
4005 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4006
4007 /* Now convert to the best mode to use. */
4008 if (compute_mode != mode)
4009 {
4010 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4011 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4012
4013 /* convert_modes may have placed op1 into a register, so we
4014 must recompute the following. */
4015 op1_is_constant = CONST_INT_P (op1);
4016 op1_is_pow2 = (op1_is_constant
4017 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4018 || (! unsignedp
4019 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4020 }
4021
4022 /* If one of the operands is a volatile MEM, copy it into a register. */
4023
4024 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4025 op0 = force_reg (compute_mode, op0);
4026 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4027 op1 = force_reg (compute_mode, op1);
4028
4029 /* If we need the remainder or if OP1 is constant, we need to
4030 put OP0 in a register in case it has any queued subexpressions. */
4031 if (rem_flag || op1_is_constant)
4032 op0 = force_reg (compute_mode, op0);
4033
4034 last = get_last_insn ();
4035
4036 /* Promote floor rounding to trunc rounding for unsigned operations. */
4037 if (unsignedp)
4038 {
4039 if (code == FLOOR_DIV_EXPR)
4040 code = TRUNC_DIV_EXPR;
4041 if (code == FLOOR_MOD_EXPR)
4042 code = TRUNC_MOD_EXPR;
4043 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4044 code = TRUNC_DIV_EXPR;
4045 }
4046
4047 if (op1 != const0_rtx)
4048 switch (code)
4049 {
4050 case TRUNC_MOD_EXPR:
4051 case TRUNC_DIV_EXPR:
4052 if (op1_is_constant)
4053 {
4054 if (unsignedp)
4055 {
4056 unsigned HOST_WIDE_INT mh;
4057 int pre_shift, post_shift;
4058 int dummy;
4059 rtx ml;
4060 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4061 & GET_MODE_MASK (compute_mode));
4062
4063 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4064 {
4065 pre_shift = floor_log2 (d);
4066 if (rem_flag)
4067 {
4068 remainder
4069 = expand_binop (compute_mode, and_optab, op0,
4070 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4071 remainder, 1,
4072 OPTAB_LIB_WIDEN);
4073 if (remainder)
4074 return gen_lowpart (mode, remainder);
4075 }
4076 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4077 pre_shift, tquotient, 1);
4078 }
4079 else if (size <= HOST_BITS_PER_WIDE_INT)
4080 {
4081 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4082 {
4083 /* Most significant bit of divisor is set; emit an scc
4084 insn. */
4085 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4086 compute_mode, 1, 1);
4087 }
4088 else
4089 {
4090 /* Find a suitable multiplier and right shift count
4091 instead of multiplying with D. */
4092
4093 mh = choose_multiplier (d, size, size,
4094 &ml, &post_shift, &dummy);
4095
4096 /* If the suggested multiplier is more than SIZE bits,
4097 we can do better for even divisors, using an
4098 initial right shift. */
4099 if (mh != 0 && (d & 1) == 0)
4100 {
4101 pre_shift = floor_log2 (d & -d);
4102 mh = choose_multiplier (d >> pre_shift, size,
4103 size - pre_shift,
4104 &ml, &post_shift, &dummy);
4105 gcc_assert (!mh);
4106 }
4107 else
4108 pre_shift = 0;
4109
4110 if (mh != 0)
4111 {
4112 rtx t1, t2, t3, t4;
4113
4114 if (post_shift - 1 >= BITS_PER_WORD)
4115 goto fail1;
4116
4117 extra_cost
4118 = (shift_cost[speed][compute_mode][post_shift - 1]
4119 + shift_cost[speed][compute_mode][1]
4120 + 2 * add_cost[speed][compute_mode]);
4121 t1 = expand_mult_highpart (compute_mode, op0, ml,
4122 NULL_RTX, 1,
4123 max_cost - extra_cost);
4124 if (t1 == 0)
4125 goto fail1;
4126 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4127 op0, t1),
4128 NULL_RTX);
4129 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4130 t2, 1, NULL_RTX, 1);
4131 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4132 t1, t3),
4133 NULL_RTX);
4134 quotient = expand_shift
4135 (RSHIFT_EXPR, compute_mode, t4,
4136 post_shift - 1, tquotient, 1);
4137 }
4138 else
4139 {
4140 rtx t1, t2;
4141
4142 if (pre_shift >= BITS_PER_WORD
4143 || post_shift >= BITS_PER_WORD)
4144 goto fail1;
4145
4146 t1 = expand_shift
4147 (RSHIFT_EXPR, compute_mode, op0,
4148 pre_shift, NULL_RTX, 1);
4149 extra_cost
4150 = (shift_cost[speed][compute_mode][pre_shift]
4151 + shift_cost[speed][compute_mode][post_shift]);
4152 t2 = expand_mult_highpart (compute_mode, t1, ml,
4153 NULL_RTX, 1,
4154 max_cost - extra_cost);
4155 if (t2 == 0)
4156 goto fail1;
4157 quotient = expand_shift
4158 (RSHIFT_EXPR, compute_mode, t2,
4159 post_shift, tquotient, 1);
4160 }
4161 }
4162 }
4163 else /* Too wide mode to use tricky code */
4164 break;
4165
4166 insn = get_last_insn ();
4167 if (insn != last)
4168 set_dst_reg_note (insn, REG_EQUAL,
4169 gen_rtx_UDIV (compute_mode, op0, op1),
4170 quotient);
4171 }
4172 else /* TRUNC_DIV, signed */
4173 {
4174 unsigned HOST_WIDE_INT ml;
4175 int lgup, post_shift;
4176 rtx mlr;
4177 HOST_WIDE_INT d = INTVAL (op1);
4178 unsigned HOST_WIDE_INT abs_d;
4179
4180 /* Since d might be INT_MIN, we have to cast to
4181 unsigned HOST_WIDE_INT before negating to avoid
4182 undefined signed overflow. */
4183 abs_d = (d >= 0
4184 ? (unsigned HOST_WIDE_INT) d
4185 : - (unsigned HOST_WIDE_INT) d);
4186
4187 /* n rem d = n rem -d */
4188 if (rem_flag && d < 0)
4189 {
4190 d = abs_d;
4191 op1 = gen_int_mode (abs_d, compute_mode);
4192 }
4193
4194 if (d == 1)
4195 quotient = op0;
4196 else if (d == -1)
4197 quotient = expand_unop (compute_mode, neg_optab, op0,
4198 tquotient, 0);
4199 else if (HOST_BITS_PER_WIDE_INT >= size
4200 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4201 {
4202 /* This case is not handled correctly below. */
4203 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4204 compute_mode, 1, 1);
4205 if (quotient == 0)
4206 goto fail1;
4207 }
4208 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4209 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4210 : sdiv_pow2_cheap[speed][compute_mode])
4211 /* We assume that cheap metric is true if the
4212 optab has an expander for this mode. */
4213 && ((optab_handler ((rem_flag ? smod_optab
4214 : sdiv_optab),
4215 compute_mode)
4216 != CODE_FOR_nothing)
4217 || (optab_handler (sdivmod_optab,
4218 compute_mode)
4219 != CODE_FOR_nothing)))
4220 ;
4221 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4222 {
4223 if (rem_flag)
4224 {
4225 remainder = expand_smod_pow2 (compute_mode, op0, d);
4226 if (remainder)
4227 return gen_lowpart (mode, remainder);
4228 }
4229
4230 if (sdiv_pow2_cheap[speed][compute_mode]
4231 && ((optab_handler (sdiv_optab, compute_mode)
4232 != CODE_FOR_nothing)
4233 || (optab_handler (sdivmod_optab, compute_mode)
4234 != CODE_FOR_nothing)))
4235 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4236 compute_mode, op0,
4237 gen_int_mode (abs_d,
4238 compute_mode),
4239 NULL_RTX, 0);
4240 else
4241 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4242
4243 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4244 negate the quotient. */
4245 if (d < 0)
4246 {
4247 insn = get_last_insn ();
4248 if (insn != last
4249 && abs_d < ((unsigned HOST_WIDE_INT) 1
4250 << (HOST_BITS_PER_WIDE_INT - 1)))
4251 set_dst_reg_note (insn, REG_EQUAL,
4252 gen_rtx_DIV (compute_mode, op0,
4253 gen_int_mode
4254 (abs_d,
4255 compute_mode)),
4256 quotient);
4257
4258 quotient = expand_unop (compute_mode, neg_optab,
4259 quotient, quotient, 0);
4260 }
4261 }
4262 else if (size <= HOST_BITS_PER_WIDE_INT)
4263 {
4264 choose_multiplier (abs_d, size, size - 1,
4265 &mlr, &post_shift, &lgup);
4266 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4267 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4268 {
4269 rtx t1, t2, t3;
4270
4271 if (post_shift >= BITS_PER_WORD
4272 || size - 1 >= BITS_PER_WORD)
4273 goto fail1;
4274
4275 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4276 + shift_cost[speed][compute_mode][size - 1]
4277 + add_cost[speed][compute_mode]);
4278 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4279 NULL_RTX, 0,
4280 max_cost - extra_cost);
4281 if (t1 == 0)
4282 goto fail1;
4283 t2 = expand_shift
4284 (RSHIFT_EXPR, compute_mode, t1,
4285 post_shift, NULL_RTX, 0);
4286 t3 = expand_shift
4287 (RSHIFT_EXPR, compute_mode, op0,
4288 size - 1, NULL_RTX, 0);
4289 if (d < 0)
4290 quotient
4291 = force_operand (gen_rtx_MINUS (compute_mode,
4292 t3, t2),
4293 tquotient);
4294 else
4295 quotient
4296 = force_operand (gen_rtx_MINUS (compute_mode,
4297 t2, t3),
4298 tquotient);
4299 }
4300 else
4301 {
4302 rtx t1, t2, t3, t4;
4303
4304 if (post_shift >= BITS_PER_WORD
4305 || size - 1 >= BITS_PER_WORD)
4306 goto fail1;
4307
4308 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4309 mlr = gen_int_mode (ml, compute_mode);
4310 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4311 + shift_cost[speed][compute_mode][size - 1]
4312 + 2 * add_cost[speed][compute_mode]);
4313 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4314 NULL_RTX, 0,
4315 max_cost - extra_cost);
4316 if (t1 == 0)
4317 goto fail1;
4318 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4319 t1, op0),
4320 NULL_RTX);
4321 t3 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t2,
4323 post_shift, NULL_RTX, 0);
4324 t4 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t4, t3),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t3, t4),
4336 tquotient);
4337 }
4338 }
4339 else /* Too wide mode to use tricky code */
4340 break;
4341
4342 insn = get_last_insn ();
4343 if (insn != last)
4344 set_dst_reg_note (insn, REG_EQUAL,
4345 gen_rtx_DIV (compute_mode, op0, op1),
4346 quotient);
4347 }
4348 break;
4349 }
4350 fail1:
4351 delete_insns_since (last);
4352 break;
4353
4354 case FLOOR_DIV_EXPR:
4355 case FLOOR_MOD_EXPR:
4356 /* We will come here only for signed operations. */
4357 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4358 {
4359 unsigned HOST_WIDE_INT mh;
4360 int pre_shift, lgup, post_shift;
4361 HOST_WIDE_INT d = INTVAL (op1);
4362 rtx ml;
4363
4364 if (d > 0)
4365 {
4366 /* We could just as easily deal with negative constants here,
4367 but it does not seem worth the trouble for GCC 2.6. */
4368 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4369 {
4370 pre_shift = floor_log2 (d);
4371 if (rem_flag)
4372 {
4373 remainder = expand_binop (compute_mode, and_optab, op0,
4374 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4375 remainder, 0, OPTAB_LIB_WIDEN);
4376 if (remainder)
4377 return gen_lowpart (mode, remainder);
4378 }
4379 quotient = expand_shift
4380 (RSHIFT_EXPR, compute_mode, op0,
4381 pre_shift, tquotient, 0);
4382 }
4383 else
4384 {
4385 rtx t1, t2, t3, t4;
4386
4387 mh = choose_multiplier (d, size, size - 1,
4388 &ml, &post_shift, &lgup);
4389 gcc_assert (!mh);
4390
4391 if (post_shift < BITS_PER_WORD
4392 && size - 1 < BITS_PER_WORD)
4393 {
4394 t1 = expand_shift
4395 (RSHIFT_EXPR, compute_mode, op0,
4396 size - 1, NULL_RTX, 0);
4397 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4398 NULL_RTX, 0, OPTAB_WIDEN);
4399 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4400 + shift_cost[speed][compute_mode][size - 1]
4401 + 2 * add_cost[speed][compute_mode]);
4402 t3 = expand_mult_highpart (compute_mode, t2, ml,
4403 NULL_RTX, 1,
4404 max_cost - extra_cost);
4405 if (t3 != 0)
4406 {
4407 t4 = expand_shift
4408 (RSHIFT_EXPR, compute_mode, t3,
4409 post_shift, NULL_RTX, 1);
4410 quotient = expand_binop (compute_mode, xor_optab,
4411 t4, t1, tquotient, 0,
4412 OPTAB_WIDEN);
4413 }
4414 }
4415 }
4416 }
4417 else
4418 {
4419 rtx nsign, t1, t2, t3, t4;
4420 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4421 op0, constm1_rtx), NULL_RTX);
4422 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4423 0, OPTAB_WIDEN);
4424 nsign = expand_shift
4425 (RSHIFT_EXPR, compute_mode, t2,
4426 size - 1, NULL_RTX, 0);
4427 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4428 NULL_RTX);
4429 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4430 NULL_RTX, 0);
4431 if (t4)
4432 {
4433 rtx t5;
4434 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4435 NULL_RTX, 0);
4436 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4437 t4, t5),
4438 tquotient);
4439 }
4440 }
4441 }
4442
4443 if (quotient != 0)
4444 break;
4445 delete_insns_since (last);
4446
4447 /* Try using an instruction that produces both the quotient and
4448 remainder, using truncation. We can easily compensate the quotient
4449 or remainder to get floor rounding, once we have the remainder.
4450 Notice that we compute also the final remainder value here,
4451 and return the result right away. */
4452 if (target == 0 || GET_MODE (target) != compute_mode)
4453 target = gen_reg_rtx (compute_mode);
4454
4455 if (rem_flag)
4456 {
4457 remainder
4458 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4459 quotient = gen_reg_rtx (compute_mode);
4460 }
4461 else
4462 {
4463 quotient
4464 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4465 remainder = gen_reg_rtx (compute_mode);
4466 }
4467
4468 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4469 quotient, remainder, 0))
4470 {
4471 /* This could be computed with a branch-less sequence.
4472 Save that for later. */
4473 rtx tem;
4474 rtx label = gen_label_rtx ();
4475 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4476 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4477 NULL_RTX, 0, OPTAB_WIDEN);
4478 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4479 expand_dec (quotient, const1_rtx);
4480 expand_inc (remainder, op1);
4481 emit_label (label);
4482 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4483 }
4484
4485 /* No luck with division elimination or divmod. Have to do it
4486 by conditionally adjusting op0 *and* the result. */
4487 {
4488 rtx label1, label2, label3, label4, label5;
4489 rtx adjusted_op0;
4490 rtx tem;
4491
4492 quotient = gen_reg_rtx (compute_mode);
4493 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4494 label1 = gen_label_rtx ();
4495 label2 = gen_label_rtx ();
4496 label3 = gen_label_rtx ();
4497 label4 = gen_label_rtx ();
4498 label5 = gen_label_rtx ();
4499 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4500 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4501 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4502 quotient, 0, OPTAB_LIB_WIDEN);
4503 if (tem != quotient)
4504 emit_move_insn (quotient, tem);
4505 emit_jump_insn (gen_jump (label5));
4506 emit_barrier ();
4507 emit_label (label1);
4508 expand_inc (adjusted_op0, const1_rtx);
4509 emit_jump_insn (gen_jump (label4));
4510 emit_barrier ();
4511 emit_label (label2);
4512 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4513 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4514 quotient, 0, OPTAB_LIB_WIDEN);
4515 if (tem != quotient)
4516 emit_move_insn (quotient, tem);
4517 emit_jump_insn (gen_jump (label5));
4518 emit_barrier ();
4519 emit_label (label3);
4520 expand_dec (adjusted_op0, const1_rtx);
4521 emit_label (label4);
4522 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523 quotient, 0, OPTAB_LIB_WIDEN);
4524 if (tem != quotient)
4525 emit_move_insn (quotient, tem);
4526 expand_dec (quotient, const1_rtx);
4527 emit_label (label5);
4528 }
4529 break;
4530
4531 case CEIL_DIV_EXPR:
4532 case CEIL_MOD_EXPR:
4533 if (unsignedp)
4534 {
4535 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4536 {
4537 rtx t1, t2, t3;
4538 unsigned HOST_WIDE_INT d = INTVAL (op1);
4539 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4540 floor_log2 (d), tquotient, 1);
4541 t2 = expand_binop (compute_mode, and_optab, op0,
4542 GEN_INT (d - 1),
4543 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4544 t3 = gen_reg_rtx (compute_mode);
4545 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4546 compute_mode, 1, 1);
4547 if (t3 == 0)
4548 {
4549 rtx lab;
4550 lab = gen_label_rtx ();
4551 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4552 expand_inc (t1, const1_rtx);
4553 emit_label (lab);
4554 quotient = t1;
4555 }
4556 else
4557 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4558 t1, t3),
4559 tquotient);
4560 break;
4561 }
4562
4563 /* Try using an instruction that produces both the quotient and
4564 remainder, using truncation. We can easily compensate the
4565 quotient or remainder to get ceiling rounding, once we have the
4566 remainder. Notice that we compute also the final remainder
4567 value here, and return the result right away. */
4568 if (target == 0 || GET_MODE (target) != compute_mode)
4569 target = gen_reg_rtx (compute_mode);
4570
4571 if (rem_flag)
4572 {
4573 remainder = (REG_P (target)
4574 ? target : gen_reg_rtx (compute_mode));
4575 quotient = gen_reg_rtx (compute_mode);
4576 }
4577 else
4578 {
4579 quotient = (REG_P (target)
4580 ? target : gen_reg_rtx (compute_mode));
4581 remainder = gen_reg_rtx (compute_mode);
4582 }
4583
4584 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4585 remainder, 1))
4586 {
4587 /* This could be computed with a branch-less sequence.
4588 Save that for later. */
4589 rtx label = gen_label_rtx ();
4590 do_cmp_and_jump (remainder, const0_rtx, EQ,
4591 compute_mode, label);
4592 expand_inc (quotient, const1_rtx);
4593 expand_dec (remainder, op1);
4594 emit_label (label);
4595 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4596 }
4597
4598 /* No luck with division elimination or divmod. Have to do it
4599 by conditionally adjusting op0 *and* the result. */
4600 {
4601 rtx label1, label2;
4602 rtx adjusted_op0, tem;
4603
4604 quotient = gen_reg_rtx (compute_mode);
4605 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4606 label1 = gen_label_rtx ();
4607 label2 = gen_label_rtx ();
4608 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4609 compute_mode, label1);
4610 emit_move_insn (quotient, const0_rtx);
4611 emit_jump_insn (gen_jump (label2));
4612 emit_barrier ();
4613 emit_label (label1);
4614 expand_dec (adjusted_op0, const1_rtx);
4615 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4616 quotient, 1, OPTAB_LIB_WIDEN);
4617 if (tem != quotient)
4618 emit_move_insn (quotient, tem);
4619 expand_inc (quotient, const1_rtx);
4620 emit_label (label2);
4621 }
4622 }
4623 else /* signed */
4624 {
4625 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4626 && INTVAL (op1) >= 0)
4627 {
4628 /* This is extremely similar to the code for the unsigned case
4629 above. For 2.7 we should merge these variants, but for
4630 2.6.1 I don't want to touch the code for unsigned since that
4631 get used in C. The signed case will only be used by other
4632 languages (Ada). */
4633
4634 rtx t1, t2, t3;
4635 unsigned HOST_WIDE_INT d = INTVAL (op1);
4636 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4637 floor_log2 (d), tquotient, 0);
4638 t2 = expand_binop (compute_mode, and_optab, op0,
4639 GEN_INT (d - 1),
4640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4641 t3 = gen_reg_rtx (compute_mode);
4642 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4643 compute_mode, 1, 1);
4644 if (t3 == 0)
4645 {
4646 rtx lab;
4647 lab = gen_label_rtx ();
4648 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4649 expand_inc (t1, const1_rtx);
4650 emit_label (lab);
4651 quotient = t1;
4652 }
4653 else
4654 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4655 t1, t3),
4656 tquotient);
4657 break;
4658 }
4659
4660 /* Try using an instruction that produces both the quotient and
4661 remainder, using truncation. We can easily compensate the
4662 quotient or remainder to get ceiling rounding, once we have the
4663 remainder. Notice that we compute also the final remainder
4664 value here, and return the result right away. */
4665 if (target == 0 || GET_MODE (target) != compute_mode)
4666 target = gen_reg_rtx (compute_mode);
4667 if (rem_flag)
4668 {
4669 remainder = (REG_P (target)
4670 ? target : gen_reg_rtx (compute_mode));
4671 quotient = gen_reg_rtx (compute_mode);
4672 }
4673 else
4674 {
4675 quotient = (REG_P (target)
4676 ? target : gen_reg_rtx (compute_mode));
4677 remainder = gen_reg_rtx (compute_mode);
4678 }
4679
4680 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4681 remainder, 0))
4682 {
4683 /* This could be computed with a branch-less sequence.
4684 Save that for later. */
4685 rtx tem;
4686 rtx label = gen_label_rtx ();
4687 do_cmp_and_jump (remainder, const0_rtx, EQ,
4688 compute_mode, label);
4689 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4690 NULL_RTX, 0, OPTAB_WIDEN);
4691 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4692 expand_inc (quotient, const1_rtx);
4693 expand_dec (remainder, op1);
4694 emit_label (label);
4695 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4696 }
4697
4698 /* No luck with division elimination or divmod. Have to do it
4699 by conditionally adjusting op0 *and* the result. */
4700 {
4701 rtx label1, label2, label3, label4, label5;
4702 rtx adjusted_op0;
4703 rtx tem;
4704
4705 quotient = gen_reg_rtx (compute_mode);
4706 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4707 label1 = gen_label_rtx ();
4708 label2 = gen_label_rtx ();
4709 label3 = gen_label_rtx ();
4710 label4 = gen_label_rtx ();
4711 label5 = gen_label_rtx ();
4712 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4713 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4714 compute_mode, label1);
4715 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716 quotient, 0, OPTAB_LIB_WIDEN);
4717 if (tem != quotient)
4718 emit_move_insn (quotient, tem);
4719 emit_jump_insn (gen_jump (label5));
4720 emit_barrier ();
4721 emit_label (label1);
4722 expand_dec (adjusted_op0, const1_rtx);
4723 emit_jump_insn (gen_jump (label4));
4724 emit_barrier ();
4725 emit_label (label2);
4726 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4727 compute_mode, label3);
4728 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4729 quotient, 0, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 emit_jump_insn (gen_jump (label5));
4733 emit_barrier ();
4734 emit_label (label3);
4735 expand_inc (adjusted_op0, const1_rtx);
4736 emit_label (label4);
4737 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4738 quotient, 0, OPTAB_LIB_WIDEN);
4739 if (tem != quotient)
4740 emit_move_insn (quotient, tem);
4741 expand_inc (quotient, const1_rtx);
4742 emit_label (label5);
4743 }
4744 }
4745 break;
4746
4747 case EXACT_DIV_EXPR:
4748 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4749 {
4750 HOST_WIDE_INT d = INTVAL (op1);
4751 unsigned HOST_WIDE_INT ml;
4752 int pre_shift;
4753 rtx t1;
4754
4755 pre_shift = floor_log2 (d & -d);
4756 ml = invert_mod2n (d >> pre_shift, size);
4757 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4758 pre_shift, NULL_RTX, unsignedp);
4759 quotient = expand_mult (compute_mode, t1,
4760 gen_int_mode (ml, compute_mode),
4761 NULL_RTX, 1);
4762
4763 insn = get_last_insn ();
4764 set_dst_reg_note (insn, REG_EQUAL,
4765 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4766 compute_mode, op0, op1),
4767 quotient);
4768 }
4769 break;
4770
4771 case ROUND_DIV_EXPR:
4772 case ROUND_MOD_EXPR:
4773 if (unsignedp)
4774 {
4775 rtx tem;
4776 rtx label;
4777 label = gen_label_rtx ();
4778 quotient = gen_reg_rtx (compute_mode);
4779 remainder = gen_reg_rtx (compute_mode);
4780 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4781 {
4782 rtx tem;
4783 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4784 quotient, 1, OPTAB_LIB_WIDEN);
4785 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4786 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4787 remainder, 1, OPTAB_LIB_WIDEN);
4788 }
4789 tem = plus_constant (compute_mode, op1, -1);
4790 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4791 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4792 expand_inc (quotient, const1_rtx);
4793 expand_dec (remainder, op1);
4794 emit_label (label);
4795 }
4796 else
4797 {
4798 rtx abs_rem, abs_op1, tem, mask;
4799 rtx label;
4800 label = gen_label_rtx ();
4801 quotient = gen_reg_rtx (compute_mode);
4802 remainder = gen_reg_rtx (compute_mode);
4803 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4804 {
4805 rtx tem;
4806 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4807 quotient, 0, OPTAB_LIB_WIDEN);
4808 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4809 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4810 remainder, 0, OPTAB_LIB_WIDEN);
4811 }
4812 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4813 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4814 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4815 1, NULL_RTX, 1);
4816 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4817 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4818 NULL_RTX, 0, OPTAB_WIDEN);
4819 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4820 size - 1, NULL_RTX, 0);
4821 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4822 NULL_RTX, 0, OPTAB_WIDEN);
4823 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4824 NULL_RTX, 0, OPTAB_WIDEN);
4825 expand_inc (quotient, tem);
4826 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4827 NULL_RTX, 0, OPTAB_WIDEN);
4828 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4829 NULL_RTX, 0, OPTAB_WIDEN);
4830 expand_dec (remainder, tem);
4831 emit_label (label);
4832 }
4833 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4834
4835 default:
4836 gcc_unreachable ();
4837 }
4838
4839 if (quotient == 0)
4840 {
4841 if (target && GET_MODE (target) != compute_mode)
4842 target = 0;
4843
4844 if (rem_flag)
4845 {
4846 /* Try to produce the remainder without producing the quotient.
4847 If we seem to have a divmod pattern that does not require widening,
4848 don't try widening here. We should really have a WIDEN argument
4849 to expand_twoval_binop, since what we'd really like to do here is
4850 1) try a mod insn in compute_mode
4851 2) try a divmod insn in compute_mode
4852 3) try a div insn in compute_mode and multiply-subtract to get
4853 remainder
4854 4) try the same things with widening allowed. */
4855 remainder
4856 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4857 op0, op1, target,
4858 unsignedp,
4859 ((optab_handler (optab2, compute_mode)
4860 != CODE_FOR_nothing)
4861 ? OPTAB_DIRECT : OPTAB_WIDEN));
4862 if (remainder == 0)
4863 {
4864 /* No luck there. Can we do remainder and divide at once
4865 without a library call? */
4866 remainder = gen_reg_rtx (compute_mode);
4867 if (! expand_twoval_binop ((unsignedp
4868 ? udivmod_optab
4869 : sdivmod_optab),
4870 op0, op1,
4871 NULL_RTX, remainder, unsignedp))
4872 remainder = 0;
4873 }
4874
4875 if (remainder)
4876 return gen_lowpart (mode, remainder);
4877 }
4878
4879 /* Produce the quotient. Try a quotient insn, but not a library call.
4880 If we have a divmod in this mode, use it in preference to widening
4881 the div (for this test we assume it will not fail). Note that optab2
4882 is set to the one of the two optabs that the call below will use. */
4883 quotient
4884 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4885 op0, op1, rem_flag ? NULL_RTX : target,
4886 unsignedp,
4887 ((optab_handler (optab2, compute_mode)
4888 != CODE_FOR_nothing)
4889 ? OPTAB_DIRECT : OPTAB_WIDEN));
4890
4891 if (quotient == 0)
4892 {
4893 /* No luck there. Try a quotient-and-remainder insn,
4894 keeping the quotient alone. */
4895 quotient = gen_reg_rtx (compute_mode);
4896 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4897 op0, op1,
4898 quotient, NULL_RTX, unsignedp))
4899 {
4900 quotient = 0;
4901 if (! rem_flag)
4902 /* Still no luck. If we are not computing the remainder,
4903 use a library call for the quotient. */
4904 quotient = sign_expand_binop (compute_mode,
4905 udiv_optab, sdiv_optab,
4906 op0, op1, target,
4907 unsignedp, OPTAB_LIB_WIDEN);
4908 }
4909 }
4910 }
4911
4912 if (rem_flag)
4913 {
4914 if (target && GET_MODE (target) != compute_mode)
4915 target = 0;
4916
4917 if (quotient == 0)
4918 {
4919 /* No divide instruction either. Use library for remainder. */
4920 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4921 op0, op1, target,
4922 unsignedp, OPTAB_LIB_WIDEN);
4923 /* No remainder function. Try a quotient-and-remainder
4924 function, keeping the remainder. */
4925 if (!remainder)
4926 {
4927 remainder = gen_reg_rtx (compute_mode);
4928 if (!expand_twoval_binop_libfunc
4929 (unsignedp ? udivmod_optab : sdivmod_optab,
4930 op0, op1,
4931 NULL_RTX, remainder,
4932 unsignedp ? UMOD : MOD))
4933 remainder = NULL_RTX;
4934 }
4935 }
4936 else
4937 {
4938 /* We divided. Now finish doing X - Y * (X / Y). */
4939 remainder = expand_mult (compute_mode, quotient, op1,
4940 NULL_RTX, unsignedp);
4941 remainder = expand_binop (compute_mode, sub_optab, op0,
4942 remainder, target, unsignedp,
4943 OPTAB_LIB_WIDEN);
4944 }
4945 }
4946
4947 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4948 }
4949 \f
4950 /* Return a tree node with data type TYPE, describing the value of X.
4951 Usually this is a VAR_DECL, if there is no obvious better choice.
4952 X may be an expression; however, we only support those expressions
4953 generated by loop.c. */
4954
4955 tree
4956 make_tree (tree type, rtx x)
4957 {
4958 tree t;
4959
4960 switch (GET_CODE (x))
4961 {
4962 case CONST_INT:
4963 {
4964 HOST_WIDE_INT hi = 0;
4965
4966 if (INTVAL (x) < 0
4967 && !(TYPE_UNSIGNED (type)
4968 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4969 < HOST_BITS_PER_WIDE_INT)))
4970 hi = -1;
4971
4972 t = build_int_cst_wide (type, INTVAL (x), hi);
4973
4974 return t;
4975 }
4976
4977 case CONST_DOUBLE:
4978 if (GET_MODE (x) == VOIDmode)
4979 t = build_int_cst_wide (type,
4980 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4981 else
4982 {
4983 REAL_VALUE_TYPE d;
4984
4985 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4986 t = build_real (type, d);
4987 }
4988
4989 return t;
4990
4991 case CONST_VECTOR:
4992 {
4993 int units = CONST_VECTOR_NUNITS (x);
4994 tree itype = TREE_TYPE (type);
4995 tree *elts;
4996 int i;
4997
4998 /* Build a tree with vector elements. */
4999 elts = XALLOCAVEC (tree, units);
5000 for (i = units - 1; i >= 0; --i)
5001 {
5002 rtx elt = CONST_VECTOR_ELT (x, i);
5003 elts[i] = make_tree (itype, elt);
5004 }
5005
5006 return build_vector (type, elts);
5007 }
5008
5009 case PLUS:
5010 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1)));
5012
5013 case MINUS:
5014 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5015 make_tree (type, XEXP (x, 1)));
5016
5017 case NEG:
5018 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5019
5020 case MULT:
5021 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case ASHIFT:
5025 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5026 make_tree (type, XEXP (x, 1)));
5027
5028 case LSHIFTRT:
5029 t = unsigned_type_for (type);
5030 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5031 make_tree (t, XEXP (x, 0)),
5032 make_tree (type, XEXP (x, 1))));
5033
5034 case ASHIFTRT:
5035 t = signed_type_for (type);
5036 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5037 make_tree (t, XEXP (x, 0)),
5038 make_tree (type, XEXP (x, 1))));
5039
5040 case DIV:
5041 if (TREE_CODE (type) != REAL_TYPE)
5042 t = signed_type_for (type);
5043 else
5044 t = type;
5045
5046 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5047 make_tree (t, XEXP (x, 0)),
5048 make_tree (t, XEXP (x, 1))));
5049 case UDIV:
5050 t = unsigned_type_for (type);
5051 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5052 make_tree (t, XEXP (x, 0)),
5053 make_tree (t, XEXP (x, 1))));
5054
5055 case SIGN_EXTEND:
5056 case ZERO_EXTEND:
5057 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5058 GET_CODE (x) == ZERO_EXTEND);
5059 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5060
5061 case CONST:
5062 return make_tree (type, XEXP (x, 0));
5063
5064 case SYMBOL_REF:
5065 t = SYMBOL_REF_DECL (x);
5066 if (t)
5067 return fold_convert (type, build_fold_addr_expr (t));
5068 /* else fall through. */
5069
5070 default:
5071 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5072
5073 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5074 address mode to pointer mode. */
5075 if (POINTER_TYPE_P (type))
5076 x = convert_memory_address_addr_space
5077 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5078
5079 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5080 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5081 t->decl_with_rtl.rtl = x;
5082
5083 return t;
5084 }
5085 }
5086 \f
5087 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5088 and returning TARGET.
5089
5090 If TARGET is 0, a pseudo-register or constant is returned. */
5091
5092 rtx
5093 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5094 {
5095 rtx tem = 0;
5096
5097 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5098 tem = simplify_binary_operation (AND, mode, op0, op1);
5099 if (tem == 0)
5100 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5101
5102 if (target == 0)
5103 target = tem;
5104 else if (tem != target)
5105 emit_move_insn (target, tem);
5106 return target;
5107 }
5108
5109 /* Helper function for emit_store_flag. */
5110 static rtx
5111 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5112 enum machine_mode mode, enum machine_mode compare_mode,
5113 int unsignedp, rtx x, rtx y, int normalizep,
5114 enum machine_mode target_mode)
5115 {
5116 struct expand_operand ops[4];
5117 rtx op0, last, comparison, subtarget;
5118 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5119
5120 last = get_last_insn ();
5121 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5122 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5123 if (!x || !y)
5124 {
5125 delete_insns_since (last);
5126 return NULL_RTX;
5127 }
5128
5129 if (target_mode == VOIDmode)
5130 target_mode = result_mode;
5131 if (!target)
5132 target = gen_reg_rtx (target_mode);
5133
5134 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5135
5136 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5137 create_fixed_operand (&ops[1], comparison);
5138 create_fixed_operand (&ops[2], x);
5139 create_fixed_operand (&ops[3], y);
5140 if (!maybe_expand_insn (icode, 4, ops))
5141 {
5142 delete_insns_since (last);
5143 return NULL_RTX;
5144 }
5145 subtarget = ops[0].value;
5146
5147 /* If we are converting to a wider mode, first convert to
5148 TARGET_MODE, then normalize. This produces better combining
5149 opportunities on machines that have a SIGN_EXTRACT when we are
5150 testing a single bit. This mostly benefits the 68k.
5151
5152 If STORE_FLAG_VALUE does not have the sign bit set when
5153 interpreted in MODE, we can do this conversion as unsigned, which
5154 is usually more efficient. */
5155 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5156 {
5157 convert_move (target, subtarget,
5158 val_signbit_known_clear_p (result_mode,
5159 STORE_FLAG_VALUE));
5160 op0 = target;
5161 result_mode = target_mode;
5162 }
5163 else
5164 op0 = subtarget;
5165
5166 /* If we want to keep subexpressions around, don't reuse our last
5167 target. */
5168 if (optimize)
5169 subtarget = 0;
5170
5171 /* Now normalize to the proper value in MODE. Sometimes we don't
5172 have to do anything. */
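/* For instance, if the pattern yields 0 / -1 (STORE_FLAG_VALUE == -1)
   and NORMALIZEP is 1, negating gives the desired 0 / 1; if
   STORE_FLAG_VALUE is just the sign bit, a logical right shift by the
   mode width minus one yields 0 / 1 and an arithmetic shift yields
   0 / -1.  */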
5173 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5174 ;
5175 /* STORE_FLAG_VALUE might be the most negative number, so write
5176 the comparison this way to avoid a compile-time warning. */
5177 else if (- normalizep == STORE_FLAG_VALUE)
5178 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5179
5180 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5181 it hard to use a value of just the sign bit due to ANSI integer
5182 constant typing rules. */
5183 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5184 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5185 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5186 normalizep == 1);
5187 else
5188 {
5189 gcc_assert (STORE_FLAG_VALUE & 1);
5190
5191 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5192 if (normalizep == -1)
5193 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5194 }
5195
5196 /* If we were converting to a smaller mode, do the conversion now. */
5197 if (target_mode != result_mode)
5198 {
5199 convert_move (target, op0, 0);
5200 return target;
5201 }
5202 else
5203 return op0;
5204 }
5205
5206
5207 /* A subroutine of emit_store_flag only including "tricks" that do not
5208 need a recursive call. These are kept separate to avoid infinite
5209 loops. */
5210
5211 static rtx
5212 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5213 enum machine_mode mode, int unsignedp, int normalizep,
5214 enum machine_mode target_mode)
5215 {
5216 rtx subtarget;
5217 enum insn_code icode;
5218 enum machine_mode compare_mode;
5219 enum mode_class mclass;
5220 enum rtx_code scode;
5221 rtx tem;
5222
5223 if (unsignedp)
5224 code = unsigned_condition (code);
5225 scode = swap_condition (code);
5226
5227 /* If one operand is constant, make it the second one. Only do this
5228 if the other operand is not constant as well. */
5229
5230 if (swap_commutative_operands_p (op0, op1))
5231 {
5232 tem = op0;
5233 op0 = op1;
5234 op1 = tem;
5235 code = swap_condition (code);
5236 }
5237
5238 if (mode == VOIDmode)
5239 mode = GET_MODE (op0);
5240
5241 /* For some comparisons with 1 and -1, we can convert this to
5242 comparisons with zero. This will often produce more opportunities for
5243 store-flag insns. */
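/* For instance, "x < 1" is the same as "x <= 0", and an unsigned
   "x >= 1" is simply "x != 0".  */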
5244
5245 switch (code)
5246 {
5247 case LT:
5248 if (op1 == const1_rtx)
5249 op1 = const0_rtx, code = LE;
5250 break;
5251 case LE:
5252 if (op1 == constm1_rtx)
5253 op1 = const0_rtx, code = LT;
5254 break;
5255 case GE:
5256 if (op1 == const1_rtx)
5257 op1 = const0_rtx, code = GT;
5258 break;
5259 case GT:
5260 if (op1 == constm1_rtx)
5261 op1 = const0_rtx, code = GE;
5262 break;
5263 case GEU:
5264 if (op1 == const1_rtx)
5265 op1 = const0_rtx, code = NE;
5266 break;
5267 case LTU:
5268 if (op1 == const1_rtx)
5269 op1 = const0_rtx, code = EQ;
5270 break;
5271 default:
5272 break;
5273 }
5274
5275 /* If we are comparing a double-word integer with zero or -1, we can
5276 convert the comparison into one involving a single word. */
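/* For example, on a 32-bit target a DImode "== 0" test becomes
   "(low | high) == 0", a DImode "== -1" test becomes
   "(low & high) == -1", and a sign test (LT or GE against zero) only
   needs to look at the high word.  */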
5277 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5278 && GET_MODE_CLASS (mode) == MODE_INT
5279 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5280 {
5281 if ((code == EQ || code == NE)
5282 && (op1 == const0_rtx || op1 == constm1_rtx))
5283 {
5284 rtx op00, op01;
5285
5286 /* Do a logical OR or AND of the two words and compare the
5287 result. */
5288 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5289 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5290 tem = expand_binop (word_mode,
5291 op1 == const0_rtx ? ior_optab : and_optab,
5292 op00, op01, NULL_RTX, unsignedp,
5293 OPTAB_DIRECT);
5294
5295 if (tem != 0)
5296 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5297 unsignedp, normalizep);
5298 }
5299 else if ((code == LT || code == GE) && op1 == const0_rtx)
5300 {
5301 rtx op0h;
5302
5303 /* If testing the sign bit, can just test on high word. */
5304 op0h = simplify_gen_subreg (word_mode, op0, mode,
5305 subreg_highpart_offset (word_mode,
5306 mode));
5307 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5308 unsignedp, normalizep);
5309 }
5310 else
5311 tem = NULL_RTX;
5312
5313 if (tem)
5314 {
5315 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5316 return tem;
5317 if (!target)
5318 target = gen_reg_rtx (target_mode);
5319
5320 convert_move (target, tem,
5321 !val_signbit_known_set_p (word_mode,
5322 (normalizep ? normalizep
5323 : STORE_FLAG_VALUE)));
5324 return target;
5325 }
5326 }
5327
5328 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5329 complement of A (for GE) and shifting the sign bit to the low bit. */
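/* E.g. for A >= 0 in SImode: ~A has its sign bit set exactly when A
   is non-negative, and shifting that bit down 31 places yields 0 / 1
   (logical shift) or 0 / -1 (arithmetic shift).  For A < 0 the
   complement step is simply skipped.  */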
5330 if (op1 == const0_rtx && (code == LT || code == GE)
5331 && GET_MODE_CLASS (mode) == MODE_INT
5332 && (normalizep || STORE_FLAG_VALUE == 1
5333 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5334 {
5335 subtarget = target;
5336
5337 if (!target)
5338 target_mode = mode;
5339
5340 /* If the result is to be wider than OP0, it is best to convert it
5341 first. If it is to be narrower, it is *incorrect* to convert it
5342 first. */
5343 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5344 {
5345 op0 = convert_modes (target_mode, mode, op0, 0);
5346 mode = target_mode;
5347 }
5348
5349 if (target_mode != mode)
5350 subtarget = 0;
5351
5352 if (code == GE)
5353 op0 = expand_unop (mode, one_cmpl_optab, op0,
5354 ((STORE_FLAG_VALUE == 1 || normalizep)
5355 ? 0 : subtarget), 0);
5356
5357 if (STORE_FLAG_VALUE == 1 || normalizep)
5358 /* If we are supposed to produce a 0/1 value, we want to do
5359 a logical shift from the sign bit to the low-order bit; for
5360 a -1/0 value, we do an arithmetic shift. */
5361 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5362 GET_MODE_BITSIZE (mode) - 1,
5363 subtarget, normalizep != -1);
5364
5365 if (mode != target_mode)
5366 op0 = convert_modes (target_mode, mode, op0, 0);
5367
5368 return op0;
5369 }
5370
5371 mclass = GET_MODE_CLASS (mode);
5372 for (compare_mode = mode; compare_mode != VOIDmode;
5373 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5374 {
5375 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5376 icode = optab_handler (cstore_optab, optab_mode);
5377 if (icode != CODE_FOR_nothing)
5378 {
5379 do_pending_stack_adjust ();
5380 tem = emit_cstore (target, icode, code, mode, compare_mode,
5381 unsignedp, op0, op1, normalizep, target_mode);
5382 if (tem)
5383 return tem;
5384
5385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5386 {
5387 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5388 unsignedp, op1, op0, normalizep, target_mode);
5389 if (tem)
5390 return tem;
5391 }
5392 break;
5393 }
5394 }
5395
5396 return 0;
5397 }
5398
5399 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5400 and storing in TARGET. Normally return TARGET.
5401 Return 0 if that cannot be done.
5402
5403 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5404 it is VOIDmode, they cannot both be CONST_INT.
5405
5406 UNSIGNEDP is for the case where we have to widen the operands
5407 to perform the operation. It says to use zero-extension.
5408
5409 NORMALIZEP is 1 if we should convert the result to be either zero
5410 or one. NORMALIZEP is -1 if we should convert the result to be
5411 either zero or -1. If NORMALIZEP is zero, the result will be left
5412 "raw" out of the scc insn. */
5413
5414 rtx
5415 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5416 enum machine_mode mode, int unsignedp, int normalizep)
5417 {
5418 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5419 enum rtx_code rcode;
5420 rtx subtarget;
5421 rtx tem, last, trueval;
5422
5423 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5424 target_mode);
5425 if (tem)
5426 return tem;
5427
5428 /* If we reached here, we can't do this with a scc insn; however, there
5429 are some comparisons that can be done in other ways. Don't do any
5430 of these cases if branches are very cheap. */
5431 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5432 return 0;
5433
5434 /* See what we need to return. We can only return a 1, -1, or the
5435 sign bit. */
5436
5437 if (normalizep == 0)
5438 {
5439 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5440 normalizep = STORE_FLAG_VALUE;
5441
5442 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5443 ;
5444 else
5445 return 0;
5446 }
5447
5448 last = get_last_insn ();
5449
5450 /* If optimizing, use different pseudo registers for each insn, instead
5451 of reusing the same pseudo. This leads to better CSE, but slows
5452 down the compiler, since there are more pseudos. */
5453 subtarget = (!optimize
5454 && (target_mode == mode)) ? target : NULL_RTX;
5455 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5456
5457 /* For floating-point comparisons, try the reverse comparison or try
5458 changing the "orderedness" of the comparison. */
5459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5460 {
5461 enum rtx_code first_code;
5462 bool and_them;
5463
5464 rcode = reverse_condition_maybe_unordered (code);
5465 if (can_compare_p (rcode, mode, ccp_store_flag)
5466 && (code == ORDERED || code == UNORDERED
5467 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5468 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5469 {
5470 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5471 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5472
5473 /* For the reverse comparison, use either an addition or a XOR. */
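/* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reversed
   comparison yields 1 exactly when the original is false, so adding
   -1 maps that 1 / 0 to 0 / -1, the desired result.  In the other
   case, XOR-ing the normalized reversed result with TRUEVAL flips
   it: equal values cancel to 0, and 0 becomes TRUEVAL.  */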
5474 if (want_add
5475 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5476 optimize_insn_for_speed_p ()) == 0)
5477 {
5478 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5479 STORE_FLAG_VALUE, target_mode);
5480 if (tem)
5481 return expand_binop (target_mode, add_optab, tem,
5482 GEN_INT (normalizep),
5483 target, 0, OPTAB_WIDEN);
5484 }
5485 else if (!want_add
5486 && rtx_cost (trueval, XOR, 1,
5487 optimize_insn_for_speed_p ()) == 0)
5488 {
5489 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5490 normalizep, target_mode);
5491 if (tem)
5492 return expand_binop (target_mode, xor_optab, tem, trueval,
5493 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5494 }
5495 }
5496
5497 delete_insns_since (last);
5498
5499 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5500 if (code == ORDERED || code == UNORDERED)
5501 return 0;
5502
5503 and_them = split_comparison (code, mode, &first_code, &code);
5504
5505 /* If there are no NaNs, the first comparison should always fall through.
5506 Effectively change the comparison to the other one. */
5507 if (!HONOR_NANS (mode))
5508 {
5509 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5510 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5511 target_mode);
5512 }
5513
5514 #ifdef HAVE_conditional_move
5515 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5516 conditional move. */
5517 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5518 normalizep, target_mode);
5519 if (tem == 0)
5520 return 0;
5521
5522 if (and_them)
5523 tem = emit_conditional_move (target, code, op0, op1, mode,
5524 tem, const0_rtx, GET_MODE (tem), 0);
5525 else
5526 tem = emit_conditional_move (target, code, op0, op1, mode,
5527 trueval, tem, GET_MODE (tem), 0);
5528
5529 if (tem == 0)
5530 delete_insns_since (last);
5531 return tem;
5532 #else
5533 return 0;
5534 #endif
5535 }
5536
5537 /* The remaining tricks only apply to integer comparisons. */
5538
5539 if (GET_MODE_CLASS (mode) != MODE_INT)
5540 return 0;
5541
5542 /* If this is an equality comparison of integers, we can try to exclusive-or
5543 (or subtract) the two operands and use a recursive call to try the
5544 comparison with zero. Don't do any of these cases if branches are
5545 very cheap. */
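/* That is, "a == b" can be tested as "(a ^ b) == 0", or as
   "(a - b) == 0" when no XOR pattern is available.  */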
5546
5547 if ((code == EQ || code == NE) && op1 != const0_rtx)
5548 {
5549 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5550 OPTAB_WIDEN);
5551
5552 if (tem == 0)
5553 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5554 OPTAB_WIDEN);
5555 if (tem != 0)
5556 tem = emit_store_flag (target, code, tem, const0_rtx,
5557 mode, unsignedp, normalizep);
5558 if (tem != 0)
5559 return tem;
5560
5561 delete_insns_since (last);
5562 }
5563
5564 /* For integer comparisons, try the reverse comparison. However, for
5565 small X, and if we would have to extend anyway, implementing "X != 0"
5566 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5567 rcode = reverse_condition (code);
5568 if (can_compare_p (rcode, mode, ccp_store_flag)
5569 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5570 && code == NE
5571 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5572 && op1 == const0_rtx))
5573 {
5574 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5575 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5576
5577 /* Again, for the reverse comparison, use either an addition or a XOR. */
5578 if (want_add
5579 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5580 optimize_insn_for_speed_p ()) == 0)
5581 {
5582 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5583 STORE_FLAG_VALUE, target_mode);
5584 if (tem != 0)
5585 tem = expand_binop (target_mode, add_optab, tem,
5586 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5587 }
5588 else if (!want_add
5589 && rtx_cost (trueval, XOR, 1,
5590 optimize_insn_for_speed_p ()) == 0)
5591 {
5592 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593 normalizep, target_mode);
5594 if (tem != 0)
5595 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5596 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5597 }
5598
5599 if (tem != 0)
5600 return tem;
5601 delete_insns_since (last);
5602 }
5603
5604 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5605 the constant zero. Reject all other comparisons at this point. Only
5606 do LE and GT if branches are expensive since they are expensive on
5607 2-operand machines. */
5608
5609 if (op1 != const0_rtx
5610 || (code != EQ && code != NE
5611 && (BRANCH_COST (optimize_insn_for_speed_p (),
5612 false) <= 1 || (code != LE && code != GT))))
5613 return 0;
5614
5615 /* Try to put the result of the comparison in the sign bit. Assume we can't
5616 do the necessary operation below. */
5617
5618 tem = 0;
5619
5620 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5621 the sign bit set. */
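/* For instance, in SImode: A == 3 gives 3 | 2 = 3 (sign clear),
   A == 0 gives 0 | -1 = -1 (sign set), and any negative A already
   has its sign bit set.  */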
5622
5623 if (code == LE)
5624 {
5625 /* This is destructive, so SUBTARGET can't be OP0. */
5626 if (rtx_equal_p (subtarget, op0))
5627 subtarget = 0;
5628
5629 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5630 OPTAB_WIDEN);
5631 if (tem)
5632 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5633 OPTAB_WIDEN);
5634 }
5635
5636 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5637 number of bits in the mode of OP0, minus one. */
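/* For instance, in SImode: A == 5 gives (5 >> 31) - 5 = -5 (sign
   set), A == 0 gives 0 (sign clear), and A == -7 gives
   -1 - (-7) = 6 (sign clear); the shift is arithmetic, so it
   reproduces A's sign across the whole word.  */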
5638
5639 if (code == GT)
5640 {
5641 if (rtx_equal_p (subtarget, op0))
5642 subtarget = 0;
5643
5644 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5645 GET_MODE_BITSIZE (mode) - 1,
5646 subtarget, 0);
5647 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5648 OPTAB_WIDEN);
5649 }
5650
5651 if (code == EQ || code == NE)
5652 {
5653 /* For EQ or NE, one way to do the comparison is to apply an operation
5654 that converts the operand into a positive number if it is nonzero
5655 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5656 for NE we negate. This puts the result in the sign bit. Then we
5657 normalize with a shift, if needed.
5658
5659 Two operations that can do the above actions are ABS and FFS, so try
5660 them. If that doesn't work, and MODE is smaller than a full word,
5661 we can use zero-extension to the wider mode (an unsigned conversion)
5662 as the operation. */
5663
5664 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5665 that is compensated by the subsequent overflow when subtracting
5666 one / negating. */
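/* E.g. with FFS for an NE test: A == 6 gives ffs (6) = 2, negated
   to -2 (sign set), while A == 0 gives ffs (0) = 0, which stays 0
   (sign clear).  For EQ, subtracting 1 instead gives -1 for A == 0
   and a non-negative value for any nonzero A.  */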
5667
5668 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5669 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5670 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5671 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5672 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5673 {
5674 tem = convert_modes (word_mode, mode, op0, 1);
5675 mode = word_mode;
5676 }
5677
5678 if (tem != 0)
5679 {
5680 if (code == EQ)
5681 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5682 0, OPTAB_WIDEN);
5683 else
5684 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5685 }
5686
5687 /* If we couldn't do it that way, for NE we can "or" the two's complement
5688 of the value with itself. For EQ, we take the one's complement of
5689 that "or", which is an extra insn, so we only handle EQ if branches
5690 are expensive. */
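/* For instance, A == 5 gives -5 | 5, which has the sign bit set,
   while A == 0 gives 0 | 0 = 0; the sign bit therefore encodes
   "A != 0", and its one's complement encodes "A == 0".  */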
5691
5692 if (tem == 0
5693 && (code == NE
5694 || BRANCH_COST (optimize_insn_for_speed_p (),
5695 false) > 1))
5696 {
5697 if (rtx_equal_p (subtarget, op0))
5698 subtarget = 0;
5699
5700 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5701 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5702 OPTAB_WIDEN);
5703
5704 if (tem && code == EQ)
5705 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5706 }
5707 }
5708
5709 if (tem && normalizep)
5710 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5711 GET_MODE_BITSIZE (mode) - 1,
5712 subtarget, normalizep == 1);
5713
5714 if (tem)
5715 {
5716 if (!target)
5717 ;
5718 else if (GET_MODE (tem) != target_mode)
5719 {
5720 convert_move (target, tem, 0);
5721 tem = target;
5722 }
5723 else if (!subtarget)
5724 {
5725 emit_move_insn (target, tem);
5726 tem = target;
5727 }
5728 }
5729 else
5730 delete_insns_since (last);
5731
5732 return tem;
5733 }
5734
5735 /* Like emit_store_flag, but always succeeds. */
5736
5737 rtx
5738 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5739 enum machine_mode mode, int unsignedp, int normalizep)
5740 {
5741 rtx tem, label;
5742 rtx trueval, falseval;
5743
5744 /* First see if emit_store_flag can do the job. */
5745 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5746 if (tem != 0)
5747 return tem;
5748
5749 if (!target)
5750 target = gen_reg_rtx (word_mode);
5751
5752 /* If this failed, we have to do this with set/compare/jump/set code.
5753 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
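/* When TARGET already holds OP0 and the test is OP0 != 0, TARGET
   already contains the desired 0 in the false case, so it is enough
   to branch around a single move of TRUEVAL.  */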
5754 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5755 if (code == NE
5756 && GET_MODE_CLASS (mode) == MODE_INT
5757 && REG_P (target)
5758 && op0 == target
5759 && op1 == const0_rtx)
5760 {
5761 label = gen_label_rtx ();
5762 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5763 mode, NULL_RTX, NULL_RTX, label, -1);
5764 emit_move_insn (target, trueval);
5765 emit_label (label);
5766 return target;
5767 }
5768
5769 if (!REG_P (target)
5770 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5771 target = gen_reg_rtx (GET_MODE (target));
5772
5773 /* Jump in the right direction if the target cannot implement CODE
5774 but can jump on its reverse condition. */
5775 falseval = const0_rtx;
5776 if (! can_compare_p (code, mode, ccp_jump)
5777 && (! FLOAT_MODE_P (mode)
5778 || code == ORDERED || code == UNORDERED
5779 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5780 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5781 {
5782 enum rtx_code rcode;
5783 if (FLOAT_MODE_P (mode))
5784 rcode = reverse_condition_maybe_unordered (code);
5785 else
5786 rcode = reverse_condition (code);
5787
5788 /* Canonicalize to UNORDERED for the libcall. */
5789 if (can_compare_p (rcode, mode, ccp_jump)
5790 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5791 {
5792 falseval = trueval;
5793 trueval = const0_rtx;
5794 code = rcode;
5795 }
5796 }
5797
5798 emit_move_insn (target, trueval);
5799 label = gen_label_rtx ();
5800 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5801 NULL_RTX, label, -1);
5802
5803 emit_move_insn (target, falseval);
5804 emit_label (label);
5805
5806 return target;
5807 }
5808 \f
5809 /* Perform possibly multi-word comparison and conditional jump to LABEL
5810 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5811 now a thin wrapper around do_compare_rtx_and_jump. */
5812
5813 static void
5814 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5815 rtx label)
5816 {
5817 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5818 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5819 NULL_RTX, NULL_RTX, label, -1);
5820 }