gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2020 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* Work around tree-optimization/91825.  */
  22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "backend.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "predict.h"
  32 #include "memmodel.h"
  33 #include "tm_p.h"
  34 #include "expmed.h"
  35 #include "optabs.h"
  36 #include "regs.h"
  37 #include "emit-rtl.h"
  38 #include "diagnostic-core.h"
  39 #include "fold-const.h"
  40 #include "stor-layout.h"
  41 #include "dojump.h"
  42 #include "explow.h"
  43 #include "expr.h"
  44 #include "langhooks.h"
  45 #include "tree-vector-builder.h"
  46
/* The struct target_expmed instance that belongs to the default target.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
/* Pointer to the struct target_expmed currently in use.  It starts out
   referring to DEFAULT_TARGET_EXPMED.
   NOTE(review): presumably redirected when the active target is
   switched; the code that does so is not in this part of the file.  */
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
  51
/* Prototypes for file-local (static) helpers.  Declaring them up front
   lets functions that appear earlier in the file refer to them.  */
static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT,
                                      poly_uint64, poly_uint64,
                                      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
                                     unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
                                       unsigned HOST_WIDE_INT,
                                       unsigned HOST_WIDE_INT, int, rtx,
                                       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
  88
  89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  91    The mask is truncated if necessary to the width of mode MODE.  The
  92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  93
  94 static inline rtx
  95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
  96 {
  97   return immed_wide_int_const
  98     (wi::shifted_mask (bitpos, bitsize, complement,
  99                        GET_MODE_PRECISION (mode)), mode);
 100 }
 101
/* Test whether a value is zero or a power of two, i.e. whether it has at
   most one bit set.  X is evaluated twice, so it must not have side
   effects.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
 105
/* Scratch rtl templates used while filling in the cost tables.
   init_expmed builds each expression once; init_expmed_one_mode and
   init_expmed_one_conv then change the modes (and some operands) in
   place and ask set_src_cost for the cost of the result.  */
struct init_expmed_rtl
{
  rtx reg;        /* Raw REG that serves as the operand of the templates.  */
  rtx plus;       /* (plus reg reg)  */
  rtx neg;        /* (neg reg)  */
  rtx mult;       /* (mult reg reg)  */
  rtx sdiv;       /* (div reg reg)  */
  rtx udiv;       /* (udiv reg reg)  */
  rtx sdiv_32;    /* (div reg pow2[5]), i.e. signed division by 32.  */
  rtx smod_32;    /* (mod reg pow2[5]), i.e. signed modulus by 32.  */
  rtx wide_mult;  /* (mult zext zext)  */
  rtx wide_lshr;  /* (lshiftrt wide_mult N); N is set per mode.  */
  rtx wide_trunc; /* (truncate wide_lshr)  */
  rtx shift;      /* (ashift reg N); N is set to cint[m] per shift count.  */
  rtx shift_mult; /* (mult reg N); N is set to pow2[m] per shift count.  */
  rtx shift_add;  /* (plus shift_mult reg)  */
  rtx shift_sub0; /* (minus shift_mult reg)  */
  rtx shift_sub1; /* (minus reg shift_mult)  */
  rtx zext;       /* (zero_extend reg)  */
  rtx trunc;      /* (truncate reg)  */

  /* pow2[m] is the constant 1 << m and cint[m] the constant m.  Only
     entries 1 .. MAX_BITS_PER_WORD - 1 are filled in by init_expmed.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};
 130
 131 static void
 132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
 133                       scalar_int_mode from_mode, bool speed)
 134 {
 135   int to_size, from_size;
 136   rtx which;
 137
 138   to_size = GET_MODE_PRECISION (to_mode);
 139   from_size = GET_MODE_PRECISION (from_mode);
 140
 141   /* Most partial integers have a precision less than the "full"
 142      integer it requires for storage.  In case one doesn't, for
 143      comparison purposes here, reduce the bit size by one in that
 144      case.  */
 145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 146       && pow2p_hwi (to_size))
 147     to_size --;
 148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 149       && pow2p_hwi (from_size))
 150     from_size --;
 151
 152   /* Assume cost of zero-extend and sign-extend is the same.  */
 153   which = (to_size < from_size ? all->trunc : all->zext);
 154
 155   PUT_MODE (all->reg, from_mode);
 156   set_convert_cost (to_mode, from_mode, speed,
 157                     set_src_cost (which, to_mode, speed));
 158 }
 159
 160 static void
 161 init_expmed_one_mode (struct init_expmed_rtl *all,
 162                       machine_mode mode, int speed)
 163 {
 164   int m, n, mode_bitsize;
 165   machine_mode mode_from;
 166
 167   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 168
 169   PUT_MODE (all->reg, mode);
 170   PUT_MODE (all->plus, mode);
 171   PUT_MODE (all->neg, mode);
 172   PUT_MODE (all->mult, mode);
 173   PUT_MODE (all->sdiv, mode);
 174   PUT_MODE (all->udiv, mode);
 175   PUT_MODE (all->sdiv_32, mode);
 176   PUT_MODE (all->smod_32, mode);
 177   PUT_MODE (all->wide_trunc, mode);
 178   PUT_MODE (all->shift, mode);
 179   PUT_MODE (all->shift_mult, mode);
 180   PUT_MODE (all->shift_add, mode);
 181   PUT_MODE (all->shift_sub0, mode);
 182   PUT_MODE (all->shift_sub1, mode);
 183   PUT_MODE (all->zext, mode);
 184   PUT_MODE (all->trunc, mode);
 185
 186   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 187   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 188   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 189   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 190   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 191
 192   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 193                                      <= 2 * add_cost (speed, mode)));
 194   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 195                                      <= 4 * add_cost (speed, mode)));
 196
 197   set_shift_cost (speed, mode, 0, 0);
 198   {
 199     int cost = add_cost (speed, mode);
 200     set_shiftadd_cost (speed, mode, 0, cost);
 201     set_shiftsub0_cost (speed, mode, 0, cost);
 202     set_shiftsub1_cost (speed, mode, 0, cost);
 203   }
 204
 205   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 206   for (m = 1; m < n; m++)
 207     {
 208       XEXP (all->shift, 1) = all->cint[m];
 209       XEXP (all->shift_mult, 1) = all->pow2[m];
 210
 211       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 212       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 213                                                        speed));
 214       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 215                                                         speed));
 216       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 217                                                         speed));
 218     }
 219
 220   scalar_int_mode int_mode_to;
 221   if (is_a <scalar_int_mode> (mode, &int_mode_to))
 222     {
 223       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 224            mode_from = (machine_mode)(mode_from + 1))
 225         init_expmed_one_conv (all, int_mode_to,
 226                               as_a <scalar_int_mode> (mode_from), speed);
 227
 228       scalar_int_mode wider_mode;
 229       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
 230           && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
 231         {
 232           PUT_MODE (all->zext, wider_mode);
 233           PUT_MODE (all->wide_mult, wider_mode);
 234           PUT_MODE (all->wide_lshr, wider_mode);
 235           XEXP (all->wide_lshr, 1)
 236             = gen_int_shift_amount (wider_mode, mode_bitsize);
 237
 238           set_mul_widen_cost (speed, wider_mode,
 239                               set_src_cost (all->wide_mult, wider_mode, speed));
 240           set_mul_highpart_cost (speed, int_mode_to,
 241                                  set_src_cost (all->wide_trunc,
 242                                                int_mode_to, speed));
 243         }
 244     }
 245 }
 246
 247 void
 248 init_expmed (void)
 249 {
 250   struct init_expmed_rtl all;
 251   machine_mode mode = QImode;
 252   int m, speed;
 253
 254   memset (&all, 0, sizeof all);
 255   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 256     {
 257       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 258       all.cint[m] = GEN_INT (m);
 259     }
 260
 261   /* Avoid using hard regs in ways which may be unsupported.  */
 262   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 263   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 264   all.neg = gen_rtx_NEG (mode, all.reg);
 265   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 266   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 267   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 268   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 269   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 270   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 271   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 272   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 273   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 274   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 275   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 276   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 277   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 278   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 279   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 280
 281   for (speed = 0; speed < 2; speed++)
 282     {
 283       crtl->maybe_hot_insn_p = speed;
 284       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 285
 286       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 287            mode = (machine_mode)(mode + 1))
 288         init_expmed_one_mode (&all, mode, speed);
 289
 290       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 291         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 292              mode = (machine_mode)(mode + 1))
 293           init_expmed_one_mode (&all, mode, speed);
 294
 295       if (MIN_MODE_VECTOR_INT != VOIDmode)
 296         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 297              mode = (machine_mode)(mode + 1))
 298           init_expmed_one_mode (&all, mode, speed);
 299     }
 300
 301   if (alg_hash_used_p ())
 302     {
 303       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 304       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 305     }
 306   else
 307     set_alg_hash_used_p (true);
 308   default_rtl_profile ();
 309
 310   ggc_free (all.trunc);
 311   ggc_free (all.shift_sub1);
 312   ggc_free (all.shift_sub0);
 313   ggc_free (all.shift_add);
 314   ggc_free (all.shift_mult);
 315   ggc_free (all.shift);
 316   ggc_free (all.wide_trunc);
 317   ggc_free (all.wide_lshr);
 318   ggc_free (all.wide_mult);
 319   ggc_free (all.zext);
 320   ggc_free (all.smod_32);
 321   ggc_free (all.sdiv_32);
 322   ggc_free (all.udiv);
 323   ggc_free (all.sdiv);
 324   ggc_free (all.mult);
 325   ggc_free (all.neg);
 326   ggc_free (all.plus);
 327   ggc_free (all.reg);
 328 }
 329
 330 /* Return an rtx representing minus the value of X.
 331    MODE is the intended mode of the result,
 332    useful if X is a CONST_INT.  */
 333
 334 rtx
 335 negate_rtx (machine_mode mode, rtx x)
 336 {
 337   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 338
 339   if (result == 0)
 340     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 341
 342   return result;
 343 }
 344
 345 /* Whether reverse storage order is supported on the target.  */
 346 static int reverse_storage_order_supported = -1;
 347
 348 /* Check whether reverse storage order is supported on the target.  */
 349
 350 static void
 351 check_reverse_storage_order_support (void)
 352 {
 353   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 354     {
 355       reverse_storage_order_supported = 0;
 356       sorry ("reverse scalar storage order");
 357     }
 358   else
 359     reverse_storage_order_supported = 1;
 360 }
 361
 362 /* Whether reverse FP storage order is supported on the target.  */
 363 static int reverse_float_storage_order_supported = -1;
 364
 365 /* Check whether reverse FP storage order is supported on the target.  */
 366
 367 static void
 368 check_reverse_float_storage_order_support (void)
 369 {
 370   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 371     {
 372       reverse_float_storage_order_supported = 0;
 373       sorry ("reverse floating-point scalar storage order");
 374     }
 375   else
 376     reverse_float_storage_order_supported = 1;
 377 }
 378
 379 /* Return an rtx representing value of X with reverse storage order.
 380    MODE is the intended mode of the result,
 381    useful if X is a CONST_INT.  */
 382
 383 rtx
 384 flip_storage_order (machine_mode mode, rtx x)
 385 {
 386   scalar_int_mode int_mode;
 387   rtx result;
 388
 389   if (mode == QImode)
 390     return x;
 391
 392   if (COMPLEX_MODE_P (mode))
 393     {
 394       rtx real = read_complex_part (x, false);
 395       rtx imag = read_complex_part (x, true);
 396
 397       real = flip_storage_order (GET_MODE_INNER (mode), real);
 398       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 399
 400       return gen_rtx_CONCAT (mode, real, imag);
 401     }
 402
 403   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 404     check_reverse_storage_order_support ();
 405
 406   if (!is_a <scalar_int_mode> (mode, &int_mode))
 407     {
 408       if (FLOAT_MODE_P (mode)
 409           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 410         check_reverse_float_storage_order_support ();
 411
 412       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
 413         {
 414           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 415           return x;
 416         }
 417       x = gen_lowpart (int_mode, x);
 418     }
 419
 420   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 421   if (result == 0)
 422     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 423
 424   if (int_mode != mode)
 425     result = gen_lowpart (mode, result);
 426
 427   return result;
 428 }
 429
 430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
 431    first unit of mode MODE that contains a bitfield of size BITSIZE at
 432    bit position BITNUM.  If MODE is not set, return a BLKmode reference
 433    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
 434    of the field within the new memory.  */
 435
 436 static rtx
 437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
 438                       unsigned HOST_WIDE_INT bitsize,
 439                       unsigned HOST_WIDE_INT bitnum,
 440                       unsigned HOST_WIDE_INT *new_bitnum)
 441 {
 442   scalar_int_mode imode;
 443   if (mode.exists (&imode))
 444     {
 445       unsigned int unit = GET_MODE_BITSIZE (imode);
 446       *new_bitnum = bitnum % unit;
 447       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 448       return adjust_bitfield_address (mem, imode, offset);
 449     }
 450   else
 451     {
 452       *new_bitnum = bitnum % BITS_PER_UNIT;
 453       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 454       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 455                             / BITS_PER_UNIT);
 456       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
 457     }
 458 }
 459
 460 /* The caller wants to perform insertion or extraction PATTERN on a
 461    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 462    BITREGION_START and BITREGION_END are as for store_bit_field
 463    and FIELDMODE is the natural mode of the field.
 464
 465    Search for a mode that is compatible with the memory access
 466    restrictions and (where applicable) with a register insertion or
 467    extraction.  Return the new memory on success, storing the adjusted
 468    bit position in *NEW_BITNUM.  Return null otherwise.  */
 469
 470 static rtx
 471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 472                               rtx op0, HOST_WIDE_INT bitsize,
 473                               HOST_WIDE_INT bitnum,
 474                               poly_uint64 bitregion_start,
 475                               poly_uint64 bitregion_end,
 476                               machine_mode fieldmode,
 477                               unsigned HOST_WIDE_INT *new_bitnum)
 478 {
 479   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 480                                 bitregion_end, MEM_ALIGN (op0),
 481                                 MEM_VOLATILE_P (op0));
 482   scalar_int_mode best_mode;
 483   if (iter.next_mode (&best_mode))
 484     {
 485       /* We can use a memory in BEST_MODE.  See whether this is true for
 486          any wider modes.  All other things being equal, we prefer to
 487          use the widest mode possible because it tends to expose more
 488          CSE opportunities.  */
 489       if (!iter.prefer_smaller_modes ())
 490         {
 491           /* Limit the search to the mode required by the corresponding
 492              register insertion or extraction instruction, if any.  */
 493           scalar_int_mode limit_mode = word_mode;
 494           extraction_insn insn;
 495           if (get_best_reg_extraction_insn (&insn, pattern,
 496                                             GET_MODE_BITSIZE (best_mode),
 497                                             fieldmode))
 498             limit_mode = insn.field_mode;
 499
 500           scalar_int_mode wider_mode;
 501           while (iter.next_mode (&wider_mode)
 502                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 503             best_mode = wider_mode;
 504         }
 505       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 506                                    new_bitnum);
 507     }
 508   return NULL_RTX;
 509 }
 510
 511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 512    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 513    offset is then BITNUM / BITS_PER_UNIT.  */
 514
 515 static bool
 516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 517                      machine_mode struct_mode)
 518 {
 519   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
 520   if (BYTES_BIG_ENDIAN)
 521     return (multiple_p (bitnum, BITS_PER_UNIT)
 522             && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
 523                 || multiple_p (bitnum + bitsize,
 524                                regsize * BITS_PER_UNIT)));
 525   else
 526     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 527 }
 528
 529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 530    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 531    Return false if the access would touch memory outside the range
 532    BITREGION_START to BITREGION_END for conformance to the C++ memory
 533    model.  */
 534
 535 static bool
 536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 537                             unsigned HOST_WIDE_INT bitnum,
 538                             scalar_int_mode fieldmode,
 539                             poly_uint64 bitregion_start,
 540                             poly_uint64 bitregion_end)
 541 {
 542   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 543
 544   /* -fstrict-volatile-bitfields must be enabled and we must have a
 545      volatile MEM.  */
 546   if (!MEM_P (op0)
 547       || !MEM_VOLATILE_P (op0)
 548       || flag_strict_volatile_bitfields <= 0)
 549     return false;
 550
 551   /* The bit size must not be larger than the field mode, and
 552      the field mode must not be larger than a word.  */
 553   if (bitsize > modesize || modesize > BITS_PER_WORD)
 554     return false;
 555
 556   /* Check for cases of unaligned fields that must be split.  */
 557   if (bitnum % modesize + bitsize > modesize)
 558     return false;
 559
 560   /* The memory must be sufficiently aligned for a MODESIZE access.
 561      This condition guarantees, that the memory access will not
 562      touch anything after the end of the structure.  */
 563   if (MEM_ALIGN (op0) < modesize)
 564     return false;
 565
 566   /* Check for cases where the C++ memory model applies.  */
 567   if (maybe_ne (bitregion_end, 0U)
 568       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
 569           || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
 570                        bitregion_end)))
 571     return false;
 572
 573   return true;
 574 }
 575
 576 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 577    bit number BITNUM can be treated as a simple value of mode MODE.
 578    Store the byte offset in *BYTENUM if so.  */
 579
 580 static bool
 581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
 582                        machine_mode mode, poly_uint64 *bytenum)
 583 {
 584   return (MEM_P (op0)
 585           && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
 586           && known_eq (bitsize, GET_MODE_BITSIZE (mode))
 587           && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
 588               || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
 589                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 590 }
 591 \f
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.

   Return true if the insertion was emitted.  Return false if BITSIZE
   is zero or exceeds the width of INSV's field mode (nothing is emitted
   in that case), or if the INSV pattern could not be expanded (the
   insns emitted since entry are then deleted).  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            opt_scalar_int_mode op0_mode,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value, scalar_int_mode value_mode)
{
  class expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  /* Remember where we started so that a failed attempt can be undone.  */
  rtx_insn *last = get_last_insn ();
  /* Set when the insertion is done on a temporary that must be copied
     back to OP0 afterwards.  */
  bool copy_back = false;

  /* OP_MODE is the mode in which the insv pattern operates and UNIT its
     width in bits.  */
  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* The overflow check at the start of store_bit_field_1 only handles
     a field that lies completely outside the register.  A field can
     also lie partially within the register, in which case BITSIZE has
     to be reduced to the part that fits.  Without this adjustment,
     pr48335-2.c fails on big-endian targets that have a bit-insert
     instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
            {
              /* VALUE is narrower than OP_MODE: take a (paradoxical)
                 subreg of it, forcing it into a register if the subreg
                 cannot be formed directly.  */
              tmp = simplify_subreg (op_mode, value1, value_mode, 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (value_mode, value1),
                                           value_mode, 0);
            }
          else
            {
              /* VALUE is at least as wide as OP_MODE: use its low part.  */
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  /* The insv operands are: destination, field width, field position and
     the value to insert.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      /* The insertion was done on a temporary; move the result back to
         the original destination.  */
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  /* The pattern did not match: discard everything emitted above.  */
  delete_insns_since (last);
  return false;
}
 724
 725 /* A subroutine of store_bit_field, with the same arguments plus FALLBACK_P.
 726    Return true if the operation could be implemented.
 727
 728    If FALLBACK_P is true, fall back to store_fixed_bit_field (or
 729    store_split_bit_field) if we have no other way of implementing the
 730    operation.  If FALLBACK_P is false, return false instead.  */
 731
 732 static bool
 733 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
 734                    poly_uint64 bitregion_start, poly_uint64 bitregion_end,
 735                    machine_mode fieldmode,
 736                    rtx value, bool reverse, bool fallback_p)
 737 {
 738   rtx op0 = str_rtx;
 739   /* Look through SUBREGs, adding each one's offset (in bits) to BITNUM.  */
 740   while (GET_CODE (op0) == SUBREG)
 741     {
 742       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
 743       op0 = SUBREG_REG (op0);
 744     }
 745
 746   /* No action is needed if the target is a register and if the field
 747      lies completely outside that register.  This can occur if the source
 748      code contains an out-of-bounds access to a small array.  */
 749   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
 750     return true;
 751
 752   /* Use vec_set patterns for inserting parts of vectors whenever
 753      available.  */
 754   machine_mode outermode = GET_MODE (op0);
 755   scalar_mode innermode = GET_MODE_INNER (outermode);
 756   poly_uint64 pos;
 757   if (VECTOR_MODE_P (outermode)
 758       && !MEM_P (op0)
 759       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
 760       && fieldmode == innermode
 761       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
 762       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
 763     {
 764       class expand_operand ops[3];
 765       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 766       /* Operands: the vector OP0, the element VALUE and the quotient POS.  */
 767       create_fixed_operand (&ops[0], op0);
 768       create_input_operand (&ops[1], value, innermode);
 769       create_integer_operand (&ops[2], pos);
 770       if (maybe_expand_insn (icode, 3, ops))
 771         return true;
 772     }
 773
 774   /* If the target is a register, overwriting the entire object, or storing
 775      a full-word or multi-word field can be done with just a SUBREG.  */
 776   if (!MEM_P (op0)
 777       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
 778     {
 779       /* Use the subreg machinery either to narrow OP0 to the required
 780          words or to cope with mode punning between equal-sized modes.
 781          In the latter case, use subreg on the rhs side, not lhs.  */
 782       rtx sub;
 783       HOST_WIDE_INT regnum;
 784       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
 785       if (known_eq (bitnum, 0U)
 786           && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
 787         {
 788           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 789           if (sub)
 790             {
 791               if (reverse)
 792                 sub = flip_storage_order (GET_MODE (op0), sub);
 793               emit_move_insn (op0, sub);
 794               return true;
 795             }
 796         }
 797       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
 798                && multiple_p (bitsize, regsize * BITS_PER_UNIT))
 799         {
 800           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 801                                      regnum * regsize);
 802           if (sub)
 803             {
 804               if (reverse)
 805                 value = flip_storage_order (fieldmode, value);
 806               emit_move_insn (sub, value);
 807               return true;
 808             }
 809         }
 810     }
 811
 812   /* If the target is memory, storing any naturally aligned field can be
 813      done with a simple store.  For targets that support fast unaligned
 814      memory, any naturally sized, unit aligned field can be done directly.  */
 815   poly_uint64 bytenum;
 816   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
 817     {
 818       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
 819       if (reverse)
 820         value = flip_storage_order (fieldmode, value);
 821       emit_move_insn (op0, value);
 822       return true;
 823     }
 824
 825   /* It's possible we'll need to handle other cases here for
 826      polynomial bitnum and bitsize.  */
 827
 828   /* From here on we need to be looking at a fixed-size insertion.  */
 829   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
 830   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
 831
 832   /* Make sure we are playing with integral modes.  Pun with subregs
 833      if we aren't.  This must come after the entire register case above,
 834      since that case is valid for any mode.  The following cases are only
 835      valid for integral modes.  */
 836   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
 837   scalar_int_mode imode;
 838   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
 839     {
 840       if (MEM_P (op0))
 841         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
 842                                             0, MEM_SIZE (op0));
 843       else if (!op0_mode.exists ())  /* No integer mode found for OP0.  */
 844         {
 845           if (ibitnum == 0
 846               && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
 847               && MEM_P (value)
 848               && !reverse)
 849             {
 850               value = adjust_address (value, GET_MODE (op0), 0);
 851               emit_move_insn (op0, value);
 852               return true;
 853             }
 854           if (!fallback_p)
 855             return false;
 856           rtx temp = assign_stack_temp (GET_MODE (op0),
 857                                         GET_MODE_SIZE (GET_MODE (op0)));
 858           emit_move_insn (temp, op0);
 859           store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
 860                              reverse, fallback_p);  /* FALLBACK_P is true here.  */
 861           emit_move_insn (op0, temp);
 862           return true;
 863         }
 864       else
 865         op0 = gen_lowpart (op0_mode.require (), op0);
 866     }
 867
 868   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
 869                                    bitregion_start, bitregion_end,
 870                                    fieldmode, value, reverse, fallback_p);
 871 }
 872
 873 /* Subroutine of store_bit_field_1, with the same arguments, except
 874    that BITSIZE and BITNUM are constant.  Handle cases specific to
 875    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
 876    otherwise OP0 is a BLKmode MEM.  */
 877
 878 static bool
 879 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
 880                           unsigned HOST_WIDE_INT bitsize,
 881                           unsigned HOST_WIDE_INT bitnum,
 882                           poly_uint64 bitregion_start,
 883                           poly_uint64 bitregion_end,
 884                           machine_mode fieldmode,
 885                           rtx value, bool reverse, bool fallback_p)
 886 {
 887   /* Storing an lsb-aligned field in a register
 888      can be done with a movstrict instruction.  */
 889
 890   if (!MEM_P (op0)
 891       && !reverse
 892       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
 893       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
 894       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 895     {
 896       class expand_operand ops[2];
 897       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 898       rtx arg0 = op0;
 899       unsigned HOST_WIDE_INT subreg_off;
 900
 901       if (GET_CODE (arg0) == SUBREG)
 902         {
 903           /* Else we've got some float mode source being extracted into
 904              a different float mode destination -- this combination of
 905              subregs results in Severe Tire Damage.  */
 906           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 907                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 908                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 909           arg0 = SUBREG_REG (arg0);
 910         }
 911       /* BITNUM expressed in bytes, used as the SUBREG offset below.  */
 912       subreg_off = bitnum / BITS_PER_UNIT;
 913       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 914         {
 915           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 916
 917           create_fixed_operand (&ops[0], arg0);
 918           /* Shrink the source operand to FIELDMODE.  */
 919           create_convert_operand_to (&ops[1], value, fieldmode, false);
 920           if (maybe_expand_insn (icode, 2, ops))
 921             return true;
 922         }
 923     }
 924
 925   /* Handle fields bigger than a word.  */
 926
 927   if (bitsize > BITS_PER_WORD)
 928     {
 929       /* Here we transfer the words of the field
 930          in the order least significant first.
 931          This is because the most significant word is the one which may
 932          be less than full.
 933          However, only do that if the value is not BLKmode.  */
 934
 935       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 936       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 937       rtx_insn *last;
 938
 939       /* This is the mode we must force value to, so that there will be enough
 940          subwords to extract.  Note that fieldmode will often (always?) be
 941          VOIDmode, because that is what store_field uses to indicate that this
 942          is a bit field, but passing VOIDmode to operand_subword_force
 943          is not allowed.
 944
 945          The mode must be fixed-size, since insertions into variable-sized
 946          objects are meant to be handled before calling this function.  */
 947       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
 948       if (value_mode == VOIDmode)
 949         value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
 950
 951       last = get_last_insn ();
 952       for (int i = 0; i < nwords; i++)
 953         {
 954           /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
 955              except maybe for the last iteration.  */
 956           const unsigned HOST_WIDE_INT new_bitsize
 957             = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 958           /* Bit offset from the starting bit number in the target.  */
 959           const unsigned int bit_offset
 960             = backwards ^ reverse
 961               ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
 962               : i * BITS_PER_WORD;
 963           /* Starting word number in the value.  */
 964           const unsigned int wordnum
 965             = backwards
 966               ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
 967               : i;
 968           /* The chunk of the value in word_mode.  We use bit-field extraction
 969              in BLKmode to handle unaligned memory references and to shift the
 970              last chunk right on big-endian machines if need be.  */
 971           rtx value_word
 972             = fieldmode == BLKmode
 973               ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
 974                                    1, NULL_RTX, word_mode, word_mode, false,
 975                                    NULL)
 976               : operand_subword_force (value, wordnum, value_mode);
 977
 978           if (!store_bit_field_1 (op0, new_bitsize,
 979                                   bitnum + bit_offset,
 980                                   bitregion_start, bitregion_end,
 981                                   word_mode,
 982                                   value_word, reverse, fallback_p))
 983             {
 984               delete_insns_since (last);
 985               return false;
 986             }
 987         }
 988       return true;
 989     }
 990
 991   /* If VALUE has a floating-point or complex mode, access it as an
 992      integer of the corresponding size.  This can occur on a machine
 993      with 64 bit registers that uses SFmode for float.  It can also
 994      occur for unaligned float or complex fields.  */
 995   rtx orig_value = value;
 996   scalar_int_mode value_mode;
 997   if (GET_MODE (value) == VOIDmode)
 998     /* By this point we've dealt with values that are bigger than a word,
 999        so word_mode is a conservatively correct choice.  */
1000     value_mode = word_mode;
1001   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1002     {
1003       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1004       value = gen_reg_rtx (value_mode);
1005       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1006     }
1007
1008   /* If OP0 is a multi-word register, narrow it to the affected word.
1009      If the region spans two words, defer to store_split_bit_field.
1010      Don't do this if op0 is a single hard register wider than word
1011      such as a float or vector register.  */
1012   if (!MEM_P (op0)
1013       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1014       && (!REG_P (op0)
1015           || !HARD_REGISTER_P (op0)
1016           || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1017     {
1018       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1019         {
1020           if (!fallback_p)
1021             return false;
1022
1023           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1024                                  bitregion_start, bitregion_end,
1025                                  value, value_mode, reverse);
1026           return true;
1027         }
1028       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1029                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1030       gcc_assert (op0);
1031       op0_mode = word_mode;
1032       bitnum %= BITS_PER_WORD;
1033     }
1034
1035   /* From here on we can assume that the field to be stored in fits
1036      within a word.  If the destination is a register, it too fits
1037      in a word.  */
1038
1039   extraction_insn insv;
1040   if (!MEM_P (op0)
1041       && !reverse
1042       && get_best_reg_extraction_insn (&insv, EP_insv,
1043                                        GET_MODE_BITSIZE (op0_mode.require ()),
1044                                        fieldmode)
1045       && store_bit_field_using_insv (&insv, op0, op0_mode,
1046                                      bitsize, bitnum, value, value_mode))
1047     return true;
1048
1049   /* If OP0 is a memory, try a memory insv first; failing that, try copying
1050      it to a register and seeing if a cheap register alternative is available.  */
1051   if (MEM_P (op0) && !reverse)
1052     {
1053       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1054                                         fieldmode)
1055           && store_bit_field_using_insv (&insv, op0, op0_mode,
1056                                          bitsize, bitnum, value, value_mode))
1057         return true;
1058
1059       rtx_insn *last = get_last_insn ();
1060
1061       /* Try loading part of OP0 into a register, inserting the bitfield
1062          into that, and then copying the result back to OP0.  */
1063       unsigned HOST_WIDE_INT bitpos;
1064       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1065                                                bitregion_start, bitregion_end,
1066                                                fieldmode, &bitpos);
1067       if (xop0)
1068         {
1069           rtx tempreg = copy_to_reg (xop0);
1070           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1071                                  bitregion_start, bitregion_end,
1072                                  fieldmode, orig_value, reverse, false))  /* FALLBACK_P is false.  */
1073             {
1074               emit_move_insn (xop0, tempreg);
1075               return true;
1076             }
1077           delete_insns_since (last);  /* Drop the insns of the failed attempt.  */
1078         }
1079     }
1080
1081   if (!fallback_p)
1082     return false;
1083
1084   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1085                          bitregion_end, value, value_mode, reverse);
1086   return true;
1087 }
1088
1089 /* Generate code to store value from rtx VALUE
1090    into a bit-field within structure STR_RTX
1091    containing BITSIZE bits starting at bit BITNUM.
1092
1093    BITREGION_START is the bitpos of the first bitfield in this region.
1094    BITREGION_END is the bitpos of the ending bitfield in this region.
1095    These two values are 0 if the C++ memory model does not apply,
1096    or if we are not interested in keeping track of bitfield regions.
1097
1098    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1099
1100    If REVERSE is true, the store is to be done in reverse order.  */
1101
1102 void
1103 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1104                  poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1105                  machine_mode fieldmode,
1106                  rtx value, bool reverse)
1107 {
1108   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1109   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1110   scalar_int_mode int_mode;
1111   if (bitsize.is_constant (&ibitsize)
1112       && bitnum.is_constant (&ibitnum)
1113       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1114       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1115                                      bitregion_start, bitregion_end))
1116     {
1117       /* Storing a field that fills INT_MODE can be done with a simple store.
1118          We know here that the field can be accessed with one single
1119          instruction.  For targets that support unaligned memory,
1120          an unaligned access may be necessary.  */
1121       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1122         {
1123           str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1124                                              ibitnum / BITS_PER_UNIT);
1125           if (reverse)
1126             value = flip_storage_order (int_mode, value);
1127           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1128           emit_move_insn (str_rtx, value);
1129         }
1130       else
1131         {
1132           rtx temp;
1133           /* Otherwise read-modify-write via a register copy of the MEM.  */
1134           str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1135                                           ibitnum, &ibitnum);
1136           gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1137           temp = copy_to_reg (str_rtx);
1138           if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1139                                   int_mode, value, reverse, true))
1140             gcc_unreachable ();
1141
1142           emit_move_insn (str_rtx, temp);
1143         }
1144
1145       return;
1146     }
1147
1148   /* Under the C++0x memory model, we must not touch bits outside the
1149      bit region.  Adjust the address to start at the beginning of the
1150      bit region.  */
1151   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1152     {
1153       scalar_int_mode best_mode;
1154       machine_mode addr_mode = VOIDmode;
1155       /* OFFSET is the region start in bytes; rebase BITNUM and the region end to it.  */
1156       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1157       bitnum -= bitregion_start;
1158       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1159       bitregion_end -= bitregion_start;
1160       bitregion_start = 0;
1161       if (bitsize.is_constant (&ibitsize)
1162           && bitnum.is_constant (&ibitnum)
1163           && get_best_mode (ibitsize, ibitnum,
1164                             bitregion_start, bitregion_end,
1165                             MEM_ALIGN (str_rtx), INT_MAX,
1166                             MEM_VOLATILE_P (str_rtx), &best_mode))
1167         addr_mode = best_mode;
1168       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1169                                               offset, size);
1170     }
1171
1172   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1173                           bitregion_start, bitregion_end,
1174                           fieldmode, value, reverse, true))
1175     gcc_unreachable ();
1176 }
1177 \f
1178 /* Use shifts and boolean operations to store VALUE into a bit field of
1179    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1180    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1181    the mode of VALUE.
1182
1183    If REVERSE is true, the store is to be done in reverse order.  */
1184
1185 static void
1186 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1187                        unsigned HOST_WIDE_INT bitsize,
1188                        unsigned HOST_WIDE_INT bitnum,
1189                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1190                        rtx value, scalar_int_mode value_mode, bool reverse)
1191 {
1192   /* There is a case not handled here:
1193      a structure with a known alignment of just a halfword
1194      and a field split across two aligned halfwords within the structure.
1195      Or likewise a structure with a known alignment of just a byte
1196      and a field split across two bytes.
1197      Such cases are not supposed to be able to occur.  */
1198
1199   scalar_int_mode best_mode;
1200   if (MEM_P (op0))
1201     {
1202       unsigned int max_bitsize = BITS_PER_WORD;
1203       scalar_int_mode imode;
1204       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1205         max_bitsize = GET_MODE_BITSIZE (imode);
1206       /* MAX_BITSIZE is now BITS_PER_WORD or the width of OP0's mode, if narrower.  */
1207       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1208                           MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1209                           &best_mode))
1210         {
1211           /* The only way this should occur is if the field spans word
1212              boundaries.  */
1213           store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1214                                  bitregion_start, bitregion_end,
1215                                  value, value_mode, reverse);
1216           return;
1217         }
1218
1219       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1220     }
1221   else
1222     best_mode = op0_mode.require ();  /* Non-MEM: use OP0's own mode.  */
1223
1224   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1225                            value, value_mode, reverse);
1226 }
1227
1228 /* Helper function for store_fixed_bit_field, stores
1229    the bit field always using MODE, which is the mode of OP0.  The other
1230    arguments are as for store_fixed_bit_field.  */
1231
1232 static void
1233 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1234                          unsigned HOST_WIDE_INT bitsize,
1235                          unsigned HOST_WIDE_INT bitnum,
1236                          rtx value, scalar_int_mode value_mode, bool reverse)
1237 {
1238   rtx temp;
1239   int all_zero = 0;
1240   int all_one = 0;
1241
1242   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1243      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1244
1245   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1246     /* BITNUM is the distance between our msb
1247        and that of the containing datum.
1248        Convert it to the distance from the lsb.  */
1249     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1250
1251   /* Now BITNUM is always the distance between our lsb
1252      and that of OP0.  */
1253
1254   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant, we must
1255      first convert its mode to MODE and, if needed, mask it to BITSIZE bits.  */
1256
1257   if (CONST_INT_P (value))
1258     {
1259       unsigned HOST_WIDE_INT v = UINTVAL (value);
1260       /* Truncate V to BITSIZE bits.  */
1261       if (bitsize < HOST_BITS_PER_WIDE_INT)
1262         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1263       /* Record whether the field value is all zeros or all ones.  */
1264       if (v == 0)
1265         all_zero = 1;
1266       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1267                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1268                || (bitsize == HOST_BITS_PER_WIDE_INT
1269                    && v == HOST_WIDE_INT_M1U))
1270         all_one = 1;
1271
1272       value = lshift_value (mode, v, bitnum);
1273     }
1274   else
1275     {
1276       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1277                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1278
1279       if (value_mode != mode)
1280         value = convert_to_mode (mode, value, 1);
1281
1282       if (must_and)
1283         value = expand_binop (mode, and_optab, value,
1284                               mask_rtx (mode, 0, bitsize, 0),
1285                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1286       if (bitnum > 0)
1287         value = expand_shift (LSHIFT_EXPR, mode, value,
1288                               bitnum, NULL_RTX, 1);
1289     }
1290
1291   if (reverse)
1292     value = flip_storage_order (mode, value);
1293
1294   /* Now clear the chosen bits in OP0,
1295      except that if VALUE is all ones in the field we need not bother.  */
1296   /* We keep the intermediates in registers to allow CSE to combine
1297      consecutive bitfield assignments.  */
1298
1299   temp = force_reg (mode, op0);
1300
1301   if (! all_one)
1302     {
1303       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1304       if (reverse)
1305         mask = flip_storage_order (mode, mask);
1306       temp = expand_binop (mode, and_optab, temp, mask,
1307                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308       temp = force_reg (mode, temp);
1309     }
1310
1311   /* Now logical-or VALUE into OP0, unless it is zero.  */
1312
1313   if (! all_zero)
1314     {
1315       temp = expand_binop (mode, ior_optab, temp, value,
1316                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1317       temp = force_reg (mode, temp);
1318     }
1319
1320   if (op0 != temp)
1321     {
1322       op0 = copy_rtx (op0);
1323       emit_move_insn (op0, temp);
1324     }
1325 }
1326 \f
1327 /* Store a bit field that is split across multiple accessible memory objects.
1328
1329    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1330    BITSIZE is the field width; BITPOS the position of its first bit
1331    (within the word).
1332    VALUE is the value to store, which has mode VALUE_MODE.
1333    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1334    a BLKmode MEM.
1335
1336    If REVERSE is true, the store is to be done in reverse order.
1337
1338    This does not yet handle fields wider than BITS_PER_WORD.  */
1339
1340 static void
1341 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1342                        unsigned HOST_WIDE_INT bitsize,
1343                        unsigned HOST_WIDE_INT bitpos,
1344                        poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1345                        rtx value, scalar_int_mode value_mode, bool reverse)
1346 {
1347   unsigned int unit, total_bits, bitsdone = 0;
1348
1349   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1350      much at a time.  */
1351   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1352     unit = BITS_PER_WORD;
1353   else
1354     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1355
1356   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1357      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1358      again, and we will mutually recurse forever.  */
1359   if (MEM_P (op0) && op0_mode.exists ())
1360     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1361
1362   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1363      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1364      that VALUE might be a floating-point constant.  */
1365   if (CONSTANT_P (value) && !CONST_INT_P (value))
1366     {
1367       rtx word = gen_lowpart_common (word_mode, value);
1368
1369       if (word && (value != word))
1370         value = word;
1371       else
1372         value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1373       value_mode = word_mode;
1374     }
1375
1376   total_bits = GET_MODE_BITSIZE (value_mode);
1377
1378   while (bitsdone < bitsize)
1379     {
1380       unsigned HOST_WIDE_INT thissize;
1381       unsigned HOST_WIDE_INT thispos;
1382       unsigned HOST_WIDE_INT offset;
1383       rtx part;
1384
1385       offset = (bitpos + bitsdone) / unit;
1386       thispos = (bitpos + bitsdone) % unit;
1387
1388       /* When region of bytes we can touch is restricted, decrease
1389          UNIT close to the end of the region as needed.  If op0 is a REG
1390          or SUBREG of REG, don't do this, as there can't be data races
1391          on a register and we can expand shorter code in some cases.  */
1392       if (maybe_ne (bitregion_end, 0U)
1393           && unit > BITS_PER_UNIT
1394           && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1395           && !REG_P (op0)
1396           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1397         {
1398           unit = unit / 2;
1399           continue;
1400         }
1401
1402       /* THISSIZE must not overrun a word boundary.  Otherwise,
1403          store_fixed_bit_field will call us again, and we will mutually
1404          recurse forever.  */
1405       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1406       thissize = MIN (thissize, unit - thispos);
1407
1408       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1409         {
1410           /* Fetch successively less significant portions.  */
1411           if (CONST_INT_P (value))
1412             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1413                              >> (bitsize - bitsdone - thissize))
1414                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1415           /* Likewise, but the source is little-endian.  */
1416           else if (reverse)
1417             part = extract_fixed_bit_field (word_mode, value, value_mode,
1418                                             thissize,
1419                                             bitsize - bitsdone - thissize,
1420                                             NULL_RTX, 1, false);
1421           else
1422             /* The args are chosen so that the last part includes the
1423                lsb.  Give extract_bit_field the value it needs (with
1424                endianness compensation) to fetch the piece we want.  */
1425             part = extract_fixed_bit_field (word_mode, value, value_mode,
1426                                             thissize,
1427                                             total_bits - bitsize + bitsdone,
1428                                             NULL_RTX, 1, false);
1429         }
1430       else
1431         {
1432           /* Fetch successively more significant portions.  */
1433           if (CONST_INT_P (value))
1434             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435                              >> bitsdone)
1436                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1437           /* Likewise, but the source is big-endian.  */
1438           else if (reverse)
1439             part = extract_fixed_bit_field (word_mode, value, value_mode,
1440                                             thissize,
1441                                             total_bits - bitsdone - thissize,
1442                                             NULL_RTX, 1, false);
1443           else
1444             part = extract_fixed_bit_field (word_mode, value, value_mode,
1445                                             thissize, bitsdone, NULL_RTX,
1446                                             1, false);
1447         }
1448
1449       /* If OP0 is a register, then handle OFFSET here.  */
1450       rtx op0_piece = op0;
1451       opt_scalar_int_mode op0_piece_mode = op0_mode;
1452       if (SUBREG_P (op0) || REG_P (op0))
1453         {
1454           scalar_int_mode imode;
1455           if (op0_mode.exists (&imode)
1456               && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1457             {
1458               if (offset)
1459                 op0_piece = const0_rtx;
1460             }
1461           else
1462             {
1463               op0_piece = operand_subword_force (op0,
1464                                                  offset * unit / BITS_PER_WORD,
1465                                                  GET_MODE (op0));
1466               op0_piece_mode = word_mode;
1467             }
1468           offset &= BITS_PER_WORD / unit - 1;  /* OFFSET modulo units-per-word (mask assumes a power of two).  */
1469         }
1470
1471       /* OFFSET is in UNITs, and UNIT is in bits.  If OP0_PIECE is const0_rtx,
1472          it is just an out-of-bounds access.  Ignore it.  */
1473       if (op0_piece != const0_rtx)
1474         store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1475                                offset * unit + thispos, bitregion_start,
1476                                bitregion_end, part, word_mode, reverse);
1477       bitsdone += thissize;
1478     }
1479 }
1480 \f
1481 /* A subroutine of extract_bit_field_1 that converts return value X
1482    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1483    to extract_bit_field.  X is returned as-is if it already has either mode.  */
1484
1485 static rtx
1486 convert_extracted_bit_field (rtx x, machine_mode mode,
1487                              machine_mode tmode, bool unsignedp)
1488 {
1489   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1490     return x;
1491
1492   /* If TMODE is not a scalar integer mode, first convert X to the
1493      integer mode that int_mode_for_mode gives for TMODE and then
1494      access the result in TMODE via gen_lowpart.  */
1495   if (!SCALAR_INT_MODE_P (tmode))
1496     {
1497       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1498       x = convert_to_mode (int_mode, x, unsignedp);
1499       x = force_reg (int_mode, x);
1500       return gen_lowpart (tmode, x);
1501     }
1502
1503   return convert_to_mode (tmode, x, unsignedp);
1504 }
1505
1506 /* Try to use an ext(z)v pattern to extract a field from OP0.
1507    Return the extracted value on success, otherwise return null.
1508    EXTV describes the extraction instruction to use.  If OP0_MODE
1509    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1510    The other arguments are as for extract_bit_field.

        Null is returned when BITSIZE is zero or wider than EXTV's field
        mode, when OP0 is a SUBREG whose mode differs from that field mode,
        or when the pattern fails to expand.  */
1511
1512 static rtx
1513 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1514                               opt_scalar_int_mode op0_mode,
1515                               unsigned HOST_WIDE_INT bitsize,
1516                               unsigned HOST_WIDE_INT bitnum,
1517                               int unsignedp, rtx target,
1518                               machine_mode mode, machine_mode tmode)
1519 {
1520   class expand_operand ops[4];
       /* SPEC_TARGET is the register the result is wanted in (the caller's
          TARGET, or a fresh TMODE pseudo if none was given).
          SPEC_TARGET_SUBREG, when set, is the EXT_MODE lowpart of
          SPEC_TARGET that is handed to the pattern in its place.  */
1521   rtx spec_target = target;
1522   rtx spec_target_subreg = 0;
       /* EXT_MODE is the field mode of the pattern; UNIT is its width in
          bits.  */
1523   scalar_int_mode ext_mode = extv->field_mode;
1524   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1525
       /* Give up on empty fields and on fields wider than EXT_MODE.  */
1526   if (bitsize == 0 || unit < bitsize)
1527     return NULL_RTX;
1528
1529   if (MEM_P (op0))
1530     /* Get a reference to the first byte of the field.  */
1531     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1532                                 &bitnum);
1533   else
1534     {
1535       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1536       if (BYTES_BIG_ENDIAN)
1537         bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1538
1539       /* If op0 is a register, we need it in EXT_MODE to make it
1540          acceptable to the format of ext(z)v.  */
1541       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1542         return NULL_RTX;
1543       if (REG_P (op0) && op0_mode.require () != ext_mode)
1544         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1545     }
1546
1547   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1548      "backwards" from the size of the unit we are extracting from.
1549      Otherwise, we count bits from the most significant on a
1550      BYTES/BITS_BIG_ENDIAN machine.  */
1551
1552   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553     bitnum = unit - bitsize - bitnum;
1554
1555   if (target == 0)
1556     target = spec_target = gen_reg_rtx (tmode);
1557
1558   if (GET_MODE (target) != ext_mode)
1559     {
1560       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1561          between the mode of the extraction (word_mode) and the target
1562          mode.  Instead, create a temporary and use convert_move to set
1563          the target.  */
1564       if (REG_P (target)
1565           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1566         {
1567           target = gen_lowpart (ext_mode, target);
1568           if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1569             spec_target_subreg = target;
1570         }
1571       else
1572         target = gen_reg_rtx (ext_mode);
1573     }
1574
       /* Operands: 0 = output in EXT_MODE, 1 = the structure, 2 = field
          width, 3 = field position.  */
1575   create_output_operand (&ops[0], target, ext_mode);
1576   create_fixed_operand (&ops[1], op0);
1577   create_integer_operand (&ops[2], bitsize);
1578   create_integer_operand (&ops[3], bitnum);
1579   if (maybe_expand_insn (extv->icode, 4, ops))
1580     {
           /* Return SPEC_TARGET if the result ended up in it, either
              directly or through its EXT_MODE lowpart; otherwise convert
              the result to MODE or TMODE.  */
1581       target = ops[0].value;
1582       if (target == spec_target)
1583         return target;
1584       if (target == spec_target_subreg)
1585         return spec_target;
1586       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1587     }
1588   return NULL_RTX;
1589 }
1590
1591 /* Return a MODE subreg of OP0 covering BITSIZE bits at BITNUM if such
1592    a subreg is a valid way to do the extraction, otherwise null.  */
1593
1594 static rtx
1595 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1596                              poly_uint64 bitsize, poly_uint64 bitnum)
1597 {
1598   const machine_mode op0_mode = GET_MODE (op0);
1599   poly_uint64 byte_offset;
1600   if (!multiple_p (bitnum, BITS_PER_UNIT, &byte_offset)
1601       || !known_eq (bitsize, GET_MODE_BITSIZE (mode))
1602       || !lowpart_bit_field_p (bitnum, bitsize, op0_mode)
1603       || !TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
1604     return NULL_RTX;
1605   return simplify_gen_subreg (mode, op0, op0_mode, byte_offset);
1606 }
1607
1608 /* A subroutine of extract_bit_field, with the same arguments.
1609    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1610    if we can find no other means of implementing the operation.
1611    If FALLBACK_P is false, return NULL instead.

        The vector, subreg and simple-load strategies are tried here;
        whatever remains is handed to extract_integral_bit_field, which
        requires BITSIZE and BITNUM to be constant by then.  */
1612
1613 static rtx
1614 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1615                      int unsignedp, rtx target, machine_mode mode,
1616                      machine_mode tmode, bool reverse, bool fallback_p,
1617                      rtx *alt_rtl)
1618 {
1619   rtx op0 = str_rtx;
1620   machine_mode mode1;
1621
       /* Default TMODE to MODE when the caller passed VOIDmode.  */
1622   if (tmode == VOIDmode)
1623     tmode = mode;
1624
       /* Look through SUBREGs, folding each SUBREG_BYTE into BITNUM.  */
1625   while (GET_CODE (op0) == SUBREG)
1626     {
1627       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1628       op0 = SUBREG_REG (op0);
1629     }
1630
1631   /* If we have an out-of-bounds access to a register, just return an
1632      uninitialized register of the required mode.  This can occur if the
1633      source code contains an out-of-bounds access to a small array.  */
1634   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1635     return gen_reg_rtx (tmode);
1636
1637   if (REG_P (op0)
1638       && mode == GET_MODE (op0)
1639       && known_eq (bitnum, 0U)
1640       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1641     {
1642       if (reverse)
1643         op0 = flip_storage_order (mode, op0);
1644       /* We're trying to extract a full register from itself.  */
1645       return op0;
1646     }
1647
1648   /* First try to check for vector from vector extractions.  */
1649   if (VECTOR_MODE_P (GET_MODE (op0))
1650       && !MEM_P (op0)
1651       && VECTOR_MODE_P (tmode)
1652       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1653       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1654     {
1655       machine_mode new_mode = GET_MODE (op0);
           /* If the element modes differ, look for a vector mode with
              TMODE's element mode and the same total size as OP0.
              NEW_MODE is set to VOIDmode when there is none.  */
1656       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1657         {
1658           scalar_mode inner_mode = GET_MODE_INNER (tmode);
1659           poly_uint64 nunits;
1660           if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1661                            GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1662               || !related_vector_mode (tmode, inner_mode,
1663                                        nunits).exists (&new_mode)
1664               || maybe_ne (GET_MODE_SIZE (new_mode),
1665                            GET_MODE_SIZE (GET_MODE (op0))))
1666             new_mode = VOIDmode;
1667         }
           /* POS is BITNUM expressed in units of TMODE-sized subvectors.  */
1668       poly_uint64 pos;
1669       if (new_mode != VOIDmode
1670           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1671               != CODE_FOR_nothing)
1672           && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1673         {
1674           class expand_operand ops[3];
1675           machine_mode outermode = new_mode;
1676           machine_mode innermode = tmode;
1677           enum insn_code icode
1678             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1679
1680           if (new_mode != GET_MODE (op0))
1681             op0 = gen_lowpart (new_mode, op0);
1682           create_output_operand (&ops[0], target, innermode);
1683           ops[0].target = 1;
1684           create_input_operand (&ops[1], op0, outermode);
1685           create_integer_operand (&ops[2], pos);
1686           if (maybe_expand_insn (icode, 3, ops))
1687             {
                   /* Record the caller's TARGET in *ALT_RTL if the operand
                      still has its target flag set after expansion.  */
1688               if (alt_rtl && ops[0].target)
1689                 *alt_rtl = target;
1690               target = ops[0].value;
1691               if (GET_MODE (target) != mode)
1692                 return gen_lowpart (tmode, target);
1693               return target;
1694             }
1695         }
1696     }
1697
1698   /* See if we can get a better vector mode before extracting.  */
1699   if (VECTOR_MODE_P (GET_MODE (op0))
1700       && !MEM_P (op0)
1701       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1702     {
1703       machine_mode new_mode;
1704
           /* Start the search at the first vector mode of the class that
              corresponds to TMODE's class.  */
1705       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1706         new_mode = MIN_MODE_VECTOR_FLOAT;
1707       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1708         new_mode = MIN_MODE_VECTOR_FRACT;
1709       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1710         new_mode = MIN_MODE_VECTOR_UFRACT;
1711       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1712         new_mode = MIN_MODE_VECTOR_ACCUM;
1713       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1714         new_mode = MIN_MODE_VECTOR_UACCUM;
1715       else
1716         new_mode = MIN_MODE_VECTOR_INT;
1717
           /* Stop at the first supported mode that is as large as OP0 and
              whose elements are the size of TMODE.  */
1718       FOR_EACH_MODE_FROM (new_mode, new_mode)
1719         if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1720             && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1721             && targetm.vector_mode_supported_p (new_mode))
1722           break;
1723       if (new_mode != VOIDmode)
1724         op0 = gen_lowpart (new_mode, op0);
1725     }
1726
1727   /* Use vec_extract patterns for extracting parts of vectors whenever
1728      available.  If that fails, see whether the current modes and bitregion
1729      give a natural subreg.  */
1730   machine_mode outermode = GET_MODE (op0);
1731   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1732     {
1733       scalar_mode innermode = GET_MODE_INNER (outermode);
1734       enum insn_code icode
1735         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1736       poly_uint64 pos;
1737       if (icode != CODE_FOR_nothing
1738           && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1739           && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1740         {
1741           class expand_operand ops[3];
1742
1743           create_output_operand (&ops[0], target, innermode);
1744           ops[0].target = 1;
1745           create_input_operand (&ops[1], op0, outermode);
1746           create_integer_operand (&ops[2], pos);
1747           if (maybe_expand_insn (icode, 3, ops))
1748             {
1749               if (alt_rtl && ops[0].target)
1750                 *alt_rtl = target;
1751               target = ops[0].value;
1752               if (GET_MODE (target) != mode)
1753                 return gen_lowpart (tmode, target);
1754               return target;
1755             }
1756         }
1757       /* Using subregs is useful if we're extracting one register vector
1758          from a multi-register vector.  extract_bit_field_as_subreg checks
1759          for valid bitsize and bitnum, so we don't need to do that here.  */
1760       if (VECTOR_MODE_P (mode))
1761         {
1762           rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1763           if (sub)
1764             return sub;
1765         }
1766     }
1767
1768   /* Make sure we are playing with integral modes.  Pun with subregs
1769      if we aren't.  */
1770   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1771   scalar_int_mode imode;
1772   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1773     {
1774       if (MEM_P (op0))
1775         op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1776                                             0, MEM_SIZE (op0));
1777       else if (op0_mode.exists (&imode))
1778         {
1779           op0 = gen_lowpart (imode, op0);
1780
1781           /* If we got a SUBREG, force it into a register since we
1782              aren't going to be able to do another SUBREG on it.  */
1783           if (GET_CODE (op0) == SUBREG)
1784             op0 = force_reg (imode, op0);
1785         }
1786       else
1787         {
               /* No integer mode matches OP0's mode: copy OP0 to a stack
                  temporary and access that as a BLKmode MEM.  */
1788           poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1789           rtx mem = assign_stack_temp (GET_MODE (op0), size);
1790           emit_move_insn (mem, op0);
1791           op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1792         }
1793     }
1794
1795   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1796      If that's wrong, the solution is to test for it and set TARGET to 0
1797      if needed.  */
1798
1799   /* Get the mode of the field to use for atomic access or subreg
1800      conversion.  */
1801   if (!SCALAR_INT_MODE_P (tmode)
1802       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1803     mode1 = mode;
1804   gcc_assert (mode1 != BLKmode);
1805
1806   /* Extraction of a full MODE1 value can be done with a subreg as long
1807      as the least significant bit of the value is the least significant
1808      bit of either OP0 or a word of OP0.  */
1809   if (!MEM_P (op0) && !reverse)
1810     {
1811       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1812       if (sub)
1813         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1814     }
1815
1816   /* Extraction of a full MODE1 value can be done with a load as long as
1817      the field is on a byte boundary and is sufficiently aligned.  */
1818   poly_uint64 bytenum;
1819   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1820     {
1821       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1822       if (reverse)
1823         op0 = flip_storage_order (mode1, op0);
1824       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1825     }
1826
1827   /* If we have a memory source and a non-constant bit offset, restrict
1828      the memory to the referenced bytes.  This is a worst-case fallback
1829      but is useful for things like vector booleans.  */
1830   if (MEM_P (op0) && !bitnum.is_constant ())
1831     {
1832       bytenum = bits_to_bytes_round_down (bitnum);
1833       bitnum = num_trailing_bits (bitnum);
1834       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1835       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
           /* OP0 is now a BLKmode MEM, so it no longer has an integer mode.  */
1836       op0_mode = opt_scalar_int_mode ();
1837     }
1838
1839   /* It's possible we'll need to handle other cases here for
1840      polynomial bitnum and bitsize.  */
1841
1842   /* From here on we need to be looking at a fixed-size extraction.  */
1843   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1844                                      bitnum.to_constant (), unsignedp,
1845                                      target, mode, tmode, reverse, fallback_p);
1846 }
1847
1848 /* Subroutine of extract_bit_field_1, with the same arguments, except
1849    that BITSIZE and BITNUM are constant.  Handle cases specific to
1850    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1851    otherwise OP0 is a BLKmode MEM.

        Fields wider than a word are assembled word by word; narrower
        fields are tried, in order, with a register ext(z)v pattern, a
        memory ext(z)v pattern, and a load of part of the memory into a
        register.  If none of these works, return null when FALLBACK_P is
        false and otherwise use extract_fixed_bit_field.  */
1852
1853 static rtx
1854 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1855                             unsigned HOST_WIDE_INT bitsize,
1856                             unsigned HOST_WIDE_INT bitnum, int unsignedp,
1857                             rtx target, machine_mode mode, machine_mode tmode,
1858                             bool reverse, bool fallback_p)
1859 {
1860   /* Handle fields bigger than a word.  */
1861
1862   if (bitsize > BITS_PER_WORD)
1863     {
1864       /* Here we transfer the words of the field
1865          in the order least significant first.
1866          This is because the most significant word is the one which may
1867          be less than full.  */
1868
1869       const bool backwards = WORDS_BIG_ENDIAN;
1870       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1871       unsigned int i;
1872       rtx_insn *last;
1873
1874       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1875         target = gen_reg_rtx (mode);
1876
1877       /* In case we're about to clobber a base register or something
1878          (see gcc.c-torture/execute/20040625-1.c).   */
1879       if (reg_mentioned_p (target, op0))
1880         target = gen_reg_rtx (mode);
1881
1882       /* Indicate for flow that the entire target reg is being set.  */
1883       emit_clobber (target);
1884
1885       /* The mode must be fixed-size, since extract_bit_field_1 handles
1886          extractions from variable-sized objects before calling this
1887          function.  */
1888       unsigned int target_size
1889         = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
           /* Remember where we are so that a failed word extraction can
              discard everything emitted by the loop.  */
1890       last = get_last_insn ();
1891       for (i = 0; i < nwords; i++)
1892         {
1893           /* If I is 0, use the low-order word in both field and target;
1894              if I is 1, use the next to lowest word; and so on.  */
1895           /* Word number in TARGET to use.  */
1896           unsigned int wordnum
1897             = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1898           /* Offset from start of field in OP0.  */
1899           unsigned int bit_offset = (backwards ^ reverse
1900                                      ? MAX ((int) bitsize - ((int) i + 1)
1901                                             * BITS_PER_WORD,
1902                                             0)
1903                                      : (int) i * BITS_PER_WORD);
1904           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1905           rtx result_part
1906             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1907                                              bitsize - i * BITS_PER_WORD),
1908                                    bitnum + bit_offset, 1, target_part,
1909                                    mode, word_mode, reverse, fallback_p, NULL);
1910
1911           gcc_assert (target_part);
1912           if (!result_part)
1913             {
1914               delete_insns_since (last);
1915               return NULL;
1916             }
1917
1918           if (result_part != target_part)
1919             emit_move_insn (target_part, result_part);
1920         }
1921
1922       if (unsignedp)
1923         {
1924           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1925              need to be zero'd out.  */
1926           if (target_size > nwords * UNITS_PER_WORD)
1927             {
1928               /* The word-copy loop above has finished, so reuse I.  */
1929               unsigned int total_words
1930                 = target_size / UNITS_PER_WORD;
1931               for (i = nwords; i < total_words; i++)
1932                 emit_move_insn
1933                   (operand_subword (target,
1934                                     backwards ? total_words - i - 1 : i,
1935                                     1, VOIDmode),
1936                    const0_rtx);
1937             }
1938           return target;
1939         }
1940
1941       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1942       target = expand_shift (LSHIFT_EXPR, mode, target,
1943                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1944       return expand_shift (RSHIFT_EXPR, mode, target,
1945                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1946     }
1947
1948   /* If OP0 is a multi-word register, narrow it to the affected word.
1949      If the region spans two words, defer to extract_split_bit_field.  */
1950   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1951     {
1952       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1953         {
1954           if (!fallback_p)
1955             return NULL_RTX;
1956           target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1957                                             unsignedp, reverse);
1958           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1959         }
1960       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1961                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1962       op0_mode = word_mode;
1963       bitnum %= BITS_PER_WORD;
1964     }
1965
1966   /* From here on we know the desired field is smaller than a word.
1967      If OP0 is a register, it too fits within a word.  */
1968   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1969   extraction_insn extv;
1970   if (!MEM_P (op0)
1971       && !reverse
1972       /* ??? We could limit the structure size to the part of OP0 that
1973          contains the field, with appropriate checks for endianness
1974          and TARGET_TRULY_NOOP_TRUNCATION.  */
1975       && get_best_reg_extraction_insn (&extv, pattern,
1976                                        GET_MODE_BITSIZE (op0_mode.require ()),
1977                                        tmode))
1978     {
1979       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1980                                                  bitsize, bitnum,
1981                                                  unsignedp, target, mode,
1982                                                  tmode);
1983       if (result)
1984         return result;
1985     }
1986
1987   /* If OP0 is a memory, try copying it to a register and seeing if a
1988      cheap register alternative is available.  */
1989   if (MEM_P (op0) && !reverse)
1990     {
1991       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1992                                         tmode))
1993         {
1994           rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1995                                                      bitsize, bitnum,
1996                                                      unsignedp, target, mode,
1997                                                      tmode);
1998           if (result)
1999             return result;
2000         }
2001
2002       rtx_insn *last = get_last_insn ();
2003
2004       /* Try loading part of OP0 into a register and extracting the
2005          bitfield from that.  */
2006       unsigned HOST_WIDE_INT bitpos;
2007       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2008                                                0, 0, tmode, &bitpos);
2009       if (xop0)
2010         {
2011           xop0 = copy_to_reg (xop0);
               /* FALLBACK_P is passed as false here so that a failure comes
                  back as null and the copy can be deleted again.  */
2012           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2013                                             unsignedp, target,
2014                                             mode, tmode, reverse, false, NULL);
2015           if (result)
2016             return result;
2017           delete_insns_since (last);
2018         }
2019     }
2020
2021   if (!fallback_p)
2022     return NULL;
2023
2024   /* Find a correspondingly-sized integer field, so we can apply
2025      shifts and masks to it.  */
2026   scalar_int_mode int_mode;
2027   if (!int_mode_for_mode (tmode).exists (&int_mode))
2028     /* If this fails, we should probably push op0 out to memory and then
2029        do a load.  */
2030     int_mode = int_mode_for_mode (mode).require ();
2031
2032   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2033                                     bitnum, target, unsignedp, reverse);
2034
2035   /* Complex values must be reversed piecewise, so we need to undo the global
2036      reversal, convert to the complex mode and reverse again.  */
2037   if (reverse && COMPLEX_MODE_P (tmode))
2038     {
2039       target = flip_storage_order (int_mode, target);
2040       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2041       target = flip_storage_order (tmode, target);
2042     }
2043   else
2044     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2045
2046   return target;
2047 }
2048
2049 /* Generate code to extract a bit-field from STR_RTX
2050    containing BITSIZE bits, starting at BITNUM,
2051    and put it in TARGET if possible (if TARGET is nonzero).
2052    Regardless of TARGET, we return the rtx for where the value is placed.
2053
2054    STR_RTX is the structure containing the field (a REG or MEM).
2055    UNSIGNEDP is nonzero if this is an unsigned bit field.
2056    MODE is the natural mode of the field value once extracted.
2057    TMODE is the mode the caller would like the value to have;
2058    but the value may be returned with type MODE instead.
2059
2060    If REVERSE is true, the extraction is to be done in reverse order.
2061
2062    If a TARGET is specified and we can store in it at no extra cost,
2063    we do so, and return TARGET.
2064    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2065    if they are equally easy.
2066
2067    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2068    then *ALT_RTL is set to TARGET (before legitimization).  */
2069
2070 rtx
2071 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2072                    int unsignedp, rtx target, machine_mode mode,
2073                    machine_mode tmode, bool reverse, rtx *alt_rtl)
2074 {
2075   machine_mode mode1;
2076
2077   /* Handle -fstrict-volatile-bitfields in the cases where it applies.
          MODE1 is the mode tested against strict_volatile_bitfield_p below:
          the mode of STR_RTX if its bitsize may be nonzero, else the mode
          of TARGET under the same condition, else TMODE.  */
2078   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2079     mode1 = GET_MODE (str_rtx);
2080   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2081     mode1 = GET_MODE (target);
2082   else
2083     mode1 = tmode;
2084
       /* The strict-volatile path needs constant BITSIZE and BITNUM and a
          scalar integer MODE1.  */
2085   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2086   scalar_int_mode int_mode;
2087   if (bitsize.is_constant (&ibitsize)
2088       && bitnum.is_constant (&ibitnum)
2089       && is_a <scalar_int_mode> (mode1, &int_mode)
2090       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2091                                      int_mode, 0, 0))
2092     {
2093       /* Extraction of a full INT_MODE value can be done with a simple load.
2094          We know here that the field can be accessed with one single
2095          instruction.  For targets that support unaligned memory,
2096          an unaligned access may be necessary.  */
2097       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2098         {
2099           rtx result = adjust_bitfield_address (str_rtx, int_mode,
2100                                                 ibitnum / BITS_PER_UNIT);
2101           if (reverse)
2102             result = flip_storage_order (int_mode, result);
               /* A full-width field is expected to start on a byte
                  boundary.  */
2103           gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2104           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2105         }
2106
           /* Otherwise narrow STR_RTX to an INT_MODE reference that holds
              the whole field (IBITNUM is updated to match), copy it into
              a register and extract the field from that register.  */
2107       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2108                                       &ibitnum);
2109       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2110       str_rtx = copy_to_reg (str_rtx);
2111       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2112                                   target, mode, tmode, reverse, true, alt_rtl);
2113     }
2114
       /* General case; FALLBACK_P is true so that a result is always
          produced.  */
2115   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2116                               target, mode, tmode, reverse, true, alt_rtl);
2117 }
2118 \f
2119 /* Extract BITSIZE bits starting at bit BITNUM of OP0 using shifts and
2120    boolean operations.  OP0_MODE, when defined, is the mode of OP0;
2121    when it is not defined, OP0 is a BLKmode MEM.
2122
2123    A nonzero UNSIGNEDP means the field is unsigned and must not be
2124    sign-extended.  A true REVERSE requests reverse-order extraction.
2125
2126    A nonzero TARGET is a suggestion for where to put the value; the
2127    result is not guaranteed to be there.  When TARGET is not used, the
2128    value is placed in a new pseudo-reg of mode TMODE.  */
2129
2130 static rtx
2131 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2132                          opt_scalar_int_mode op0_mode,
2133                          unsigned HOST_WIDE_INT bitsize,
2134                          unsigned HOST_WIDE_INT bitnum, rtx target,
2135                          int unsignedp, bool reverse)
2136 {
2137   /* A non-memory OP0 is extracted from directly, in its own mode.  */
2138   if (!MEM_P (op0))
2139     return extract_fixed_bit_field_1 (tmode, op0, op0_mode.require (),
2140                                       bitsize, bitnum, target, unsignedp,
2141                                       reverse);
2142
2143   /* For a MEM, let get_best_mode choose the mode to access it in.  The
2144      only way it should fail is if the field spans word boundaries.  */
2145   scalar_int_mode access_mode;
2146   if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2147                       BITS_PER_WORD, MEM_VOLATILE_P (op0), &access_mode))
2148     return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2149                                     unsignedp, reverse);
2150
2151   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
2152   return extract_fixed_bit_field_1 (tmode, op0, access_mode, bitsize, bitnum,
2153                                     target, unsignedp, reverse);
2154 }
2155
2156 /* Helper function for extract_fixed_bit_field, extracts
2157    the bit field always using MODE, which is the mode of OP0.
2158    The other arguments are as for extract_fixed_bit_field.

        An unsigned field is produced with an unsigned right shift, a
        conversion to TMODE and, where needed, a mask.  A signed field is
        produced with a left shift followed by an arithmetic right shift
        in the narrowest integer mode that contains the field.  */
2159
2160 static rtx
2161 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2162                            unsigned HOST_WIDE_INT bitsize,
2163                            unsigned HOST_WIDE_INT bitnum, rtx target,
2164                            int unsignedp, bool reverse)
2165 {
2166   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2167      for invalid input, such as extract equivalent of f5 from
2168      gcc.dg/pr48335-2.c.  */
2169
2170   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2171     /* BITNUM is the distance between our msb and that of OP0.
2172        Convert it to the distance from the lsb.  */
2173     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2174
2175   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2176      We have reduced the big-endian case to the little-endian case.  */
2177   if (reverse)
2178     op0 = flip_storage_order (mode, op0);
2179
2180   if (unsignedp)
2181     {
2182       if (bitnum)
2183         {
2184           /* If the field does not already start at the lsb,
2185              shift it so it does.  */
2186           /* Maybe propagate the target for the shift.  */
2187           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only suggested
                  as its destination when TMODE is that same mode.  */
2188           if (tmode != mode)
2189             subtarget = 0;
2190           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2191         }
2192       /* Convert the value to the desired mode.  TMODE must also be a
2193          scalar integer for this conversion to make sense, since we
2194          shouldn't reinterpret the bits.  */
2195       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2196       if (mode != new_mode)
2197         op0 = convert_to_mode (new_mode, op0, 1);
2198
2199       /* Unless the msb of the field used to be the msb when we shifted,
2200          mask out the upper bits.  */
2201
2202       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2203         return expand_binop (new_mode, and_optab, op0,
2204                              mask_rtx (new_mode, 0, bitsize, 0),
2205                              target, 1, OPTAB_LIB_WIDEN);
2206       return op0;
2207     }
2208
2209   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2210      then arithmetic-shift its lsb to the lsb of the word.  */
2211   op0 = force_reg (mode, op0);
2212
2213   /* Find the narrowest integer mode that contains the field.  */
2214
2215   opt_scalar_int_mode mode_iter;
2216   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2217     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2218       break;
2219
       /* From here on MODE is that narrowest mode, not the mode OP0
          arrived in.  */
2220   mode = mode_iter.require ();
2221   op0 = convert_to_mode (mode, op0, 0);
2222
       /* TARGET is only usable when the working mode is TMODE.  */
2223   if (mode != tmode)
2224     target = 0;
2225
       /* Move the field's msb up to the msb of MODE unless it is already
          there.  */
2226   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2227     {
2228       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2229       /* Maybe propagate the target for the shift.  */
2230       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2231       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2232     }
2233
       /* Shift back down with a signed shift so that the field's lsb lands
          at bit 0.  */
2234   return expand_shift (RSHIFT_EXPR, mode, op0,
2235                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2236 }
2237
2238 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2239    VALUE << BITPOS.  */
2240
2241 static rtx
2242 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2243               int bitpos)
2244 {
2245   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2246 }
2247 \f
2248 /* Extract a bit field that is split across two words
2249    and return an RTX for the result.
2250
2251    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2252    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2253    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2254    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2255    a BLKmode MEM.
2256
2257    If REVERSE is true, the extraction is to be done in reverse order.  */
2258
2259 static rtx
2260 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2261                          unsigned HOST_WIDE_INT bitsize,
2262                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2263                          bool reverse)
2264 {
2265   unsigned int unit;
2266   unsigned int bitsdone = 0;
2267   rtx result = NULL_RTX;
2268   int first = 1;
2269
2270   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2271      much at a time.  */
2272   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2273     unit = BITS_PER_WORD;
2274   else
2275     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2276
2277   while (bitsdone < bitsize)
2278     {
2279       unsigned HOST_WIDE_INT thissize;
2280       rtx part;
2281       unsigned HOST_WIDE_INT thispos;
2282       unsigned HOST_WIDE_INT offset;
2283
2284       offset = (bitpos + bitsdone) / unit;
2285       thispos = (bitpos + bitsdone) % unit;
2286
2287       /* THISSIZE must not overrun a word boundary.  Otherwise,
2288          extract_fixed_bit_field will call us again, and we will mutually
2289          recurse forever.  */
2290       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2291       thissize = MIN (thissize, unit - thispos);
2292
2293       /* If OP0 is a register, then handle OFFSET here.  */
2294       rtx op0_piece = op0;
2295       opt_scalar_int_mode op0_piece_mode = op0_mode;
2296       if (SUBREG_P (op0) || REG_P (op0))
2297         {
2298           op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2299           op0_piece_mode = word_mode;
2300           offset = 0;
2301         }
2302
2303       /* Extract the parts in bit-counting order,
2304          whose meaning is determined by BYTES_PER_UNIT.
2305          OFFSET is in UNITs, and UNIT is in bits.  */
2306       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2307                                       thissize, offset * unit + thispos,
2308                                       0, 1, reverse);
2309       bitsdone += thissize;
2310
2311       /* Shift this part into place for the result.  */
2312       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2313         {
2314           if (bitsize != bitsdone)
2315             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2316                                  bitsize - bitsdone, 0, 1);
2317         }
2318       else
2319         {
2320           if (bitsdone != thissize)
2321             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2322                                  bitsdone - thissize, 0, 1);
2323         }
2324
2325       if (first)
2326         result = part;
2327       else
2328         /* Combine the parts with bitwise or.  This works
2329            because we extracted each part as an unsigned bit field.  */
2330         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2331                                OPTAB_LIB_WIDEN);
2332
2333       first = 0;
2334     }
2335
2336   /* Unsigned bit field: we are done.  */
2337   if (unsignedp)
2338     return result;
2339   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2340   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2341                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2342   return expand_shift (RSHIFT_EXPR, word_mode, result,
2343                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2344 }
2345 \f
2346 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2347    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2348    MODE, fill the upper bits with zeros.  Fail if the layout of either
2349    mode is unknown (as for CC modes) or if the extraction would involve
2350    unprofitable mode punning.  Return the value on success, otherwise
2351    return null.
2352
2353    This is different from gen_lowpart* in these respects:
2354
2355      - the returned value must always be considered an rvalue
2356
2357      - when MODE is wider than SRC_MODE, the extraction involves
2358        a zero extension
2359
2360      - when MODE is smaller than SRC_MODE, the extraction involves
2361        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2362
2363    In other words, this routine performs a computation, whereas the
2364    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2365    operations.  */
2366
2367 rtx
2368 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2369 {
2370   scalar_int_mode int_mode, src_int_mode;
2371
2372   if (mode == src_mode)
2373     return src;
2374
2375   if (CONSTANT_P (src))
2376     {
2377       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2378          fails, it will happily create (subreg (symbol_ref)) or similar
2379          invalid SUBREGs.  */
2380       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2381       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2382       if (ret)
2383         return ret;
2384
2385       if (GET_MODE (src) == VOIDmode
2386           || !validate_subreg (mode, src_mode, src, byte))
2387         return NULL_RTX;
2388
2389       src = force_reg (GET_MODE (src), src);
2390       return gen_rtx_SUBREG (mode, src, byte);
2391     }
2392
2393   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2394     return NULL_RTX;
2395
2396   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2397       && targetm.modes_tieable_p (mode, src_mode))
2398     {
2399       rtx x = gen_lowpart_common (mode, src);
2400       if (x)
2401         return x;
2402     }
2403
2404   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2405       || !int_mode_for_mode (mode).exists (&int_mode))
2406     return NULL_RTX;
2407
2408   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2409     return NULL_RTX;
2410   if (!targetm.modes_tieable_p (int_mode, mode))
2411     return NULL_RTX;
2412
2413   src = gen_lowpart (src_int_mode, src);
2414   if (!validate_subreg (int_mode, src_int_mode, src,
2415                         subreg_lowpart_offset (int_mode, src_int_mode)))
2416     return NULL_RTX;
2417
2418   src = convert_modes (int_mode, src_int_mode, src, true);
2419   src = gen_lowpart (mode, src);
2420   return src;
2421 }
2422 \f
2423 /* Add INC into TARGET.  */
2424
2425 void
2426 expand_inc (rtx target, rtx inc)
2427 {
2428   rtx value = expand_binop (GET_MODE (target), add_optab,
2429                             target, inc,
2430                             target, 0, OPTAB_LIB_WIDEN);
2431   if (value != target)
2432     emit_move_insn (target, value);
2433 }
2434
2435 /* Subtract DEC from TARGET.  */
2436
2437 void
2438 expand_dec (rtx target, rtx dec)
2439 {
2440   rtx value = expand_binop (GET_MODE (target), sub_optab,
2441                             target, dec,
2442                             target, 0, OPTAB_LIB_WIDEN);
2443   if (value != target)
2444     emit_move_insn (target, value);
2445 }
2446 \f
2447 /* Output a shift instruction for expression code CODE,
2448    with SHIFTED being the rtx for the value to shift,
2449    and AMOUNT the rtx for the amount to shift by.
2450    Store the result in the rtx TARGET, if that is convenient.
2451    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2452    Return the rtx for where the value is.
2453    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2454    in which case 0 is returned.  */
2455
2456 static rtx
2457 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2458                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2459 {
2460   rtx op1, temp = 0;
2461   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2462   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2463   optab lshift_optab = ashl_optab;
2464   optab rshift_arith_optab = ashr_optab;
2465   optab rshift_uns_optab = lshr_optab;
2466   optab lrotate_optab = rotl_optab;
2467   optab rrotate_optab = rotr_optab;
2468   machine_mode op1_mode;
2469   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2470   int attempt;
2471   bool speed = optimize_insn_for_speed_p ();
2472
2473   op1 = amount;
2474   op1_mode = GET_MODE (op1);
2475
2476   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2477      shift amount is a vector, use the vector/vector shift patterns.  */
2478   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2479     {
2480       lshift_optab = vashl_optab;
2481       rshift_arith_optab = vashr_optab;
2482       rshift_uns_optab = vlshr_optab;
2483       lrotate_optab = vrotl_optab;
2484       rrotate_optab = vrotr_optab;
2485     }
2486
2487   /* Previously detected shift-counts computed by NEGATE_EXPR
2488      and shifted in the other direction; but that does not work
2489      on all machines.  */
2490
2491   if (SHIFT_COUNT_TRUNCATED)
2492     {
2493       if (CONST_INT_P (op1)
2494           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2495               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2496         op1 = gen_int_shift_amount (mode,
2497                                     (unsigned HOST_WIDE_INT) INTVAL (op1)
2498                                     % GET_MODE_BITSIZE (scalar_mode));
2499       else if (GET_CODE (op1) == SUBREG
2500                && subreg_lowpart_p (op1)
2501                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2502                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2503         op1 = SUBREG_REG (op1);
2504     }
2505
2506   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2507      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2508      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2509      amount instead.  */
2510   if (rotate
2511       && CONST_INT_P (op1)
2512       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2513                    GET_MODE_BITSIZE (scalar_mode) - 1))
2514     {
2515       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2516                                          - INTVAL (op1)));
2517       left = !left;
2518       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2519     }
2520
2521   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2522      Note that this is not the case for bigger values.  For instance a rotation
2523      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2524      0x04030201 (bswapsi).  */
2525   if (rotate
2526       && CONST_INT_P (op1)
2527       && INTVAL (op1) == BITS_PER_UNIT
2528       && GET_MODE_SIZE (scalar_mode) == 2
2529       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2530     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2531
2532   if (op1 == const0_rtx)
2533     return shifted;
2534
2535   /* Check whether its cheaper to implement a left shift by a constant
2536      bit count by a sequence of additions.  */
2537   if (code == LSHIFT_EXPR
2538       && CONST_INT_P (op1)
2539       && INTVAL (op1) > 0
2540       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2541       && INTVAL (op1) < MAX_BITS_PER_WORD
2542       && (shift_cost (speed, mode, INTVAL (op1))
2543           > INTVAL (op1) * add_cost (speed, mode))
2544       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2545     {
2546       int i;
2547       for (i = 0; i < INTVAL (op1); i++)
2548         {
2549           temp = force_reg (mode, shifted);
2550           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2551                                   unsignedp, OPTAB_LIB_WIDEN);
2552         }
2553       return shifted;
2554     }
2555
2556   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2557     {
2558       enum optab_methods methods;
2559
2560       if (attempt == 0)
2561         methods = OPTAB_DIRECT;
2562       else if (attempt == 1)
2563         methods = OPTAB_WIDEN;
2564       else
2565         methods = OPTAB_LIB_WIDEN;
2566
2567       if (rotate)
2568         {
2569           /* Widening does not work for rotation.  */
2570           if (methods == OPTAB_WIDEN)
2571             continue;
2572           else if (methods == OPTAB_LIB_WIDEN)
2573             {
2574               /* If we have been unable to open-code this by a rotation,
2575                  do it as the IOR of two shifts.  I.e., to rotate A
2576                  by N bits, compute
2577                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2578                  where C is the bitsize of A.
2579
2580                  It is theoretically possible that the target machine might
2581                  not be able to perform either shift and hence we would
2582                  be making two libcalls rather than just the one for the
2583                  shift (similarly if IOR could not be done).  We will allow
2584                  this extremely unlikely lossage to avoid complicating the
2585                  code below.  */
2586
2587               rtx subtarget = target == shifted ? 0 : target;
2588               rtx new_amount, other_amount;
2589               rtx temp1;
2590
2591               new_amount = op1;
2592               if (op1 == const0_rtx)
2593                 return shifted;
2594               else if (CONST_INT_P (op1))
2595                 other_amount = gen_int_shift_amount
2596                   (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2597               else
2598                 {
2599                   other_amount
2600                     = simplify_gen_unary (NEG, GET_MODE (op1),
2601                                           op1, GET_MODE (op1));
2602                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2603                   other_amount
2604                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2605                                            gen_int_mode (mask, GET_MODE (op1)));
2606                 }
2607
2608               shifted = force_reg (mode, shifted);
2609
2610               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2611                                      mode, shifted, new_amount, 0, 1);
2612               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2613                                       mode, shifted, other_amount,
2614                                       subtarget, 1);
2615               return expand_binop (mode, ior_optab, temp, temp1, target,
2616                                    unsignedp, methods);
2617             }
2618
2619           temp = expand_binop (mode,
2620                                left ? lrotate_optab : rrotate_optab,
2621                                shifted, op1, target, unsignedp, methods);
2622         }
2623       else if (unsignedp)
2624         temp = expand_binop (mode,
2625                              left ? lshift_optab : rshift_uns_optab,
2626                              shifted, op1, target, unsignedp, methods);
2627
2628       /* Do arithmetic shifts.
2629          Also, if we are going to widen the operand, we can just as well
2630          use an arithmetic right-shift instead of a logical one.  */
2631       if (temp == 0 && ! rotate
2632           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2633         {
2634           enum optab_methods methods1 = methods;
2635
2636           /* If trying to widen a log shift to an arithmetic shift,
2637              don't accept an arithmetic shift of the same size.  */
2638           if (unsignedp)
2639             methods1 = OPTAB_MUST_WIDEN;
2640
2641           /* Arithmetic shift */
2642
2643           temp = expand_binop (mode,
2644                                left ? lshift_optab : rshift_arith_optab,
2645                                shifted, op1, target, unsignedp, methods1);
2646         }
2647
2648       /* We used to try extzv here for logical right shifts, but that was
2649          only useful for one machine, the VAX, and caused poor code
2650          generation there for lshrdi3, so the code was deleted and a
2651          define_expand for lshrsi3 was added to vax.md.  */
2652     }
2653
2654   gcc_assert (temp != NULL_RTX || may_fail);
2655   return temp;
2656 }
2657
2658 /* Output a shift instruction for expression code CODE,
2659    with SHIFTED being the rtx for the value to shift,
2660    and AMOUNT the amount to shift by.
2661    Store the result in the rtx TARGET, if that is convenient.
2662    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2663    Return the rtx for where the value is.  */
2664
2665 rtx
2666 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2667               poly_int64 amount, rtx target, int unsignedp)
2668 {
2669   return expand_shift_1 (code, mode, shifted,
2670                          gen_int_shift_amount (mode, amount),
2671                          target, unsignedp);
2672 }
2673
2674 /* Likewise, but return 0 if that cannot be done.  */
2675
2676 static rtx
2677 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2678                     int amount, rtx target, int unsignedp)
2679 {
2680   return expand_shift_1 (code, mode,
2681                          shifted, GEN_INT (amount), target, unsignedp, true);
2682 }
2683
2684 /* Output a shift instruction for expression code CODE,
2685    with SHIFTED being the rtx for the value to shift,
2686    and AMOUNT the tree for the amount to shift by.
2687    Store the result in the rtx TARGET, if that is convenient.
2688    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689    Return the rtx for where the value is.  */
2690
2691 rtx
2692 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693                        tree amount, rtx target, int unsignedp)
2694 {
2695   return expand_shift_1 (code, mode,
2696                          shifted, expand_normal (amount), target, unsignedp);
2697 }
2698
2699 \f
2700 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2701                         const struct mult_cost *, machine_mode mode);
2702 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2703                               const struct algorithm *, enum mult_variant);
2704 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2705 static rtx extract_high_half (scalar_int_mode, rtx);
2706 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2707 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2708                                        int, int);
2709 /* Compute and return the best algorithm for multiplying by T.
2710    The algorithm must cost less than cost_limit
2711    If retval.cost >= COST_LIMIT, no algorithm was found and all
2712    other field of the returned struct are undefined.
2713    MODE is the machine mode of the multiplication.  */
2714
2715 static void
2716 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2717             const struct mult_cost *cost_limit, machine_mode mode)
2718 {
2719   int m;
2720   struct algorithm *alg_in, *best_alg;
2721   struct mult_cost best_cost;
2722   struct mult_cost new_limit;
2723   int op_cost, op_latency;
2724   unsigned HOST_WIDE_INT orig_t = t;
2725   unsigned HOST_WIDE_INT q;
2726   int maxm, hash_index;
2727   bool cache_hit = false;
2728   enum alg_code cache_alg = alg_zero;
2729   bool speed = optimize_insn_for_speed_p ();
2730   scalar_int_mode imode;
2731   struct alg_hash_entry *entry_ptr;
2732
2733   /* Indicate that no algorithm is yet found.  If no algorithm
2734      is found, this value will be returned and indicate failure.  */
2735   alg_out->cost.cost = cost_limit->cost + 1;
2736   alg_out->cost.latency = cost_limit->latency + 1;
2737
2738   if (cost_limit->cost < 0
2739       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2740     return;
2741
2742   /* Be prepared for vector modes.  */
2743   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2744
2745   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2746
2747   /* Restrict the bits of "t" to the multiplication's mode.  */
2748   t &= GET_MODE_MASK (imode);
2749
2750   /* t == 1 can be done in zero cost.  */
2751   if (t == 1)
2752     {
2753       alg_out->ops = 1;
2754       alg_out->cost.cost = 0;
2755       alg_out->cost.latency = 0;
2756       alg_out->op[0] = alg_m;
2757       return;
2758     }
2759
2760   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2761      fail now.  */
2762   if (t == 0)
2763     {
2764       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2765         return;
2766       else
2767         {
2768           alg_out->ops = 1;
2769           alg_out->cost.cost = zero_cost (speed);
2770           alg_out->cost.latency = zero_cost (speed);
2771           alg_out->op[0] = alg_zero;
2772           return;
2773         }
2774     }
2775
2776   /* We'll be needing a couple extra algorithm structures now.  */
2777
2778   alg_in = XALLOCA (struct algorithm);
2779   best_alg = XALLOCA (struct algorithm);
2780   best_cost = *cost_limit;
2781
2782   /* Compute the hash index.  */
2783   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2784
2785   /* See if we already know what to do for T.  */
2786   entry_ptr = alg_hash_entry_ptr (hash_index);
2787   if (entry_ptr->t == t
2788       && entry_ptr->mode == mode
2789       && entry_ptr->speed == speed
2790       && entry_ptr->alg != alg_unknown)
2791     {
2792       cache_alg = entry_ptr->alg;
2793
2794       if (cache_alg == alg_impossible)
2795         {
2796           /* The cache tells us that it's impossible to synthesize
2797              multiplication by T within entry_ptr->cost.  */
2798           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2799             /* COST_LIMIT is at least as restrictive as the one
2800                recorded in the hash table, in which case we have no
2801                hope of synthesizing a multiplication.  Just
2802                return.  */
2803             return;
2804
2805           /* If we get here, COST_LIMIT is less restrictive than the
2806              one recorded in the hash table, so we may be able to
2807              synthesize a multiplication.  Proceed as if we didn't
2808              have the cache entry.  */
2809         }
2810       else
2811         {
2812           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2813             /* The cached algorithm shows that this multiplication
2814                requires more cost than COST_LIMIT.  Just return.  This
2815                way, we don't clobber this cache entry with
2816                alg_impossible but retain useful information.  */
2817             return;
2818
2819           cache_hit = true;
2820
2821           switch (cache_alg)
2822             {
2823             case alg_shift:
2824               goto do_alg_shift;
2825
2826             case alg_add_t_m2:
2827             case alg_sub_t_m2:
2828               goto do_alg_addsub_t_m2;
2829
2830             case alg_add_factor:
2831             case alg_sub_factor:
2832               goto do_alg_addsub_factor;
2833
2834             case alg_add_t2_m:
2835               goto do_alg_add_t2_m;
2836
2837             case alg_sub_t2_m:
2838               goto do_alg_sub_t2_m;
2839
2840             default:
2841               gcc_unreachable ();
2842             }
2843         }
2844     }
2845
2846   /* If we have a group of zero bits at the low-order part of T, try
2847      multiplying by the remaining bits and then doing a shift.  */
2848
2849   if ((t & 1) == 0)
2850     {
2851     do_alg_shift:
2852       m = ctz_or_zero (t); /* m = number of low zero bits */
2853       if (m < maxm)
2854         {
2855           q = t >> m;
2856           /* The function expand_shift will choose between a shift and
2857              a sequence of additions, so the observed cost is given as
2858              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2859           op_cost = m * add_cost (speed, mode);
2860           if (shift_cost (speed, mode, m) < op_cost)
2861             op_cost = shift_cost (speed, mode, m);
2862           new_limit.cost = best_cost.cost - op_cost;
2863           new_limit.latency = best_cost.latency - op_cost;
2864           synth_mult (alg_in, q, &new_limit, mode);
2865
2866           alg_in->cost.cost += op_cost;
2867           alg_in->cost.latency += op_cost;
2868           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869             {
2870               best_cost = alg_in->cost;
2871               std::swap (alg_in, best_alg);
2872               best_alg->log[best_alg->ops] = m;
2873               best_alg->op[best_alg->ops] = alg_shift;
2874             }
2875
2876           /* See if treating ORIG_T as a signed number yields a better
2877              sequence.  Try this sequence only for a negative ORIG_T
2878              as it would be useless for a non-negative ORIG_T.  */
2879           if ((HOST_WIDE_INT) orig_t < 0)
2880             {
2881               /* Shift ORIG_T as follows because a right shift of a
2882                  negative-valued signed type is implementation
2883                  defined.  */
2884               q = ~(~orig_t >> m);
2885               /* The function expand_shift will choose between a shift
2886                  and a sequence of additions, so the observed cost is
2887                  given as MIN (m * add_cost(speed, mode),
2888                  shift_cost(speed, mode, m)).  */
2889               op_cost = m * add_cost (speed, mode);
2890               if (shift_cost (speed, mode, m) < op_cost)
2891                 op_cost = shift_cost (speed, mode, m);
2892               new_limit.cost = best_cost.cost - op_cost;
2893               new_limit.latency = best_cost.latency - op_cost;
2894               synth_mult (alg_in, q, &new_limit, mode);
2895
2896               alg_in->cost.cost += op_cost;
2897               alg_in->cost.latency += op_cost;
2898               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2899                 {
2900                   best_cost = alg_in->cost;
2901                   std::swap (alg_in, best_alg);
2902                   best_alg->log[best_alg->ops] = m;
2903                   best_alg->op[best_alg->ops] = alg_shift;
2904                 }
2905             }
2906         }
2907       if (cache_hit)
2908         goto done;
2909     }
2910
2911   /* If we have an odd number, add or subtract one.  */
2912   if ((t & 1) != 0)
2913     {
2914       unsigned HOST_WIDE_INT w;
2915
2916     do_alg_addsub_t_m2:
2917       for (w = 1; (w & t) != 0; w <<= 1)
2918         ;
2919       /* If T was -1, then W will be zero after the loop.  This is another
2920          case where T ends with ...111.  Handling this with (T + 1) and
2921          subtract 1 produces slightly better code and results in algorithm
2922          selection much faster than treating it like the ...0111 case
2923          below.  */
2924       if (w == 0
2925           || (w > 2
2926               /* Reject the case where t is 3.
2927                  Thus we prefer addition in that case.  */
2928               && t != 3))
2929         {
2930           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2931
2932           op_cost = add_cost (speed, mode);
2933           new_limit.cost = best_cost.cost - op_cost;
2934           new_limit.latency = best_cost.latency - op_cost;
2935           synth_mult (alg_in, t + 1, &new_limit, mode);
2936
2937           alg_in->cost.cost += op_cost;
2938           alg_in->cost.latency += op_cost;
2939           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2940             {
2941               best_cost = alg_in->cost;
2942               std::swap (alg_in, best_alg);
2943               best_alg->log[best_alg->ops] = 0;
2944               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2945             }
2946         }
2947       else
2948         {
2949           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2950
2951           op_cost = add_cost (speed, mode);
2952           new_limit.cost = best_cost.cost - op_cost;
2953           new_limit.latency = best_cost.latency - op_cost;
2954           synth_mult (alg_in, t - 1, &new_limit, mode);
2955
2956           alg_in->cost.cost += op_cost;
2957           alg_in->cost.latency += op_cost;
2958           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2959             {
2960               best_cost = alg_in->cost;
2961               std::swap (alg_in, best_alg);
2962               best_alg->log[best_alg->ops] = 0;
2963               best_alg->op[best_alg->ops] = alg_add_t_m2;
2964             }
2965         }
2966
2967       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2968          quickly with a - a * n for some appropriate constant n.  */
2969       m = exact_log2 (-orig_t + 1);
2970       if (m >= 0 && m < maxm)
2971         {
2972           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2973           /* If the target has a cheap shift-and-subtract insn use
2974              that in preference to a shift insn followed by a sub insn.
2975              Assume that the shift-and-sub is "atomic" with a latency
2976              equal to it's cost, otherwise assume that on superscalar
2977              hardware the shift may be executed concurrently with the
2978              earlier steps in the algorithm.  */
2979           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2980             {
2981               op_cost = shiftsub1_cost (speed, mode, m);
2982               op_latency = op_cost;
2983             }
2984           else
2985             op_latency = add_cost (speed, mode);
2986
2987           new_limit.cost = best_cost.cost - op_cost;
2988           new_limit.latency = best_cost.latency - op_latency;
2989           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2990                       &new_limit, mode);
2991
2992           alg_in->cost.cost += op_cost;
2993           alg_in->cost.latency += op_latency;
2994           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2995             {
2996               best_cost = alg_in->cost;
2997               std::swap (alg_in, best_alg);
2998               best_alg->log[best_alg->ops] = m;
2999               best_alg->op[best_alg->ops] = alg_sub_t_m2;
3000             }
3001         }
3002
3003       if (cache_hit)
3004         goto done;
3005     }
3006
3007   /* Look for factors of t of the form
3008      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3009      If we find such a factor, we can multiply by t using an algorithm that
3010      multiplies by q, shift the result by m and add/subtract it to itself.
3011
3012      We search for large factors first and loop down, even if large factors
3013      are less probable than small; if we find a large factor we will find a
3014      good sequence quickly, and therefore be able to prune (by decreasing
3015      COST_LIMIT) the search.  */
3016
3017  do_alg_addsub_factor:
3018   for (m = floor_log2 (t - 1); m >= 2; m--)
3019     {
3020       unsigned HOST_WIDE_INT d;
3021
3022       d = (HOST_WIDE_INT_1U << m) + 1;
3023       if (t % d == 0 && t > d && m < maxm
3024           && (!cache_hit || cache_alg == alg_add_factor))
3025         {
3026           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3027           if (shiftadd_cost (speed, mode, m) <= op_cost)
3028             op_cost = shiftadd_cost (speed, mode, m);
3029
3030           op_latency = op_cost;
3031
3032
3033           new_limit.cost = best_cost.cost - op_cost;
3034           new_limit.latency = best_cost.latency - op_latency;
3035           synth_mult (alg_in, t / d, &new_limit, mode);
3036
3037           alg_in->cost.cost += op_cost;
3038           alg_in->cost.latency += op_latency;
3039           if (alg_in->cost.latency < op_cost)
3040             alg_in->cost.latency = op_cost;
3041           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3042             {
3043               best_cost = alg_in->cost;
3044               std::swap (alg_in, best_alg);
3045               best_alg->log[best_alg->ops] = m;
3046               best_alg->op[best_alg->ops] = alg_add_factor;
3047             }
3048           /* Other factors will have been taken care of in the recursion.  */
3049           break;
3050         }
3051
3052       d = (HOST_WIDE_INT_1U << m) - 1;
3053       if (t % d == 0 && t > d && m < maxm
3054           && (!cache_hit || cache_alg == alg_sub_factor))
3055         {
3056           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3057           if (shiftsub0_cost (speed, mode, m) <= op_cost)
3058             op_cost = shiftsub0_cost (speed, mode, m);
3059
3060           op_latency = op_cost;
3061
3062           new_limit.cost = best_cost.cost - op_cost;
3063           new_limit.latency = best_cost.latency - op_latency;
3064           synth_mult (alg_in, t / d, &new_limit, mode);
3065
3066           alg_in->cost.cost += op_cost;
3067           alg_in->cost.latency += op_latency;
3068           if (alg_in->cost.latency < op_cost)
3069             alg_in->cost.latency = op_cost;
3070           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3071             {
3072               best_cost = alg_in->cost;
3073               std::swap (alg_in, best_alg);
3074               best_alg->log[best_alg->ops] = m;
3075               best_alg->op[best_alg->ops] = alg_sub_factor;
3076             }
3077           break;
3078         }
3079     }
3080   if (cache_hit)
3081     goto done;
3082
3083   /* Try shift-and-add (load effective address) instructions,
3084      i.e. do a*3, a*5, a*9.  */
3085   if ((t & 1) != 0)
3086     {
3087     do_alg_add_t2_m:
3088       q = t - 1;
3089       m = ctz_hwi (q);
3090       if (q && m < maxm)
3091         {
3092           op_cost = shiftadd_cost (speed, mode, m);
3093           new_limit.cost = best_cost.cost - op_cost;
3094           new_limit.latency = best_cost.latency - op_cost;
3095           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3096
3097           alg_in->cost.cost += op_cost;
3098           alg_in->cost.latency += op_cost;
3099           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3100             {
3101               best_cost = alg_in->cost;
3102               std::swap (alg_in, best_alg);
3103               best_alg->log[best_alg->ops] = m;
3104               best_alg->op[best_alg->ops] = alg_add_t2_m;
3105             }
3106         }
3107       if (cache_hit)
3108         goto done;
3109
3110     do_alg_sub_t2_m:
3111       q = t + 1;
3112       m = ctz_hwi (q);
3113       if (q && m < maxm)
3114         {
3115           op_cost = shiftsub0_cost (speed, mode, m);
3116           new_limit.cost = best_cost.cost - op_cost;
3117           new_limit.latency = best_cost.latency - op_cost;
3118           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3119
3120           alg_in->cost.cost += op_cost;
3121           alg_in->cost.latency += op_cost;
3122           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3123             {
3124               best_cost = alg_in->cost;
3125               std::swap (alg_in, best_alg);
3126               best_alg->log[best_alg->ops] = m;
3127               best_alg->op[best_alg->ops] = alg_sub_t2_m;
3128             }
3129         }
3130       if (cache_hit)
3131         goto done;
3132     }
3133
3134  done:
3135   /* If best_cost has not decreased, we have not found any algorithm.  */
3136   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3137     {
3138       /* We failed to find an algorithm.  Record alg_impossible for
3139          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3140          we are asked to find an algorithm for T within the same or
3141          lower COST_LIMIT, we can immediately return to the
3142          caller.  */
3143       entry_ptr->t = t;
3144       entry_ptr->mode = mode;
3145       entry_ptr->speed = speed;
3146       entry_ptr->alg = alg_impossible;
3147       entry_ptr->cost = *cost_limit;
3148       return;
3149     }
3150
3151   /* Cache the result.  */
3152   if (!cache_hit)
3153     {
3154       entry_ptr->t = t;
3155       entry_ptr->mode = mode;
3156       entry_ptr->speed = speed;
3157       entry_ptr->alg = best_alg->op[best_alg->ops];
3158       entry_ptr->cost.cost = best_cost.cost;
3159       entry_ptr->cost.latency = best_cost.latency;
3160     }
3161
3162   /* If we are getting a too long sequence for `struct algorithm'
3163      to record, make this search fail.  */
3164   if (best_alg->ops == MAX_BITS_PER_WORD)
3165     return;
3166
3167   /* Copy the algorithm from temporary space to the space at alg_out.
3168      We avoid using structure assignment because the majority of
3169      best_alg is normally undefined, and this is a critical function.  */
3170   alg_out->ops = best_alg->ops + 1;
3171   alg_out->cost = best_cost;
3172   memcpy (alg_out->op, best_alg->op,
3173           alg_out->ops * sizeof *alg_out->op);
3174   memcpy (alg_out->log, best_alg->log,
3175           alg_out->ops * sizeof *alg_out->log);
3176 }
3177 \f
3178 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3179    Try three variations:
3180
3181        - a shift/add sequence based on VAL itself
3182        - a shift/add sequence based on -VAL, followed by a negation
3183        - a shift/add sequence based on VAL - 1, followed by an addition.
3184    A fixup variant is used only if it is cheaper with its fixup included.
3185    Return true if the cheapest of these costs less than MULT_COST,
3186    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3187
3188 bool
3189 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3190                      struct algorithm *alg, enum mult_variant *variant,
3191                      int mult_cost)
3192 {
3193   struct algorithm alg2;
3194   struct mult_cost limit;
3195   int op_cost;
3196   bool speed = optimize_insn_for_speed_p ();
3197
3198   /* Fail quickly for impossible bounds.  */
3199   if (mult_cost < 0)
3200     return false;
3201
3202   /* Ensure that mult_cost provides a reasonable upper bound.
3203      Any constant multiplication can be performed with less
3204      than 2 * bits additions.  */
3205   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3206   if (mult_cost > op_cost)
3207     mult_cost = op_cost;
3208
3209   *variant = basic_variant;
3210   limit.cost = mult_cost;
3211   limit.latency = mult_cost;
3212   synth_mult (alg, val, &limit, mode);
3213
3214   /* This works only if the inverted value actually fits in an
3215      `unsigned int' */
3216   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3217     {
3218       op_cost = neg_cost (speed, mode);
3219       if (MULT_COST_LESS (&alg->cost, mult_cost))
3220         {
3221           limit.cost = alg->cost.cost - op_cost;
3222           limit.latency = alg->cost.latency - op_cost;
3223         }
3224       else
3225         {
3226           limit.cost = mult_cost - op_cost;
3227           limit.latency = mult_cost - op_cost;
3228         }
3229       /* Negate as unsigned: signed -VAL overflows for the minimum VAL.  */
3230       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3231       alg2.cost.cost += op_cost;
3232       alg2.cost.latency += op_cost;
3233       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3234         *alg = alg2, *variant = negate_variant;
3235     }
3236
3237   /* This proves very useful for division-by-constant.  */
3238   op_cost = add_cost (speed, mode);
3239   if (MULT_COST_LESS (&alg->cost, mult_cost))
3240     {
3241       limit.cost = alg->cost.cost - op_cost;
3242       limit.latency = alg->cost.latency - op_cost;
3243     }
3244   else
3245     {
3246       limit.cost = mult_cost - op_cost;
3247       limit.latency = mult_cost - op_cost;
3248     }
3249   /* Subtract as unsigned: signed VAL - 1 overflows for the minimum VAL.  */
3250   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
3251   alg2.cost.cost += op_cost;
3252   alg2.cost.latency += op_cost;
3253   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3254     *alg = alg2, *variant = add_variant;
3255
3256   return MULT_COST_LESS (&alg->cost, mult_cost);
3257 }
3258
3259 /* A subroutine of expand_mult, used for constant multiplications.
3260    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3261    convenient, and return the rtx that holds the product.  Use the
3262    shift/add sequence described by ALG and the fixup given by VARIANT.  */
3263
3264 static rtx
3265 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3266                    rtx target, const struct algorithm *alg,
3267                    enum mult_variant variant)
3268 {
3269   unsigned HOST_WIDE_INT val_so_far;    /* Multiplier reached so far.  */
3270   rtx_insn *insn;
3271   rtx accum, tem;
3272   int opno;
3273   machine_mode nmode;
3274
3275   /* Avoid referencing memory over and over and invalid sharing
3276      on SUBREGs.  */
3277   op0 = force_reg (mode, op0);
3278
3279   /* ACCUM starts out either as OP0 or as a zero, depending on
3280      the first operation.  */
3281
3282   if (alg->op[0] == alg_zero)
3283     {
3284       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3285       val_so_far = 0;
3286     }
3287   else if (alg->op[0] == alg_m)
3288     {
3289       accum = copy_to_mode_reg (mode, op0);
3290       val_so_far = 1;
3291     }
3292   else
3293     gcc_unreachable ();
3294
3295   for (opno = 1; opno < alg->ops; opno++)  /* op[0] was handled above.  */
3296     {
3297       int log = alg->log[opno];
3298       rtx shift_subtarget = optimize ? 0 : accum;
3299       rtx add_target
3300         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3301            && !optimize)
3302           ? target : 0;                 /* Only for the last step.  */
3303       rtx accum_target = optimize ? 0 : accum;  /* Reuse ACCUM if !optimize.  */
3304       rtx accum_inner;
3305       /* Emit one step and mirror its effect in VAL_SO_FAR.  */
3306       switch (alg->op[opno])
3307         {
3308         case alg_shift:
3309           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3310           /* REG_EQUAL note will be attached to the following insn.  */
3311           emit_move_insn (accum, tem);
3312           val_so_far <<= log;
3313           break;
3314
3315         case alg_add_t_m2:
3316           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3317           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3318                                  add_target ? add_target : accum_target);
3319           val_so_far += HOST_WIDE_INT_1U << log;
3320           break;
3321
3322         case alg_sub_t_m2:
3323           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3324           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3325                                  add_target ? add_target : accum_target);
3326           val_so_far -= HOST_WIDE_INT_1U << log;
3327           break;
3328
3329         case alg_add_t2_m:
3330           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3331                                 log, shift_subtarget, 0);
3332           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3333                                  add_target ? add_target : accum_target);
3334           val_so_far = (val_so_far << log) + 1;
3335           break;
3336
3337         case alg_sub_t2_m:
3338           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3339                                 log, shift_subtarget, 0);
3340           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3341                                  add_target ? add_target : accum_target);
3342           val_so_far = (val_so_far << log) - 1;
3343           break;
3344
3345         case alg_add_factor:
3346           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3347           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3348                                  add_target ? add_target : accum_target);
3349           val_so_far += val_so_far << log;
3350           break;
3351
3352         case alg_sub_factor:
3353           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3354           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3355                                  (add_target
3356                                   ? add_target : (optimize ? 0 : tem)));
3357           val_so_far = (val_so_far << log) - val_so_far;
3358           break;
3359
3360         default:
3361           gcc_unreachable ();
3362         }
3363
3364       if (SCALAR_INT_MODE_P (mode))
3365         {
3366           /* Write a REG_EQUAL note on the last insn so that we can cse
3367              multiplication sequences.  Note that if ACCUM is a SUBREG,
3368              we've set the inner register and must properly indicate that.  */
3369           tem = op0, nmode = mode;
3370           accum_inner = accum;
3371           if (GET_CODE (accum) == SUBREG)
3372             {
3373               accum_inner = SUBREG_REG (accum);
3374               nmode = GET_MODE (accum_inner);
3375               tem = gen_lowpart (nmode, op0);
3376             }
3377
3378           /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3379              In that case, only the low bits of accum would be guaranteed to
3380              be equal to the content of the REG_EQUAL note, the upper bits
3381              can be anything.  */
3382           if (!paradoxical_subreg_p (tem))
3383             {
3384               insn = get_last_insn ();
3385               wide_int wval_so_far
3386                 = wi::uhwi (val_so_far,
3387                             GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3388               rtx c = immed_wide_int_const (wval_so_far, nmode);
3389               set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3390                                 accum_inner);
3391             }
3392         }
3393     }
3394   /* Apply the final fixup, again mirroring it in VAL_SO_FAR.  */
3395   if (variant == negate_variant)
3396     {
3397       val_so_far = -val_so_far;
3398       accum = expand_unop (mode, neg_optab, accum, target, 0);
3399     }
3400   else if (variant == add_variant)
3401     {
3402       val_so_far = val_so_far + 1;
3403       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3404     }
3405
3406   /* Compare only the bits of val and val_so_far that are significant
3407      in the result mode, to avoid sign-/zero-extension confusion.  */
3408   nmode = GET_MODE_INNER (mode);
3409   val &= GET_MODE_MASK (nmode);
3410   val_so_far &= GET_MODE_MASK (nmode);
3411   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3412
3413   return accum;
3414 }
3415
3416 /* Perform a multiplication and return an rtx for the result.
3417    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3418    TARGET is a suggestion for where to store the result (an rtx).
3419
3420    We check specially for a constant integer as OP1.  If OP0 is
3421    constant, the operands are swapped first.  With NO_LIBCALL, only
3422    OPTAB_WIDEN methods are tried and the result may be null.  */
3423
3424 rtx
3425 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3426              int unsignedp, bool no_libcall)
3427 {
3428   enum mult_variant variant;
3429   struct algorithm algorithm;
3430   rtx scalar_op1;
3431   int max_cost;
3432   bool speed = optimize_insn_for_speed_p ();
3433   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3434   /* DO_TRAPV selects negv_optab/smulv_optab below and skips synthesis.  */
3435   if (CONSTANT_P (op0))
3436     std::swap (op0, op1);       /* Keep any constant operand in OP1.  */
3437
3438   /* For vectors, there are several simplifications that can be made if
3439      all elements of the vector constant are identical.  */
3440   scalar_op1 = unwrap_const_vec_duplicate (op1);
3441
3442   if (INTEGRAL_MODE_P (mode))
3443     {
3444       rtx fake_reg;
3445       HOST_WIDE_INT coeff;
3446       bool is_neg;
3447       int mode_bitsize;
3448
3449       if (op1 == CONST0_RTX (mode))
3450         return op1;
3451       if (op1 == CONST1_RTX (mode))
3452         return op0;
3453       if (op1 == CONSTM1_RTX (mode))
3454         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3455                             op0, target, 0);
3456
3457       if (do_trapv)
3458         goto skip_synth;
3459
3460       /* If mode is integer vector mode, check if the backend supports
3461          vector lshift (by scalar or vector) at all.  If not, we can't use
3462          synthesized multiply.  */
3463       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3464           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3465           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3466         goto skip_synth;
3467
3468       /* These are the operations that are potentially turned into
3469          a sequence of shifts and additions.  */
3470       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3471
3472       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3473          less than or equal in size to `unsigned int' this doesn't matter.
3474          If the mode is larger than `unsigned int', then synth_mult works
3475          only if the constant value exactly fits in an `unsigned int' without
3476          any truncation.  This means that multiplying by negative values does
3477          not work; results are off by 2^32 on a 32 bit machine.  */
3478       if (CONST_INT_P (scalar_op1))
3479         {
3480           coeff = INTVAL (scalar_op1);
3481           is_neg = coeff < 0;
3482         }
3483 #if TARGET_SUPPORTS_WIDE_INT
3484       else if (CONST_WIDE_INT_P (scalar_op1))
3485 #else
3486       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3487 #endif
3488         {
3489           int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3490           /* Perfect power of 2 (other than 1, which is handled above).  */
3491           if (shift > 0)
3492             return expand_shift (LSHIFT_EXPR, mode, op0,
3493                                  shift, target, unsignedp);
3494           else
3495             goto skip_synth;
3496         }
3497       else
3498         goto skip_synth;
3499
3500       /* We used to test optimize here, on the grounds that it's better to
3501          produce a smaller program when -O is not used.  But this causes
3502          such a terrible slowdown sometimes that it seems better to always
3503          use synth_mult.  */
3504
3505       /* Special case powers of two.  */
3506       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3507           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3508         return expand_shift (LSHIFT_EXPR, mode, op0,
3509                              floor_log2 (coeff), target, unsignedp);
3510       /* FAKE_REG stands in for OP0 when costing the MULT rtx below.  */
3511       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3512
3513       /* Attempt to handle multiplication of DImode values by negative
3514          coefficients, by performing the multiplication by a positive
3515          multiplier and then inverting the result.  */
3516       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3517         {
3518           /* It's safe to use -coeff even for INT_MIN, as the
3519              result is interpreted as an unsigned coefficient.
3520              Exclude cost of op0 from max_cost to match the cost
3521              calculation of the synth_mult.  */
3522           coeff = -(unsigned HOST_WIDE_INT) coeff;
3523           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3524                                     mode, speed)
3525                       - neg_cost (speed, mode));
3526           if (max_cost <= 0)
3527             goto skip_synth;
3528
3529           /* Special case powers of two.  */
3530           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3531             {
3532               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3533                                        floor_log2 (coeff), target, unsignedp);
3534               return expand_unop (mode, neg_optab, temp, target, 0);
3535             }
3536
3537           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3538                                    max_cost))
3539             {
3540               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3541                                             &algorithm, variant);
3542               return expand_unop (mode, neg_optab, temp, target, 0);
3543             }
3544           goto skip_synth;
3545         }
3546
3547       /* Exclude cost of op0 from max_cost to match the cost
3548          calculation of the synth_mult.  */
3549       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3550       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3551         return expand_mult_const (mode, op0, coeff, target,
3552                                   &algorithm, variant);
3553     }
3554  skip_synth:
3555
3556   /* Expand x*2.0 as x+x.  */
3557   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3558       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3559     {
3560       op0 = force_reg (GET_MODE (op0), op0);
3561       return expand_binop (mode, add_optab, op0, op0,
3562                            target, unsignedp,
3563                            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3564     }
3565
3566   /* This used to use umul_optab if unsigned, but for non-widening multiply
3567      there is no difference between signed and unsigned.  */
3568   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3569                       op0, op1, target, unsignedp,
3570                       no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3571   gcc_assert (op0 || no_libcall);      /* Null is allowed only then.  */
3572   return op0;
3573 }
3574
3575 /* Return a cost estimate for multiplying a register by the constant
3576    COEFF in mode MODE; SPEED is passed on to the rtx cost query.  */
3577
3578 int
3579 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3580 {
3581   enum mult_variant fixup;
3582   struct algorithm synth;
3583
3584   /* Price a register-by-register MULT rtx as the budget to beat.  */
3585   rtx pseudo = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3586   rtx generic_mult = gen_rtx_MULT (mode, pseudo, pseudo);
3587   const int budget = set_src_cost (generic_mult, mode, speed);
3588
3589   /* Fall back to the budget itself when no sequence is accepted.  */
3590   bool found = choose_mult_variant (mode, coeff, &synth, &fixup, budget);
3591   return found ? synth.cost.cost : budget;
3592 }
3593
3594 /* Expand a widening multiplication and return an rtx for the product.
3595    MODE is the mode of the result; OP0 and OP1 are the operands to
3596    multiply (rtx's); TARGET is a suggestion for where to store the
3597    result (an rtx).  THIS_OPTAB is the optab to use and must be either
3598    umul_widen_optab or smul_widen_optab.
3599
3600    A constant integer OP1 is checked for specially: the cost of a
3601    widening multiply is compared against the cost of a sequence of
3602    shifts and adds.  */
3603
3604 rtx
3605 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3606                       int unsignedp, optab this_optab)
3607 {
3608   bool speed = optimize_insn_for_speed_p ();
3609   const bool zext_p = this_optab == umul_widen_optab;
3610   rtx cst = NULL_RTX;
3611
3612   /* Synthesis needs a CONST_INT OP1 that is still a CONST_INT once
3613      converted to MODE; the checks stay in short-circuit order.  */
3614   bool try_synth = CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode;
3615   if (try_synth)
3616     {
3617       cst = convert_modes (mode, GET_MODE (op0), op1, zext_p);
3618       try_synth = (cst && CONST_INT_P (cst)
3619                    && (INTVAL (cst) >= 0 || HWI_COMPUTABLE_MODE_P (mode)));
3620     }
3621
3622   if (try_synth)
3623     {
3624       HOST_WIDE_INT coeff = INTVAL (cst);
3625       if (coeff == 0)
3626         return CONST0_RTX (mode);
3627
3628       /* Special case powers of two: shift the converted OP0.  */
3629       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3630         {
3631           op0 = convert_to_mode (mode, op0, zext_p);
3632           return expand_shift (LSHIFT_EXPR, mode, op0,
3633                                floor_log2 (coeff), target, unsignedp);
3634         }
3635
3636       /* The budget excludes the cost of OP0, as synth_mult's does.  */
3637       enum mult_variant fixup;
3638       struct algorithm synth;
3639       int max_cost = mul_widen_cost (speed, mode);
3640       if (choose_mult_variant (mode, coeff, &synth, &fixup, max_cost))
3641         {
3642           op0 = convert_to_mode (mode, op0, zext_p);
3643           return expand_mult_const (mode, op0, coeff, target, &synth, fixup);
3644         }
3645     }
3646   return expand_binop (mode, this_optab, op0, op1, target,
3647                        unsignedp, OPTAB_LIB_WIDEN);
3648 }
3649 \f
3650 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3651    replace division by D, and put the least significant N bits of the result
3652    in *MULTIPLIER_PTR and return the most significant bit.
3653
3654    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3655    needed precision is in PRECISION (should be <= N).
3656
3657    PRECISION should be as small as possible so this function can choose
3658    multiplier more freely.
3659
3660    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3661    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3662
3663    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3664    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3665
3666 unsigned HOST_WIDE_INT
3667 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3668                    unsigned HOST_WIDE_INT *multiplier_ptr,
3669                    int *post_shift_ptr, int *lgup_ptr)
3670 {
3671   int lgup, post_shift;
3672   int pow, pow2;        /* Bit positions set in the two numerators.  */
3673
3674   /* lgup = ceil(log2(divisor)); */
3675   lgup = ceil_log2 (d);
3676
3677   gcc_assert (lgup <= n);
3678
3679   pow = n + lgup;
3680   pow2 = n + lgup - precision;
3681
3682   /* mlow = 2^(N + lgup)/d */
3683   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3684   wide_int mlow = wi::udiv_trunc (val, d);
3685
3686   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3687   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3688   wide_int mhigh = wi::udiv_trunc (val, d);
3689
3690   /* If precision == N, then mlow, mhigh exceed 2^N
3691      (but they do not exceed 2^(N+1)).  */
3692
3693   /* Reduce to lowest terms.  */
3694   for (post_shift = lgup; post_shift > 0; post_shift--)
3695     {
3696       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3697                                                        HOST_BITS_PER_WIDE_INT);
3698       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3699                                                        HOST_BITS_PER_WIDE_INT);
3700       if (ml_lo >= mh_lo)       /* Keep the last pair with MLOW < MHIGH.  */
3701         break;
3702
3703       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3704       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3705     }
3706
3707   *post_shift_ptr = post_shift;
3708   *lgup_ptr = lgup;
3709   if (n < HOST_BITS_PER_WIDE_INT)
3710     {
3711       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3712       *multiplier_ptr = mhigh.to_uhwi () & mask;
3713       return mhigh.to_uhwi () > mask;   /* Nonzero iff MHIGH exceeds N bits.  */
3714     }
3715   else
3716     {
3717       *multiplier_ptr = mhigh.to_uhwi ();
3718       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3719     }
3720 }
3721
3722 /* Return the multiplicative inverse of X modulo 2**N, i.e. the Y for
3723    which X * Y is congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3724
3725 static unsigned HOST_WIDE_INT
3726 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3727 {
3728   /* X is assumed odd, so Y = X already satisfies X * Y == 1 (mod 2^3).
3729      Each refinement step doubles the number of correct low-order bits
3730      of Y.  */
3731   unsigned HOST_WIDE_INT inverse = x;
3732   unsigned HOST_WIDE_INT low_bits;
3733
3734   /* Keep only the low N bits; avoid shifting by the full word width.  */
3735   if (n == HOST_BITS_PER_WIDE_INT)
3736     low_bits = HOST_WIDE_INT_M1U;
3737   else
3738     low_bits = (HOST_WIDE_INT_1U << n) - 1;
3739
3740   for (int good_bits = 3; good_bits < n; good_bits *= 2)
3741     {
3742       unsigned HOST_WIDE_INT residual = 2 - x * inverse;
3743
3744       inverse = (inverse * residual) & low_bits;     /* Modulo 2^N */
3745     }
3746
3747   return inverse;
3748 }
3749
3750 /* Emit code that corrects ADJ_OPERAND, the high half of the product
3751    OP0 x OP1 computed with the wrong signedness flavor.  If UNSIGNEDP
3752    is nonzero, the signed product is adjusted to become unsigned; if
3753    UNSIGNEDP is zero, the unsigned product is adjusted to become
3754    signed.
3755
3756    The result is put in TARGET if that is convenient.
3757
3758    MODE is the mode of operation.  */
3759
3760 rtx
3761 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3762                              rtx op1, rtx target, int unsignedp)
3763 {
3764   const enum rtx_code fixup_code = unsignedp ? PLUS : MINUS;
3765   rtx sign_mask, term, fixup;
3766
3767   /* First term: OP0 >> (bitsize - 1), ANDed with OP1.  */
3768   sign_mask = expand_shift (RSHIFT_EXPR, mode, op0,
3769                             GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3770   term = expand_and (mode, sign_mask, op1, NULL_RTX);
3771   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3772   adj_operand = force_operand (fixup, adj_operand);
3773
3774   /* Second term: OP1 >> (bitsize - 1), ANDed with OP0.  */
3775   sign_mask = expand_shift (RSHIFT_EXPR, mode, op1,
3776                             GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3777   term = expand_and (mode, sign_mask, op0, NULL_RTX);
3778   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3779
3780   return force_operand (fixup, target);
3781 }
3782
3783 /* Helper for expmed_mult_highpart: return the high MODE-sized part of OP.  */
3784
3785 static rtx
3786 extract_high_half (scalar_int_mode mode, rtx op)
3787 {
3788   if (mode != word_mode)  /* Shift down in the wider mode, then narrow.  */
3789     {
3790       scalar_int_mode wide = GET_MODE_WIDER_MODE (mode).require ();
3791       rtx shifted = expand_shift (RSHIFT_EXPR, wide, op,
3792                                   GET_MODE_BITSIZE (mode), 0, 1);
3793       return convert_modes (mode, wide, shifted, 0);
3794     }
3795   return gen_highpart (mode, op);
3796 }
3797
3798 /* Like expmed_mult_highpart, but only consider using a multiplication
3799    optab.  OP1 is an rtx for the constant operand.  */
3800
3801 static rtx
3802 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3803                             rtx target, int unsignedp, int max_cost)
3804 {
3805   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3806   optab moptab;
3807   rtx tem;
3808   int size;
3809   bool speed = optimize_insn_for_speed_p ();
3810
3811   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3812
3813   size = GET_MODE_BITSIZE (mode);
3814
3815   /* Firstly, try using a multiplication insn that only generates the needed
3816      high part of the product, and in the sign flavor of unsignedp.  */
3817   if (mul_highpart_cost (speed, mode) < max_cost)
3818     {
3819       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3820       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3821                           unsignedp, OPTAB_DIRECT);
3822       if (tem)
3823         return tem;
3824     }
3825
3826   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3827      Need to adjust the result after the multiplication.  */
3828   if (size - 1 < BITS_PER_WORD
3829       && (mul_highpart_cost (speed, mode)
3830           + 2 * shift_cost (speed, mode, size-1)
3831           + 4 * add_cost (speed, mode) < max_cost))
3832     {
3833       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3834       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3835                           unsignedp, OPTAB_DIRECT);
3836       if (tem)
3837         /* We used the wrong signedness.  Adjust the result.  */
3838         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3839                                             tem, unsignedp);
3840     }
3841
3842   /* Try widening multiplication.  */
3843   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3844   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3845       && mul_widen_cost (speed, wider_mode) < max_cost)
3846     {
3847       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3848                           unsignedp, OPTAB_WIDEN);
3849       if (tem)
3850         return extract_high_half (mode, tem);
3851     }
3852
3853   /* Try widening the mode and perform a non-widening multiplication.  */
3854   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3855       && size - 1 < BITS_PER_WORD
3856       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3857           < max_cost))
3858     {
3859       rtx_insn *insns;
3860       rtx wop0, wop1;
3861
3862       /* We need to widen the operands, for example to ensure the
3863          constant multiplier is correctly sign or zero extended.
3864          Use a sequence to clean-up any instructions emitted by
3865          the conversions if things don't work out.  */
3866       start_sequence ();
3867       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3868       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3869       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3870                           unsignedp, OPTAB_WIDEN);
3871       insns = get_insns ();
3872       end_sequence ();
3873
3874       if (tem)
3875         {
3876           emit_insn (insns);
3877           return extract_high_half (mode, tem);
3878         }
3879     }
3880
3881   /* Try widening multiplication of opposite signedness, and adjust.  */
3882   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3883   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3884       && size - 1 < BITS_PER_WORD
3885       && (mul_widen_cost (speed, wider_mode)
3886           + 2 * shift_cost (speed, mode, size-1)
3887           + 4 * add_cost (speed, mode) < max_cost))
3888     {
3889       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3890                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3891       if (tem != 0)
3892         {
3893           tem = extract_high_half (mode, tem);
3894           /* We used the wrong signedness.  Adjust the result.  */
3895           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3896                                               target, unsignedp);
3897         }
3898     }
3899
3900   return 0;
3901 }
3902
3903 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3904    putting the high half of the result in TARGET if that is convenient,
3905    and return where the result is.  If the operation cannot be performed,
3906    0 is returned.
3907
3908    MODE is the mode of operation and result.
3909
3910    UNSIGNEDP nonzero means unsigned multiply.
3911
3912    MAX_COST is the total allowed cost for the expanded RTL.  */
3913
3914 static rtx
3915 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3916                       rtx target, int unsignedp, int max_cost)
3917 {
3918   unsigned HOST_WIDE_INT cnst1;
3919   int extra_cost;
3920   bool sign_adjust = false;
3921   enum mult_variant variant;
3922   struct algorithm alg;
3923   rtx tem;
3924   bool speed = optimize_insn_for_speed_p ();
3925
3926   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3927   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3928
3929   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3930
3931   /* We can't optimize modes wider than BITS_PER_WORD.
3932      ??? We might be able to perform double-word arithmetic if
3933      mode == word_mode, however all the cost calculations in
3934      synth_mult etc. assume single-word operations.  */
3935   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3936   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3937     return expmed_mult_highpart_optab (mode, op0, op1, target,
3938                                        unsignedp, max_cost);
3939
3940   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3941
3942   /* Check whether we try to multiply by a negative constant.  */
3943   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3944     {
3945       sign_adjust = true;
3946       extra_cost += add_cost (speed, mode);
3947     }
3948
3949   /* See whether shift/add multiplication is cheap enough.  */
3950   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3951                            max_cost - extra_cost))
3952     {
3953       /* See whether the specialized multiplication optabs are
3954          cheaper than the shift/add version.  */
3955       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3956                                         alg.cost.cost + extra_cost);
3957       if (tem)
3958         return tem;
3959
3960       tem = convert_to_mode (wider_mode, op0, unsignedp);
3961       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3962       tem = extract_high_half (mode, tem);
3963
3964       /* Adjust result for signedness.  */
3965       if (sign_adjust)
3966         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3967
3968       return tem;
3969     }
3970   return expmed_mult_highpart_optab (mode, op0, op1, target,
3971                                      unsignedp, max_cost);
3972 }
3973
3974
3975 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3976
3977 static rtx
3978 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3979 {
3980   rtx result, temp, shift;
3981   rtx_code_label *label;
3982   int logd;
3983   int prec = GET_MODE_PRECISION (mode);
3984
3985   logd = floor_log2 (d);
3986   result = gen_reg_rtx (mode);
3987
3988   /* Avoid conditional branches when they're expensive.  */
3989   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3990       && optimize_insn_for_speed_p ())
3991     {
3992       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3993                                       mode, 0, -1);
3994       if (signmask)
3995         {
3996           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3997           signmask = force_reg (mode, signmask);
3998           shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3999
4000           /* Use the rtx_cost of a LSHIFTRT instruction to determine
4001              which instruction sequence to use.  If logical right shifts
4002              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4003              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4004
4005           temp = gen_rtx_LSHIFTRT (mode, result, shift);
4006           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4007               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4008                   > COSTS_N_INSNS (2)))
4009             {
4010               temp = expand_binop (mode, xor_optab, op0, signmask,
4011                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4012               temp = expand_binop (mode, sub_optab, temp, signmask,
4013                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4014               temp = expand_binop (mode, and_optab, temp,
4015                                    gen_int_mode (masklow, mode),
4016                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017               temp = expand_binop (mode, xor_optab, temp, signmask,
4018                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019               temp = expand_binop (mode, sub_optab, temp, signmask,
4020                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4021             }
4022           else
4023             {
4024               signmask = expand_binop (mode, lshr_optab, signmask, shift,
4025                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
4026               signmask = force_reg (mode, signmask);
4027
4028               temp = expand_binop (mode, add_optab, op0, signmask,
4029                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030               temp = expand_binop (mode, and_optab, temp,
4031                                    gen_int_mode (masklow, mode),
4032                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033               temp = expand_binop (mode, sub_optab, temp, signmask,
4034                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035             }
4036           return temp;
4037         }
4038     }
4039
4040   /* Mask contains the mode's signbit and the significant bits of the
4041      modulus.  By including the signbit in the operation, many targets
4042      can avoid an explicit compare operation in the following comparison
4043      against zero.  */
4044   wide_int mask = wi::mask (logd, false, prec);
4045   mask = wi::set_bit (mask, prec - 1);
4046
4047   temp = expand_binop (mode, and_optab, op0,
4048                        immed_wide_int_const (mask, mode),
4049                        result, 1, OPTAB_LIB_WIDEN);
4050   if (temp != result)
4051     emit_move_insn (result, temp);
4052
4053   label = gen_label_rtx ();
4054   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4055
4056   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4057                        0, OPTAB_LIB_WIDEN);
4058
4059   mask = wi::mask (logd, true, prec);
4060   temp = expand_binop (mode, ior_optab, temp,
4061                        immed_wide_int_const (mask, mode),
4062                        result, 1, OPTAB_LIB_WIDEN);
4063   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4064                        0, OPTAB_LIB_WIDEN);
4065   if (temp != result)
4066     emit_move_insn (result, temp);
4067   emit_label (label);
4068   return result;
4069 }
4070
4071 /* Expand signed division of OP0 by a power of two D in mode MODE.
4072    This routine is only called for positive values of D.  */
4073
4074 static rtx
4075 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4076 {
4077   rtx temp;
4078   rtx_code_label *label;
4079   int logd;
4080
4081   logd = floor_log2 (d);
4082
4083   if (d == 2
4084       && BRANCH_COST (optimize_insn_for_speed_p (),
4085                       false) >= 1)
4086     {
4087       temp = gen_reg_rtx (mode);
4088       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4089       if (temp != NULL_RTX)
4090         {
4091           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4092                                0, OPTAB_LIB_WIDEN);
4093           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4094         }
4095     }
4096
4097   if (HAVE_conditional_move
4098       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4099     {
4100       rtx temp2;
4101
4102       start_sequence ();
4103       temp2 = copy_to_mode_reg (mode, op0);
4104       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4105                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
4106       temp = force_reg (mode, temp);
4107
4108       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4109       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4110                                      mode, temp, temp2, mode, 0);
4111       if (temp2)
4112         {
4113           rtx_insn *seq = get_insns ();
4114           end_sequence ();
4115           emit_insn (seq);
4116           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4117         }
4118       end_sequence ();
4119     }
4120
4121   if (BRANCH_COST (optimize_insn_for_speed_p (),
4122                    false) >= 2)
4123     {
4124       int ushift = GET_MODE_BITSIZE (mode) - logd;
4125
4126       temp = gen_reg_rtx (mode);
4127       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4128       if (temp != NULL_RTX)
4129         {
4130           if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4131               || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4132               > COSTS_N_INSNS (1))
4133             temp = expand_binop (mode, and_optab, temp,
4134                                  gen_int_mode (d - 1, mode),
4135                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
4136           else
4137             temp = expand_shift (RSHIFT_EXPR, mode, temp,
4138                                  ushift, NULL_RTX, 1);
4139           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4140                                0, OPTAB_LIB_WIDEN);
4141           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4142         }
4143     }
4144
4145   label = gen_label_rtx ();
4146   temp = copy_to_mode_reg (mode, op0);
4147   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4148   expand_inc (temp, gen_int_mode (d - 1, mode));
4149   emit_label (label);
4150   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4151 }
4152 \f
4153 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4154    if that is convenient, and returning where the result is.
4155    You may request either the quotient or the remainder as the result;
4156    specify REM_FLAG nonzero to get the remainder.
4157
4158    CODE is the expression code for which kind of division this is;
4159    it controls how rounding is done.  MODE is the machine mode to use.
4160    UNSIGNEDP nonzero means do unsigned division.  */
4161
4162 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4163    and then correct it by or'ing in missing high bits
4164    if result of ANDI is nonzero.
4165    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4166    This could optimize to a bfexts instruction.
4167    But C doesn't use these operations, so their optimizations are
4168    left for later.  */
4169 /* ??? For modulo, we don't actually need the highpart of the first product,
4170    the low part will do nicely.  And for small divisors, the second multiply
4171    can also be a low-part only multiply or even be completely left out.
4172    E.g. to calculate the remainder of a division by 3 with a 32 bit
4173    multiply, multiply with 0x55555556 and extract the upper two bits;
4174    the result is exact for inputs up to 0x1fffffff.
4175    The input range can be reduced by using cross-sum rules.
4176    For odd divisors >= 3, the following table gives right shift counts
4177    so that if a number is shifted by an integer multiple of the given
4178    amount, the remainder stays the same:
4179    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4180    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4181    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4182    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4183    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4184
4185    Cross-sum rules for even numbers can be derived by leaving as many bits
4186    to the right alone as the divisor has zeros to the right.
4187    E.g. if x is an unsigned 32 bit number:
4188    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4189    */
4190
4191 rtx
4192 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4193                rtx op0, rtx op1, rtx target, int unsignedp)
4194 {
4195   machine_mode compute_mode;
4196   rtx tquotient;
4197   rtx quotient = 0, remainder = 0;
4198   rtx_insn *last;
4199   rtx_insn *insn;
4200   optab optab1, optab2;
4201   int op1_is_constant, op1_is_pow2 = 0;
4202   int max_cost, extra_cost;
4203   static HOST_WIDE_INT last_div_const = 0;
4204   bool speed = optimize_insn_for_speed_p ();
4205
4206   op1_is_constant = CONST_INT_P (op1);
4207   if (op1_is_constant)
4208     {
4209       wide_int ext_op1 = rtx_mode_t (op1, mode);
4210       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4211                      || (! unsignedp
4212                          && wi::popcount (wi::neg (ext_op1)) == 1));
4213     }
4214
4215   /*
4216      This is the structure of expand_divmod:
4217
4218      First comes code to fix up the operands so we can perform the operations
4219      correctly and efficiently.
4220
4221      Second comes a switch statement with code specific for each rounding mode.
4222      For some special operands this code emits all RTL for the desired
4223      operation, for other cases, it generates only a quotient and stores it in
4224      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4225      to indicate that it has not done anything.
4226
4227      Last comes code that finishes the operation.  If QUOTIENT is set and
4228      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4229      QUOTIENT is not set, it is computed using trunc rounding.
4230
4231      We try to generate special code for division and remainder when OP1 is a
4232      constant.  If |OP1| = 2**n we can use shifts and some other fast
4233      operations.  For other values of OP1, we compute a carefully selected
4234      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4235      by m.
4236
4237      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4238      half of the product.  Different strategies for generating the product are
4239      implemented in expmed_mult_highpart.
4240
4241      If what we actually want is the remainder, we generate that by another
4242      by-constant multiplication and a subtraction.  */
4243
4244   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4245      code below will malfunction if we are, so check here and handle
4246      the special case if so.  */
4247   if (op1 == const1_rtx)
4248     return rem_flag ? const0_rtx : op0;
4249
4250     /* When dividing by -1, we could get an overflow.
4251      negv_optab can handle overflows.  */
4252   if (! unsignedp && op1 == constm1_rtx)
4253     {
4254       if (rem_flag)
4255         return const0_rtx;
4256       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4257                           ? negv_optab : neg_optab, op0, target, 0);
4258     }
4259
4260   if (target
4261       /* Don't use the function value register as a target
4262          since we have to read it as well as write it,
4263          and function-inlining gets confused by this.  */
4264       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4265           /* Don't clobber an operand while doing a multi-step calculation.  */
4266           || ((rem_flag || op1_is_constant)
4267               && (reg_mentioned_p (target, op0)
4268                   || (MEM_P (op0) && MEM_P (target))))
4269           || reg_mentioned_p (target, op1)
4270           || (MEM_P (op1) && MEM_P (target))))
4271     target = 0;
4272
4273   /* Get the mode in which to perform this computation.  Normally it will
4274      be MODE, but sometimes we can't do the desired operation in MODE.
4275      If so, pick a wider mode in which we can do the operation.  Convert
4276      to that mode at the start to avoid repeated conversions.
4277
4278      First see what operations we need.  These depend on the expression
4279      we are evaluating.  (We assume that divxx3 insns exist under the
4280      same conditions that modxx3 insns and that these insns don't normally
4281      fail.  If these assumptions are not correct, we may generate less
4282      efficient code in some cases.)
4283
4284      Then see if we find a mode in which we can open-code that operation
4285      (either a division, modulus, or shift).  Finally, check for the smallest
4286      mode for which we can do the operation with a library call.  */
4287
4288   /* We might want to refine this now that we have division-by-constant
4289      optimization.  Since expmed_mult_highpart tries so many variants, it is
4290      not straightforward to generalize this.  Maybe we should make an array
4291      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4292
4293   optab1 = (op1_is_pow2
4294             ? (unsignedp ? lshr_optab : ashr_optab)
4295             : (unsignedp ? udiv_optab : sdiv_optab));
4296   optab2 = (op1_is_pow2 ? optab1
4297             : (unsignedp ? udivmod_optab : sdivmod_optab));
4298
4299   FOR_EACH_MODE_FROM (compute_mode, mode)
4300     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4301         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4302       break;
4303
4304   if (compute_mode == VOIDmode)
4305     FOR_EACH_MODE_FROM (compute_mode, mode)
4306       if (optab_libfunc (optab1, compute_mode)
4307           || optab_libfunc (optab2, compute_mode))
4308         break;
4309
4310   /* If we still couldn't find a mode, use MODE, but expand_binop will
4311      probably die.  */
4312   if (compute_mode == VOIDmode)
4313     compute_mode = mode;
4314
4315   if (target && GET_MODE (target) == compute_mode)
4316     tquotient = target;
4317   else
4318     tquotient = gen_reg_rtx (compute_mode);
4319
4320 #if 0
4321   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4322      (mode), and thereby get better code when OP1 is a constant.  Do that
4323      later.  It will require going over all usages of SIZE below.  */
4324   size = GET_MODE_BITSIZE (mode);
4325 #endif
4326
4327   /* Only deduct something for a REM if the last divide done was
4328      for a different constant.   Then set the constant of the last
4329      divide.  */
4330   max_cost = (unsignedp
4331               ? udiv_cost (speed, compute_mode)
4332               : sdiv_cost (speed, compute_mode));
4333   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4334                      && INTVAL (op1) == last_div_const))
4335     max_cost -= (mul_cost (speed, compute_mode)
4336                  + add_cost (speed, compute_mode));
4337
4338   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4339
4340   /* Now convert to the best mode to use.  */
4341   if (compute_mode != mode)
4342     {
4343       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4344       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4345
4346       /* convert_modes may have placed op1 into a register, so we
4347          must recompute the following.  */
4348       op1_is_constant = CONST_INT_P (op1);
4349       if (op1_is_constant)
4350         {
4351           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4352           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4353                          || (! unsignedp
4354                              && wi::popcount (wi::neg (ext_op1)) == 1));
4355         }
4356       else
4357         op1_is_pow2 = 0;
4358     }
4359
4360   /* If one of the operands is a volatile MEM, copy it into a register.  */
4361
4362   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4363     op0 = force_reg (compute_mode, op0);
4364   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4365     op1 = force_reg (compute_mode, op1);
4366
4367   /* If we need the remainder or if OP1 is constant, we need to
4368      put OP0 in a register in case it has any queued subexpressions.  */
4369   if (rem_flag || op1_is_constant)
4370     op0 = force_reg (compute_mode, op0);
4371
4372   last = get_last_insn ();
4373
4374   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4375   if (unsignedp)
4376     {
4377       if (code == FLOOR_DIV_EXPR)
4378         code = TRUNC_DIV_EXPR;
4379       if (code == FLOOR_MOD_EXPR)
4380         code = TRUNC_MOD_EXPR;
4381       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4382         code = TRUNC_DIV_EXPR;
4383     }
4384
4385   if (op1 != const0_rtx)
4386     switch (code)
4387       {
4388       case TRUNC_MOD_EXPR:
4389       case TRUNC_DIV_EXPR:
4390         if (op1_is_constant)
4391           {
4392             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4393             int size = GET_MODE_BITSIZE (int_mode);
4394             if (unsignedp)
4395               {
4396                 unsigned HOST_WIDE_INT mh, ml;
4397                 int pre_shift, post_shift;
4398                 int dummy;
4399                 wide_int wd = rtx_mode_t (op1, int_mode);
4400                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4401
4402                 if (wi::popcount (wd) == 1)
4403                   {
4404                     pre_shift = floor_log2 (d);
4405                     if (rem_flag)
4406                       {
4407                         unsigned HOST_WIDE_INT mask
4408                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4409                         remainder
4410                           = expand_binop (int_mode, and_optab, op0,
4411                                           gen_int_mode (mask, int_mode),
4412                                           remainder, 1,
4413                                           OPTAB_LIB_WIDEN);
4414                         if (remainder)
4415                           return gen_lowpart (mode, remainder);
4416                       }
4417                     quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4418                                              pre_shift, tquotient, 1);
4419                   }
4420                 else if (size <= HOST_BITS_PER_WIDE_INT)
4421                   {
4422                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4423                       {
4424                         /* Most significant bit of divisor is set; emit an scc
4425                            insn.  */
4426                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4427                                                           int_mode, 1, 1);
4428                       }
4429                     else
4430                       {
4431                         /* Find a suitable multiplier and right shift count
4432                            instead of multiplying with D.  */
4433
4434                         mh = choose_multiplier (d, size, size,
4435                                                 &ml, &post_shift, &dummy);
4436
4437                         /* If the suggested multiplier is more than SIZE bits,
4438                            we can do better for even divisors, using an
4439                            initial right shift.  */
4440                         if (mh != 0 && (d & 1) == 0)
4441                           {
4442                             pre_shift = ctz_or_zero (d);
4443                             mh = choose_multiplier (d >> pre_shift, size,
4444                                                     size - pre_shift,
4445                                                     &ml, &post_shift, &dummy);
4446                             gcc_assert (!mh);
4447                           }
4448                         else
4449                           pre_shift = 0;
4450
4451                         if (mh != 0)
4452                           {
4453                             rtx t1, t2, t3, t4;
4454
4455                             if (post_shift - 1 >= BITS_PER_WORD)
4456                               goto fail1;
4457
4458                             extra_cost
4459                               = (shift_cost (speed, int_mode, post_shift - 1)
4460                                  + shift_cost (speed, int_mode, 1)
4461                                  + 2 * add_cost (speed, int_mode));
4462                             t1 = expmed_mult_highpart
4463                               (int_mode, op0, gen_int_mode (ml, int_mode),
4464                                NULL_RTX, 1, max_cost - extra_cost);
4465                             if (t1 == 0)
4466                               goto fail1;
4467                             t2 = force_operand (gen_rtx_MINUS (int_mode,
4468                                                                op0, t1),
4469                                                 NULL_RTX);
4470                             t3 = expand_shift (RSHIFT_EXPR, int_mode,
4471                                                t2, 1, NULL_RTX, 1);
4472                             t4 = force_operand (gen_rtx_PLUS (int_mode,
4473                                                               t1, t3),
4474                                                 NULL_RTX);
4475                             quotient = expand_shift
4476                               (RSHIFT_EXPR, int_mode, t4,
4477                                post_shift - 1, tquotient, 1);
4478                           }
4479                         else
4480                           {
4481                             rtx t1, t2;
4482
4483                             if (pre_shift >= BITS_PER_WORD
4484                                 || post_shift >= BITS_PER_WORD)
4485                               goto fail1;
4486
4487                             t1 = expand_shift
4488                               (RSHIFT_EXPR, int_mode, op0,
4489                                pre_shift, NULL_RTX, 1);
4490                             extra_cost
4491                               = (shift_cost (speed, int_mode, pre_shift)
4492                                  + shift_cost (speed, int_mode, post_shift));
4493                             t2 = expmed_mult_highpart
4494                               (int_mode, t1,
4495                                gen_int_mode (ml, int_mode),
4496                                NULL_RTX, 1, max_cost - extra_cost);
4497                             if (t2 == 0)
4498                               goto fail1;
4499                             quotient = expand_shift
4500                               (RSHIFT_EXPR, int_mode, t2,
4501                                post_shift, tquotient, 1);
4502                           }
4503                       }
4504                   }
4505                 else            /* Too wide mode to use tricky code */
4506                   break;
4507
4508                 insn = get_last_insn ();
4509                 if (insn != last)
4510                   set_dst_reg_note (insn, REG_EQUAL,
4511                                     gen_rtx_UDIV (int_mode, op0, op1),
4512                                     quotient);
4513               }
4514             else                /* TRUNC_DIV, signed */
4515               {
4516                 unsigned HOST_WIDE_INT ml;
4517                 int lgup, post_shift;
4518                 rtx mlr;
4519                 HOST_WIDE_INT d = INTVAL (op1);
4520                 unsigned HOST_WIDE_INT abs_d;
4521
4522                 /* Not prepared to handle division/remainder by
4523                    0xffffffffffffffff8000000000000000 etc.  */
4524                 if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4525                   break;
4526
4527                 /* Since d might be INT_MIN, we have to cast to
4528                    unsigned HOST_WIDE_INT before negating to avoid
4529                    undefined signed overflow.  */
4530                 abs_d = (d >= 0
4531                          ? (unsigned HOST_WIDE_INT) d
4532                          : - (unsigned HOST_WIDE_INT) d);
4533
4534                 /* n rem d = n rem -d */
4535                 if (rem_flag && d < 0)
4536                   {
4537                     d = abs_d;
4538                     op1 = gen_int_mode (abs_d, int_mode);
4539                   }
4540
4541                 if (d == 1)
4542                   quotient = op0;
4543                 else if (d == -1)
4544                   quotient = expand_unop (int_mode, neg_optab, op0,
4545                                           tquotient, 0);
4546                 else if (size <= HOST_BITS_PER_WIDE_INT
4547                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4548                   {
4549                     /* This case is not handled correctly below.  */
4550                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4551                                                 int_mode, 1, 1);
4552                     if (quotient == 0)
4553                       goto fail1;
4554                   }
4555                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4556                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4557                          && (rem_flag
4558                              ? smod_pow2_cheap (speed, int_mode)
4559                              : sdiv_pow2_cheap (speed, int_mode))
4560                          /* We assume that cheap metric is true if the
4561                             optab has an expander for this mode.  */
4562                          && ((optab_handler ((rem_flag ? smod_optab
4563                                               : sdiv_optab),
4564                                              int_mode)
4565                               != CODE_FOR_nothing)
4566                              || (optab_handler (sdivmod_optab, int_mode)
4567                                  != CODE_FOR_nothing)))
4568                   ;
4569                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4570                   {
4571                     if (rem_flag)
4572                       {
4573                         remainder = expand_smod_pow2 (int_mode, op0, d);
4574                         if (remainder)
4575                           return gen_lowpart (mode, remainder);
4576                       }
4577
4578                     if (sdiv_pow2_cheap (speed, int_mode)
4579                         && ((optab_handler (sdiv_optab, int_mode)
4580                              != CODE_FOR_nothing)
4581                             || (optab_handler (sdivmod_optab, int_mode)
4582                                 != CODE_FOR_nothing)))
4583                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4584                                                 int_mode, op0,
4585                                                 gen_int_mode (abs_d,
4586                                                               int_mode),
4587                                                 NULL_RTX, 0);
4588                     else
4589                       quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4590
4591                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4592                        negate the quotient.  */
4593                     if (d < 0)
4594                       {
4595                         insn = get_last_insn ();
4596                         if (insn != last
4597                             && abs_d < (HOST_WIDE_INT_1U
4598                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4599                           set_dst_reg_note (insn, REG_EQUAL,
4600                                             gen_rtx_DIV (int_mode, op0,
4601                                                          gen_int_mode
4602                                                            (abs_d,
4603                                                             int_mode)),
4604                                             quotient);
4605
4606                         quotient = expand_unop (int_mode, neg_optab,
4607                                                 quotient, quotient, 0);
4608                       }
4609                   }
4610                 else if (size <= HOST_BITS_PER_WIDE_INT)
4611                   {
4612                     choose_multiplier (abs_d, size, size - 1,
4613                                        &ml, &post_shift, &lgup);
4614                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4615                       {
4616                         rtx t1, t2, t3;
4617
4618                         if (post_shift >= BITS_PER_WORD
4619                             || size - 1 >= BITS_PER_WORD)
4620                           goto fail1;
4621
4622                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4623                                       + shift_cost (speed, int_mode, size - 1)
4624                                       + add_cost (speed, int_mode));
4625                         t1 = expmed_mult_highpart
4626                           (int_mode, op0, gen_int_mode (ml, int_mode),
4627                            NULL_RTX, 0, max_cost - extra_cost);
4628                         if (t1 == 0)
4629                           goto fail1;
4630                         t2 = expand_shift
4631                           (RSHIFT_EXPR, int_mode, t1,
4632                            post_shift, NULL_RTX, 0);
4633                         t3 = expand_shift
4634                           (RSHIFT_EXPR, int_mode, op0,
4635                            size - 1, NULL_RTX, 0);
4636                         if (d < 0)
4637                           quotient
4638                             = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4639                                              tquotient);
4640                         else
4641                           quotient
4642                             = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4643                                              tquotient);
4644                       }
4645                     else
4646                       {
4647                         rtx t1, t2, t3, t4;
4648
4649                         if (post_shift >= BITS_PER_WORD
4650                             || size - 1 >= BITS_PER_WORD)
4651                           goto fail1;
4652
4653                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4654                         mlr = gen_int_mode (ml, int_mode);
4655                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4656                                       + shift_cost (speed, int_mode, size - 1)
4657                                       + 2 * add_cost (speed, int_mode));
4658                         t1 = expmed_mult_highpart (int_mode, op0, mlr,
4659                                                    NULL_RTX, 0,
4660                                                    max_cost - extra_cost);
4661                         if (t1 == 0)
4662                           goto fail1;
4663                         t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4664                                             NULL_RTX);
4665                         t3 = expand_shift
4666                           (RSHIFT_EXPR, int_mode, t2,
4667                            post_shift, NULL_RTX, 0);
4668                         t4 = expand_shift
4669                           (RSHIFT_EXPR, int_mode, op0,
4670                            size - 1, NULL_RTX, 0);
4671                         if (d < 0)
4672                           quotient
4673                             = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4674                                              tquotient);
4675                         else
4676                           quotient
4677                             = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4678                                              tquotient);
4679                       }
4680                   }
4681                 else            /* Too wide mode to use tricky code */
4682                   break;
4683
4684                 insn = get_last_insn ();
4685                 if (insn != last)
4686                   set_dst_reg_note (insn, REG_EQUAL,
4687                                     gen_rtx_DIV (int_mode, op0, op1),
4688                                     quotient);
4689               }
4690             break;
4691           }
4692       fail1:
4693         delete_insns_since (last);
4694         break;
4695
4696       case FLOOR_DIV_EXPR:
4697       case FLOOR_MOD_EXPR:
4698       /* We will come here only for signed operations.  */
4699         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4700           {
4701             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4702             int size = GET_MODE_BITSIZE (int_mode);
4703             unsigned HOST_WIDE_INT mh, ml;
4704             int pre_shift, lgup, post_shift;
4705             HOST_WIDE_INT d = INTVAL (op1);
4706
4707             if (d > 0)
4708               {
4709                 /* We could just as easily deal with negative constants here,
4710                    but it does not seem worth the trouble for GCC 2.6.  */
4711                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4712                   {
4713                     pre_shift = floor_log2 (d);
4714                     if (rem_flag)
4715                       {
4716                         unsigned HOST_WIDE_INT mask
4717                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4718                         remainder = expand_binop
4719                           (int_mode, and_optab, op0,
4720                            gen_int_mode (mask, int_mode),
4721                            remainder, 0, OPTAB_LIB_WIDEN);
4722                         if (remainder)
4723                           return gen_lowpart (mode, remainder);
4724                       }
4725                     quotient = expand_shift
4726                       (RSHIFT_EXPR, int_mode, op0,
4727                        pre_shift, tquotient, 0);
4728                   }
4729                 else
4730                   {
4731                     rtx t1, t2, t3, t4;
4732
4733                     mh = choose_multiplier (d, size, size - 1,
4734                                             &ml, &post_shift, &lgup);
4735                     gcc_assert (!mh);
4736
4737                     if (post_shift < BITS_PER_WORD
4738                         && size - 1 < BITS_PER_WORD)
4739                       {
4740                         t1 = expand_shift
4741                           (RSHIFT_EXPR, int_mode, op0,
4742                            size - 1, NULL_RTX, 0);
4743                         t2 = expand_binop (int_mode, xor_optab, op0, t1,
4744                                            NULL_RTX, 0, OPTAB_WIDEN);
4745                         extra_cost = (shift_cost (speed, int_mode, post_shift)
4746                                       + shift_cost (speed, int_mode, size - 1)
4747                                       + 2 * add_cost (speed, int_mode));
4748                         t3 = expmed_mult_highpart
4749                           (int_mode, t2, gen_int_mode (ml, int_mode),
4750                            NULL_RTX, 1, max_cost - extra_cost);
4751                         if (t3 != 0)
4752                           {
4753                             t4 = expand_shift
4754                               (RSHIFT_EXPR, int_mode, t3,
4755                                post_shift, NULL_RTX, 1);
4756                             quotient = expand_binop (int_mode, xor_optab,
4757                                                      t4, t1, tquotient, 0,
4758                                                      OPTAB_WIDEN);
4759                           }
4760                       }
4761                   }
4762               }
4763             else
4764               {
4765                 rtx nsign, t1, t2, t3, t4;
4766                 t1 = force_operand (gen_rtx_PLUS (int_mode,
4767                                                   op0, constm1_rtx), NULL_RTX);
4768                 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4769                                    0, OPTAB_WIDEN);
4770                 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4771                                       size - 1, NULL_RTX, 0);
4772                 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4773                                     NULL_RTX);
4774                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4775                                     NULL_RTX, 0);
4776                 if (t4)
4777                   {
4778                     rtx t5;
4779                     t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4780                                       NULL_RTX, 0);
4781                     quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4782                                               tquotient);
4783                   }
4784               }
4785           }
4786
4787         if (quotient != 0)
4788           break;
4789         delete_insns_since (last);
4790
4791         /* Try using an instruction that produces both the quotient and
4792            remainder, using truncation.  We can easily compensate the quotient
4793            or remainder to get floor rounding, once we have the remainder.
4794            Notice that we compute also the final remainder value here,
4795            and return the result right away.  */
4796         if (target == 0 || GET_MODE (target) != compute_mode)
4797           target = gen_reg_rtx (compute_mode);
4798
4799         if (rem_flag)
4800           {
4801             remainder
4802               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4803             quotient = gen_reg_rtx (compute_mode);
4804           }
4805         else
4806           {
4807             quotient
4808               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4809             remainder = gen_reg_rtx (compute_mode);
4810           }
4811
4812         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4813                                  quotient, remainder, 0))
4814           {
4815             /* This could be computed with a branch-less sequence.
4816                Save that for later.  */
4817             rtx tem;
4818             rtx_code_label *label = gen_label_rtx ();
4819             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4820             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4821                                 NULL_RTX, 0, OPTAB_WIDEN);
4822             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4823             expand_dec (quotient, const1_rtx);
4824             expand_inc (remainder, op1);
4825             emit_label (label);
4826             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827           }
4828
4829         /* No luck with division elimination or divmod.  Have to do it
4830            by conditionally adjusting op0 *and* the result.  */
4831         {
4832           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4833           rtx adjusted_op0;
4834           rtx tem;
4835
4836           quotient = gen_reg_rtx (compute_mode);
4837           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4838           label1 = gen_label_rtx ();
4839           label2 = gen_label_rtx ();
4840           label3 = gen_label_rtx ();
4841           label4 = gen_label_rtx ();
4842           label5 = gen_label_rtx ();
4843           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4844           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4845           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4846                               quotient, 0, OPTAB_LIB_WIDEN);
4847           if (tem != quotient)
4848             emit_move_insn (quotient, tem);
4849           emit_jump_insn (targetm.gen_jump (label5));
4850           emit_barrier ();
4851           emit_label (label1);
4852           expand_inc (adjusted_op0, const1_rtx);
4853           emit_jump_insn (targetm.gen_jump (label4));
4854           emit_barrier ();
4855           emit_label (label2);
4856           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4857           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858                               quotient, 0, OPTAB_LIB_WIDEN);
4859           if (tem != quotient)
4860             emit_move_insn (quotient, tem);
4861           emit_jump_insn (targetm.gen_jump (label5));
4862           emit_barrier ();
4863           emit_label (label3);
4864           expand_dec (adjusted_op0, const1_rtx);
4865           emit_label (label4);
4866           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867                               quotient, 0, OPTAB_LIB_WIDEN);
4868           if (tem != quotient)
4869             emit_move_insn (quotient, tem);
4870           expand_dec (quotient, const1_rtx);
4871           emit_label (label5);
4872         }
4873         break;
4874
4875       case CEIL_DIV_EXPR:
4876       case CEIL_MOD_EXPR:
4877         if (unsignedp)
4878           {
4879             if (op1_is_constant
4880                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4881                 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4882                     || INTVAL (op1) >= 0))
4883               {
4884                 scalar_int_mode int_mode
4885                   = as_a <scalar_int_mode> (compute_mode);
4886                 rtx t1, t2, t3;
4887                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4888                 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4889                                    floor_log2 (d), tquotient, 1);
4890                 t2 = expand_binop (int_mode, and_optab, op0,
4891                                    gen_int_mode (d - 1, int_mode),
4892                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4893                 t3 = gen_reg_rtx (int_mode);
4894                 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4895                 if (t3 == 0)
4896                   {
4897                     rtx_code_label *lab;
4898                     lab = gen_label_rtx ();
4899                     do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4900                     expand_inc (t1, const1_rtx);
4901                     emit_label (lab);
4902                     quotient = t1;
4903                   }
4904                 else
4905                   quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4906                                             tquotient);
4907                 break;
4908               }
4909
4910             /* Try using an instruction that produces both the quotient and
4911                remainder, using truncation.  We can easily compensate the
4912                quotient or remainder to get ceiling rounding, once we have the
4913                remainder.  Notice that we compute also the final remainder
4914                value here, and return the result right away.  */
4915             if (target == 0 || GET_MODE (target) != compute_mode)
4916               target = gen_reg_rtx (compute_mode);
4917
4918             if (rem_flag)
4919               {
4920                 remainder = (REG_P (target)
4921                              ? target : gen_reg_rtx (compute_mode));
4922                 quotient = gen_reg_rtx (compute_mode);
4923               }
4924             else
4925               {
4926                 quotient = (REG_P (target)
4927                             ? target : gen_reg_rtx (compute_mode));
4928                 remainder = gen_reg_rtx (compute_mode);
4929               }
4930
4931             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4932                                      remainder, 1))
4933               {
4934                 /* This could be computed with a branch-less sequence.
4935                    Save that for later.  */
4936                 rtx_code_label *label = gen_label_rtx ();
4937                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4938                                  compute_mode, label);
4939                 expand_inc (quotient, const1_rtx);
4940                 expand_dec (remainder, op1);
4941                 emit_label (label);
4942                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4943               }
4944
4945             /* No luck with division elimination or divmod.  Have to do it
4946                by conditionally adjusting op0 *and* the result.  */
4947             {
4948               rtx_code_label *label1, *label2;
4949               rtx adjusted_op0, tem;
4950
4951               quotient = gen_reg_rtx (compute_mode);
4952               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4953               label1 = gen_label_rtx ();
4954               label2 = gen_label_rtx ();
4955               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4956                                compute_mode, label1);
4957               emit_move_insn  (quotient, const0_rtx);
4958               emit_jump_insn (targetm.gen_jump (label2));
4959               emit_barrier ();
4960               emit_label (label1);
4961               expand_dec (adjusted_op0, const1_rtx);
4962               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4963                                   quotient, 1, OPTAB_LIB_WIDEN);
4964               if (tem != quotient)
4965                 emit_move_insn (quotient, tem);
4966               expand_inc (quotient, const1_rtx);
4967               emit_label (label2);
4968             }
4969           }
4970         else /* signed */
4971           {
4972             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4973                 && INTVAL (op1) >= 0)
4974               {
4975                 /* This is extremely similar to the code for the unsigned case
4976                    above.  For 2.7 we should merge these variants, but for
4977                    2.6.1 I don't want to touch the code for unsigned since that
4978                    gets used in C.  The signed case will only be used by other
4979                    languages (Ada).  */
4980
4981                 rtx t1, t2, t3;
4982                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4983                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4984                                    floor_log2 (d), tquotient, 0);
4985                 t2 = expand_binop (compute_mode, and_optab, op0,
4986                                    gen_int_mode (d - 1, compute_mode),
4987                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4988                 t3 = gen_reg_rtx (compute_mode);
4989                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4990                                       compute_mode, 1, 1);
4991                 if (t3 == 0)
4992                   {
4993                     rtx_code_label *lab;
4994                     lab = gen_label_rtx ();
4995                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4996                     expand_inc (t1, const1_rtx);
4997                     emit_label (lab);
4998                     quotient = t1;
4999                   }
5000                 else
5001                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
5002                                                           t1, t3),
5003                                             tquotient);
5004                 break;
5005               }
5006
5007             /* Try using an instruction that produces both the quotient and
5008                remainder, using truncation.  We can easily compensate the
5009                quotient or remainder to get ceiling rounding, once we have the
5010                remainder.  Notice that we compute also the final remainder
5011                value here, and return the result right away.  */
5012             if (target == 0 || GET_MODE (target) != compute_mode)
5013               target = gen_reg_rtx (compute_mode);
5014             if (rem_flag)
5015               {
5016                 remainder= (REG_P (target)
5017                             ? target : gen_reg_rtx (compute_mode));
5018                 quotient = gen_reg_rtx (compute_mode);
5019               }
5020             else
5021               {
5022                 quotient = (REG_P (target)
5023                             ? target : gen_reg_rtx (compute_mode));
5024                 remainder = gen_reg_rtx (compute_mode);
5025               }
5026
5027             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5028                                      remainder, 0))
5029               {
5030                 /* This could be computed with a branch-less sequence.
5031                    Save that for later.  */
5032                 rtx tem;
5033                 rtx_code_label *label = gen_label_rtx ();
5034                 do_cmp_and_jump (remainder, const0_rtx, EQ,
5035                                  compute_mode, label);
5036                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5037                                     NULL_RTX, 0, OPTAB_WIDEN);
5038                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5039                 expand_inc (quotient, const1_rtx);
5040                 expand_dec (remainder, op1);
5041                 emit_label (label);
5042                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5043               }
5044
5045             /* No luck with division elimination or divmod.  Have to do it
5046                by conditionally adjusting op0 *and* the result.  */
5047             {
5048               rtx_code_label *label1, *label2, *label3, *label4, *label5;
5049               rtx adjusted_op0;
5050               rtx tem;
5051
5052               quotient = gen_reg_rtx (compute_mode);
5053               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5054               label1 = gen_label_rtx ();
5055               label2 = gen_label_rtx ();
5056               label3 = gen_label_rtx ();
5057               label4 = gen_label_rtx ();
5058               label5 = gen_label_rtx ();
5059               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5060               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5061                                compute_mode, label1);
5062               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5063                                   quotient, 0, OPTAB_LIB_WIDEN);
5064               if (tem != quotient)
5065                 emit_move_insn (quotient, tem);
5066               emit_jump_insn (targetm.gen_jump (label5));
5067               emit_barrier ();
5068               emit_label (label1);
5069               expand_dec (adjusted_op0, const1_rtx);
5070               emit_jump_insn (targetm.gen_jump (label4));
5071               emit_barrier ();
5072               emit_label (label2);
5073               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5074                                compute_mode, label3);
5075               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5076                                   quotient, 0, OPTAB_LIB_WIDEN);
5077               if (tem != quotient)
5078                 emit_move_insn (quotient, tem);
5079               emit_jump_insn (targetm.gen_jump (label5));
5080               emit_barrier ();
5081               emit_label (label3);
5082               expand_inc (adjusted_op0, const1_rtx);
5083               emit_label (label4);
5084               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5085                                   quotient, 0, OPTAB_LIB_WIDEN);
5086               if (tem != quotient)
5087                 emit_move_insn (quotient, tem);
5088               expand_inc (quotient, const1_rtx);
5089               emit_label (label5);
5090             }
5091           }
5092         break;
5093
5094       case EXACT_DIV_EXPR:
5095         if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5096           {
5097             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5098             int size = GET_MODE_BITSIZE (int_mode);
5099             HOST_WIDE_INT d = INTVAL (op1);
5100             unsigned HOST_WIDE_INT ml;
5101             int pre_shift;
5102             rtx t1;
5103
5104             pre_shift = ctz_or_zero (d);
5105             ml = invert_mod2n (d >> pre_shift, size);
5106             t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5107                                pre_shift, NULL_RTX, unsignedp);
5108             quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5109                                     NULL_RTX, 1);
5110
5111             insn = get_last_insn ();
5112             set_dst_reg_note (insn, REG_EQUAL,
5113                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5114                                               int_mode, op0, op1),
5115                               quotient);
5116           }
5117         break;
5118
5119       case ROUND_DIV_EXPR:
5120       case ROUND_MOD_EXPR:
5121         if (unsignedp)
5122           {
5123             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5124             rtx tem;
5125             rtx_code_label *label;
5126             label = gen_label_rtx ();
5127             quotient = gen_reg_rtx (int_mode);
5128             remainder = gen_reg_rtx (int_mode);
5129             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5130               {
5131                 rtx tem;
5132                 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5133                                          quotient, 1, OPTAB_LIB_WIDEN);
5134                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5135                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5136                                           remainder, 1, OPTAB_LIB_WIDEN);
5137               }
5138             tem = plus_constant (int_mode, op1, -1);
5139             tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5140             do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5141             expand_inc (quotient, const1_rtx);
5142             expand_dec (remainder, op1);
5143             emit_label (label);
5144           }
5145         else
5146           {
5147             scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5148             int size = GET_MODE_BITSIZE (int_mode);
5149             rtx abs_rem, abs_op1, tem, mask;
5150             rtx_code_label *label;
5151             label = gen_label_rtx ();
5152             quotient = gen_reg_rtx (int_mode);
5153             remainder = gen_reg_rtx (int_mode);
5154             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5155               {
5156                 rtx tem;
5157                 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5158                                          quotient, 0, OPTAB_LIB_WIDEN);
5159                 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5160                 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5161                                           remainder, 0, OPTAB_LIB_WIDEN);
5162               }
5163             abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5164             abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5165             tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5166                                 1, NULL_RTX, 1);
5167             do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5168             tem = expand_binop (int_mode, xor_optab, op0, op1,
5169                                 NULL_RTX, 0, OPTAB_WIDEN);
5170             mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5171                                  size - 1, NULL_RTX, 0);
5172             tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5173                                 NULL_RTX, 0, OPTAB_WIDEN);
5174             tem = expand_binop (int_mode, sub_optab, tem, mask,
5175                                 NULL_RTX, 0, OPTAB_WIDEN);
5176             expand_inc (quotient, tem);
5177             tem = expand_binop (int_mode, xor_optab, mask, op1,
5178                                 NULL_RTX, 0, OPTAB_WIDEN);
5179             tem = expand_binop (int_mode, sub_optab, tem, mask,
5180                                 NULL_RTX, 0, OPTAB_WIDEN);
5181             expand_dec (remainder, tem);
5182             emit_label (label);
5183           }
5184         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5185
5186       default:
5187         gcc_unreachable ();
5188       }
5189
5190   if (quotient == 0)
5191     {
5192       if (target && GET_MODE (target) != compute_mode)
5193         target = 0;
5194
5195       if (rem_flag)
5196         {
5197           /* Try to produce the remainder without producing the quotient.
5198              If we seem to have a divmod pattern that does not require widening,
5199              don't try widening here.  We should really have a WIDEN argument
5200              to expand_twoval_binop, since what we'd really like to do here is
5201              1) try a mod insn in compute_mode
5202              2) try a divmod insn in compute_mode
5203              3) try a div insn in compute_mode and multiply-subtract to get
5204                 remainder
5205              4) try the same things with widening allowed.  */
5206           remainder
5207             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5208                                  op0, op1, target,
5209                                  unsignedp,
5210                                  ((optab_handler (optab2, compute_mode)
5211                                    != CODE_FOR_nothing)
5212                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5213           if (remainder == 0)
5214             {
5215               /* No luck there.  Can we do remainder and divide at once
5216                  without a library call?  */
5217               remainder = gen_reg_rtx (compute_mode);
5218               if (! expand_twoval_binop ((unsignedp
5219                                           ? udivmod_optab
5220                                           : sdivmod_optab),
5221                                          op0, op1,
5222                                          NULL_RTX, remainder, unsignedp))
5223                 remainder = 0;
5224             }
5225
5226           if (remainder)
5227             return gen_lowpart (mode, remainder);
5228         }
5229
5230       /* Produce the quotient.  Try a quotient insn, but not a library call.
5231          If we have a divmod in this mode, use it in preference to widening
5232          the div (for this test we assume it will not fail). Note that optab2
5233          is set to the one of the two optabs that the call below will use.  */
5234       quotient
5235         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5236                              op0, op1, rem_flag ? NULL_RTX : target,
5237                              unsignedp,
5238                              ((optab_handler (optab2, compute_mode)
5239                                != CODE_FOR_nothing)
5240                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5241
5242       if (quotient == 0)
5243         {
5244           /* No luck there.  Try a quotient-and-remainder insn,
5245              keeping the quotient alone.  */
5246           quotient = gen_reg_rtx (compute_mode);
5247           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5248                                      op0, op1,
5249                                      quotient, NULL_RTX, unsignedp))
5250             {
5251               quotient = 0;
5252               if (! rem_flag)
5253                 /* Still no luck.  If we are not computing the remainder,
5254                    use a library call for the quotient.  */
5255                 quotient = sign_expand_binop (compute_mode,
5256                                               udiv_optab, sdiv_optab,
5257                                               op0, op1, target,
5258                                               unsignedp, OPTAB_LIB_WIDEN);
5259             }
5260         }
5261     }
5262
5263   if (rem_flag)
5264     {
5265       if (target && GET_MODE (target) != compute_mode)
5266         target = 0;
5267
5268       if (quotient == 0)
5269         {
5270           /* No divide instruction either.  Use library for remainder.  */
5271           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5272                                          op0, op1, target,
5273                                          unsignedp, OPTAB_LIB_WIDEN);
5274           /* No remainder function.  Try a quotient-and-remainder
5275              function, keeping the remainder.  */
5276           if (!remainder)
5277             {
5278               remainder = gen_reg_rtx (compute_mode);
5279               if (!expand_twoval_binop_libfunc
5280                   (unsignedp ? udivmod_optab : sdivmod_optab,
5281                    op0, op1,
5282                    NULL_RTX, remainder,
5283                    unsignedp ? UMOD : MOD))
5284                 remainder = NULL_RTX;
5285             }
5286         }
5287       else
5288         {
5289           /* We divided.  Now finish doing X - Y * (X / Y).  */
5290           remainder = expand_mult (compute_mode, quotient, op1,
5291                                    NULL_RTX, unsignedp);
5292           remainder = expand_binop (compute_mode, sub_optab, op0,
5293                                     remainder, target, unsignedp,
5294                                     OPTAB_LIB_WIDEN);
5295         }
5296     }
5297
5298   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5299 }
5300 \f
5301 /* Return a tree node with data type TYPE, describing the value of X.
5302    Usually this is an VAR_DECL, if there is no obvious better choice.
5303    X may be an expression, however we only support those expressions
5304    generated by loop.c.  */
5305
5306 tree
5307 make_tree (tree type, rtx x)
5308 {
5309   tree t;
5310
5311   switch (GET_CODE (x))
5312     {
5313     case CONST_INT:
5314     case CONST_WIDE_INT:
5315       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5316       return t;
5317
5318     case CONST_DOUBLE:
5319       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5320       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5321         t = wide_int_to_tree (type,
5322                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5323                                                     HOST_BITS_PER_WIDE_INT * 2));
5324       else
5325         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5326
5327       return t;
5328
5329     case CONST_VECTOR:
5330       {
5331         unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5332         unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5333         tree itype = TREE_TYPE (type);
5334
5335         /* Build a tree with vector elements.  */
5336         tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5337         unsigned int count = elts.encoded_nelts ();
5338         for (unsigned int i = 0; i < count; ++i)
5339           {
5340             rtx elt = CONST_VECTOR_ELT (x, i);
5341             elts.quick_push (make_tree (itype, elt));
5342           }
5343
5344         return elts.build ();
5345       }
5346
5347     case PLUS:
5348       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5349                           make_tree (type, XEXP (x, 1)));
5350
5351     case MINUS:
5352       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5353                           make_tree (type, XEXP (x, 1)));
5354
5355     case NEG:
5356       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5357
5358     case MULT:
5359       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5360                           make_tree (type, XEXP (x, 1)));
5361
5362     case ASHIFT:
5363       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5364                           make_tree (type, XEXP (x, 1)));
5365
5366     case LSHIFTRT:
5367       t = unsigned_type_for (type);
5368       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5369                                          make_tree (t, XEXP (x, 0)),
5370                                          make_tree (type, XEXP (x, 1))));
5371
5372     case ASHIFTRT:
5373       t = signed_type_for (type);
5374       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5375                                          make_tree (t, XEXP (x, 0)),
5376                                          make_tree (type, XEXP (x, 1))));
5377
5378     case DIV:
5379       if (TREE_CODE (type) != REAL_TYPE)
5380         t = signed_type_for (type);
5381       else
5382         t = type;
5383
5384       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5385                                          make_tree (t, XEXP (x, 0)),
5386                                          make_tree (t, XEXP (x, 1))));
5387     case UDIV:
5388       t = unsigned_type_for (type);
5389       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5390                                          make_tree (t, XEXP (x, 0)),
5391                                          make_tree (t, XEXP (x, 1))));
5392
5393     case SIGN_EXTEND:
5394     case ZERO_EXTEND:
5395       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5396                                           GET_CODE (x) == ZERO_EXTEND);
5397       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5398
5399     case CONST:
5400       return make_tree (type, XEXP (x, 0));
5401
5402     case SYMBOL_REF:
5403       t = SYMBOL_REF_DECL (x);
5404       if (t)
5405         return fold_convert (type, build_fold_addr_expr (t));
5406       /* fall through.  */
5407
5408     default:
5409       if (CONST_POLY_INT_P (x))
5410         return wide_int_to_tree (t, const_poly_int_value (x));
5411
5412       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5413
5414       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5415          address mode to pointer mode.  */
5416       if (POINTER_TYPE_P (type))
5417         x = convert_memory_address_addr_space
5418           (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5419
5420       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5421          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5422       t->decl_with_rtl.rtl = x;
5423
5424       return t;
5425     }
5426 }
5427 \f
5428 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5429    and returning TARGET.
5430
5431    If TARGET is 0, a pseudo-register or constant is returned.  */
5432
5433 rtx
5434 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5435 {
5436   rtx tem = 0;
5437
5438   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5439     tem = simplify_binary_operation (AND, mode, op0, op1);
5440   if (tem == 0)
5441     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5442
5443   if (target == 0)
5444     target = tem;
5445   else if (tem != target)
5446     emit_move_insn (target, tem);
5447   return target;
5448 }
5449
5450 /* Helper function for emit_store_flag.  */
5451 rtx
5452 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5453              machine_mode mode, machine_mode compare_mode,
5454              int unsignedp, rtx x, rtx y, int normalizep,
5455              machine_mode target_mode)
5456 {
5457   class expand_operand ops[4];
5458   rtx op0, comparison, subtarget;
5459   rtx_insn *last;
5460   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5461   scalar_int_mode int_target_mode;
5462
5463   last = get_last_insn ();
5464   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5465   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5466   if (!x || !y)
5467     {
5468       delete_insns_since (last);
5469       return NULL_RTX;
5470     }
5471
5472   if (target_mode == VOIDmode)
5473     int_target_mode = result_mode;
5474   else
5475     int_target_mode = as_a <scalar_int_mode> (target_mode);
5476   if (!target)
5477     target = gen_reg_rtx (int_target_mode);
5478
5479   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5480
5481   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5482   create_fixed_operand (&ops[1], comparison);
5483   create_fixed_operand (&ops[2], x);
5484   create_fixed_operand (&ops[3], y);
5485   if (!maybe_expand_insn (icode, 4, ops))
5486     {
5487       delete_insns_since (last);
5488       return NULL_RTX;
5489     }
5490   subtarget = ops[0].value;
5491
5492   /* If we are converting to a wider mode, first convert to
5493      INT_TARGET_MODE, then normalize.  This produces better combining
5494      opportunities on machines that have a SIGN_EXTRACT when we are
5495      testing a single bit.  This mostly benefits the 68k.
5496
5497      If STORE_FLAG_VALUE does not have the sign bit set when
5498      interpreted in MODE, we can do this conversion as unsigned, which
5499      is usually more efficient.  */
5500   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5501     {
5502       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5503                   || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5504
5505       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5506       convert_move (target, subtarget, unsignedp);
5507
5508       op0 = target;
5509       result_mode = int_target_mode;
5510     }
5511   else
5512     op0 = subtarget;
5513
5514   /* If we want to keep subexpressions around, don't reuse our last
5515      target.  */
5516   if (optimize)
5517     subtarget = 0;
5518
5519   /* Now normalize to the proper value in MODE.  Sometimes we don't
5520      have to do anything.  */
5521   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5522     ;
5523   /* STORE_FLAG_VALUE might be the most negative number, so write
5524      the comparison this way to avoid a compiler-time warning.  */
5525   else if (- normalizep == STORE_FLAG_VALUE)
5526     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5527
5528   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5529      it hard to use a value of just the sign bit due to ANSI integer
5530      constant typing rules.  */
5531   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5532     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5533                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5534                         normalizep == 1);
5535   else
5536     {
5537       gcc_assert (STORE_FLAG_VALUE & 1);
5538
5539       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5540       if (normalizep == -1)
5541         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5542     }
5543
5544   /* If we were converting to a smaller mode, do the conversion now.  */
5545   if (int_target_mode != result_mode)
5546     {
5547       convert_move (target, op0, 0);
5548       return target;
5549     }
5550   else
5551     return op0;
5552 }
5553
5554
5555 /* A subroutine of emit_store_flag only including "tricks" that do not
5556    need a recursive call.  These are kept separate to avoid infinite
5557    loops.  */
5558
5559 static rtx
5560 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5561                    machine_mode mode, int unsignedp, int normalizep,
5562                    machine_mode target_mode)
5563 {
5564   rtx subtarget;
5565   enum insn_code icode;
5566   machine_mode compare_mode;
5567   enum mode_class mclass;
5568   enum rtx_code scode;
5569
5570   if (unsignedp)
5571     code = unsigned_condition (code);
5572   scode = swap_condition (code);
5573
5574   /* If one operand is constant, make it the second one.  Only do this
5575      if the other operand is not constant as well.  */
5576
5577   if (swap_commutative_operands_p (op0, op1))
5578     {
5579       std::swap (op0, op1);
5580       code = swap_condition (code);
5581     }
5582
5583   if (mode == VOIDmode)
5584     mode = GET_MODE (op0);
5585
5586   if (CONST_SCALAR_INT_P (op1))
5587     canonicalize_comparison (mode, &code, &op1);
5588
5589   /* For some comparisons with 1 and -1, we can convert this to
5590      comparisons with zero.  This will often produce more opportunities for
5591      store-flag insns.  */
5592
5593   switch (code)
5594     {
5595     case LT:
5596       if (op1 == const1_rtx)
5597         op1 = const0_rtx, code = LE;
5598       break;
5599     case LE:
5600       if (op1 == constm1_rtx)
5601         op1 = const0_rtx, code = LT;
5602       break;
5603     case GE:
5604       if (op1 == const1_rtx)
5605         op1 = const0_rtx, code = GT;
5606       break;
5607     case GT:
5608       if (op1 == constm1_rtx)
5609         op1 = const0_rtx, code = GE;
5610       break;
5611     case GEU:
5612       if (op1 == const1_rtx)
5613         op1 = const0_rtx, code = NE;
5614       break;
5615     case LTU:
5616       if (op1 == const1_rtx)
5617         op1 = const0_rtx, code = EQ;
5618       break;
5619     default:
5620       break;
5621     }
5622
5623   /* If we are comparing a double-word integer with zero or -1, we can
5624      convert the comparison into one involving a single word.  */
5625   scalar_int_mode int_mode;
5626   if (is_int_mode (mode, &int_mode)
5627       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5628       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5629     {
5630       rtx tem;
5631       if ((code == EQ || code == NE)
5632           && (op1 == const0_rtx || op1 == constm1_rtx))
5633         {
5634           rtx op00, op01;
5635
5636           /* Do a logical OR or AND of the two words and compare the
5637              result.  */
5638           op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5639           op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5640           tem = expand_binop (word_mode,
5641                               op1 == const0_rtx ? ior_optab : and_optab,
5642                               op00, op01, NULL_RTX, unsignedp,
5643                               OPTAB_DIRECT);
5644
5645           if (tem != 0)
5646             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5647                                    unsignedp, normalizep);
5648         }
5649       else if ((code == LT || code == GE) && op1 == const0_rtx)
5650         {
5651           rtx op0h;
5652
5653           /* If testing the sign bit, can just test on high word.  */
5654           op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5655                                       subreg_highpart_offset (word_mode,
5656                                                               int_mode));
5657           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5658                                  unsignedp, normalizep);
5659         }
5660       else
5661         tem = NULL_RTX;
5662
5663       if (tem)
5664         {
5665           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5666             return tem;
5667           if (!target)
5668             target = gen_reg_rtx (target_mode);
5669
5670           convert_move (target, tem,
5671                         !val_signbit_known_set_p (word_mode,
5672                                                   (normalizep ? normalizep
5673                                                    : STORE_FLAG_VALUE)));
5674           return target;
5675         }
5676     }
5677
5678   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5679      complement of A (for GE) and shifting the sign bit to the low bit.  */
5680   if (op1 == const0_rtx && (code == LT || code == GE)
5681       && is_int_mode (mode, &int_mode)
5682       && (normalizep || STORE_FLAG_VALUE == 1
5683           || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5684     {
5685       scalar_int_mode int_target_mode;
5686       subtarget = target;
5687
5688       if (!target)
5689         int_target_mode = int_mode;
5690       else
5691         {
5692           /* If the result is to be wider than OP0, it is best to convert it
5693              first.  If it is to be narrower, it is *incorrect* to convert it
5694              first.  */
5695           int_target_mode = as_a <scalar_int_mode> (target_mode);
5696           if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5697             {
5698               op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5699               int_mode = int_target_mode;
5700             }
5701         }
5702
5703       if (int_target_mode != int_mode)
5704         subtarget = 0;
5705
5706       if (code == GE)
5707         op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5708                            ((STORE_FLAG_VALUE == 1 || normalizep)
5709                             ? 0 : subtarget), 0);
5710
5711       if (STORE_FLAG_VALUE == 1 || normalizep)
5712         /* If we are supposed to produce a 0/1 value, we want to do
5713            a logical shift from the sign bit to the low-order bit; for
5714            a -1/0 value, we do an arithmetic shift.  */
5715         op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5716                             GET_MODE_BITSIZE (int_mode) - 1,
5717                             subtarget, normalizep != -1);
5718
5719       if (int_mode != int_target_mode)
5720         op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5721
5722       return op0;
5723     }
5724
5725   mclass = GET_MODE_CLASS (mode);
5726   FOR_EACH_MODE_FROM (compare_mode, mode)
5727     {
5728      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5729      icode = optab_handler (cstore_optab, optab_mode);
5730      if (icode != CODE_FOR_nothing)
5731         {
5732           do_pending_stack_adjust ();
5733           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5734                                  unsignedp, op0, op1, normalizep, target_mode);
5735           if (tem)
5736             return tem;
5737
5738           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5739             {
5740               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5741                                  unsignedp, op1, op0, normalizep, target_mode);
5742               if (tem)
5743                 return tem;
5744             }
5745           break;
5746         }
5747     }
5748
5749   return 0;
5750 }
5751
5752 /* Subroutine of emit_store_flag that handles cases in which the operands
5753    are scalar integers.  SUBTARGET is the target to use for temporary
5754    operations and TRUEVAL is the value to store when the condition is
5755    true.  All other arguments are as for emit_store_flag.  */
5756
5757 rtx
5758 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5759                      rtx op1, scalar_int_mode mode, int unsignedp,
5760                      int normalizep, rtx trueval)
5761 {
5762   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5763   rtx_insn *last = get_last_insn ();
5764
5765   /* If this is an equality comparison of integers, we can try to exclusive-or
5766      (or subtract) the two operands and use a recursive call to try the
5767      comparison with zero.  Don't do any of these cases if branches are
5768      very cheap.  */
5769
5770   if ((code == EQ || code == NE) && op1 != const0_rtx)
5771     {
5772       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5773                               OPTAB_WIDEN);
5774
5775       if (tem == 0)
5776         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5777                             OPTAB_WIDEN);
5778       if (tem != 0)
5779         tem = emit_store_flag (target, code, tem, const0_rtx,
5780                                mode, unsignedp, normalizep);
5781       if (tem != 0)
5782         return tem;
5783
5784       delete_insns_since (last);
5785     }
5786
5787   /* For integer comparisons, try the reverse comparison.  However, for
5788      small X and if we'd have anyway to extend, implementing "X != 0"
5789      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5790   rtx_code rcode = reverse_condition (code);
5791   if (can_compare_p (rcode, mode, ccp_store_flag)
5792       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5793             && code == NE
5794             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5795             && op1 == const0_rtx))
5796     {
5797       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5798                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5799
5800       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5801       if (want_add
5802           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5803                        optimize_insn_for_speed_p ()) == 0)
5804         {
5805           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5806                                        STORE_FLAG_VALUE, target_mode);
5807           if (tem != 0)
5808             tem = expand_binop (target_mode, add_optab, tem,
5809                                 gen_int_mode (normalizep, target_mode),
5810                                 target, 0, OPTAB_WIDEN);
5811           if (tem != 0)
5812             return tem;
5813         }
5814       else if (!want_add
5815                && rtx_cost (trueval, mode, XOR, 1,
5816                             optimize_insn_for_speed_p ()) == 0)
5817         {
5818           rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5819                                        normalizep, target_mode);
5820           if (tem != 0)
5821             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5822                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5823           if (tem != 0)
5824             return tem;
5825         }
5826
5827       delete_insns_since (last);
5828     }
5829
5830   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5831      the constant zero.  Reject all other comparisons at this point.  Only
5832      do LE and GT if branches are expensive since they are expensive on
5833      2-operand machines.  */
5834
5835   if (op1 != const0_rtx
5836       || (code != EQ && code != NE
5837           && (BRANCH_COST (optimize_insn_for_speed_p (),
5838                            false) <= 1 || (code != LE && code != GT))))
5839     return 0;
5840
5841   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5842      do the necessary operation below.  */
5843
5844   rtx tem = 0;
5845
5846   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5847      the sign bit set.  */
5848
5849   if (code == LE)
5850     {
5851       /* This is destructive, so SUBTARGET can't be OP0.  */
5852       if (rtx_equal_p (subtarget, op0))
5853         subtarget = 0;
5854
5855       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5856                           OPTAB_WIDEN);
5857       if (tem)
5858         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5859                             OPTAB_WIDEN);
5860     }
5861
5862   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5863      number of bits in the mode of OP0, minus one.  */
5864
5865   if (code == GT)
5866     {
5867       if (rtx_equal_p (subtarget, op0))
5868         subtarget = 0;
5869
5870       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5871                                 GET_MODE_BITSIZE (mode) - 1,
5872                                 subtarget, 0);
5873       if (tem)
5874         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5875                             OPTAB_WIDEN);
5876     }
5877
5878   if (code == EQ || code == NE)
5879     {
5880       /* For EQ or NE, one way to do the comparison is to apply an operation
5881          that converts the operand into a positive number if it is nonzero
5882          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5883          for NE we negate.  This puts the result in the sign bit.  Then we
5884          normalize with a shift, if needed.
5885
5886          Two operations that can do the above actions are ABS and FFS, so try
5887          them.  If that doesn't work, and MODE is smaller than a full word,
5888          we can use zero-extension to the wider mode (an unsigned conversion)
5889          as the operation.  */
5890
5891       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5892          that is compensated by the subsequent overflow when subtracting
5893          one / negating.  */
5894
5895       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5896         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5897       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5898         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5899       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5900         {
5901           tem = convert_modes (word_mode, mode, op0, 1);
5902           mode = word_mode;
5903         }
5904
5905       if (tem != 0)
5906         {
5907           if (code == EQ)
5908             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5909                                 0, OPTAB_WIDEN);
5910           else
5911             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5912         }
5913
5914       /* If we couldn't do it that way, for NE we can "or" the two's complement
5915          of the value with itself.  For EQ, we take the one's complement of
5916          that "or", which is an extra insn, so we only handle EQ if branches
5917          are expensive.  */
5918
5919       if (tem == 0
5920           && (code == NE
5921               || BRANCH_COST (optimize_insn_for_speed_p (),
5922                               false) > 1))
5923         {
5924           if (rtx_equal_p (subtarget, op0))
5925             subtarget = 0;
5926
5927           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5928           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5929                               OPTAB_WIDEN);
5930
5931           if (tem && code == EQ)
5932             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5933         }
5934     }
5935
5936   if (tem && normalizep)
5937     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5938                               GET_MODE_BITSIZE (mode) - 1,
5939                               subtarget, normalizep == 1);
5940
5941   if (tem)
5942     {
5943       if (!target)
5944         ;
5945       else if (GET_MODE (tem) != target_mode)
5946         {
5947           convert_move (target, tem, 0);
5948           tem = target;
5949         }
5950       else if (!subtarget)
5951         {
5952           emit_move_insn (target, tem);
5953           tem = target;
5954         }
5955     }
5956   else
5957     delete_insns_since (last);
5958
5959   return tem;
5960 }
5961
5962 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5963    and storing in TARGET.  Normally return TARGET.
5964    Return 0 if that cannot be done.
5965
5966    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5967    it is VOIDmode, they cannot both be CONST_INT.
5968
5969    UNSIGNEDP is for the case where we have to widen the operands
5970    to perform the operation.  It says to use zero-extension.
5971
5972    NORMALIZEP is 1 if we should convert the result to be either zero
5973    or one.  Normalize is -1 if we should convert the result to be
5974    either zero or -1.  If NORMALIZEP is zero, the result will be left
5975    "raw" out of the scc insn.  */
5976
5977 rtx
5978 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5979                  machine_mode mode, int unsignedp, int normalizep)
5980 {
5981   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5982   enum rtx_code rcode;
5983   rtx subtarget;
5984   rtx tem, trueval;
5985   rtx_insn *last;
5986
5987   /* If we compare constants, we shouldn't use a store-flag operation,
5988      but a constant load.  We can get there via the vanilla route that
5989      usually generates a compare-branch sequence, but will in this case
5990      fold the comparison to a constant, and thus elide the branch.  */
5991   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5992     return NULL_RTX;
5993
5994   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5995                            target_mode);
5996   if (tem)
5997     return tem;
5998
5999   /* If we reached here, we can't do this with a scc insn, however there
6000      are some comparisons that can be done in other ways.  Don't do any
6001      of these cases if branches are very cheap.  */
6002   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
6003     return 0;
6004
6005   /* See what we need to return.  We can only return a 1, -1, or the
6006      sign bit.  */
6007
6008   if (normalizep == 0)
6009     {
6010       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6011         normalizep = STORE_FLAG_VALUE;
6012
6013       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6014         ;
6015       else
6016         return 0;
6017     }
6018
6019   last = get_last_insn ();
6020
6021   /* If optimizing, use different pseudo registers for each insn, instead
6022      of reusing the same pseudo.  This leads to better CSE, but slows
6023      down the compiler, since there are more pseudos.  */
6024   subtarget = (!optimize
6025                && (target_mode == mode)) ? target : NULL_RTX;
6026   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6027
6028   /* For floating-point comparisons, try the reverse comparison or try
6029      changing the "orderedness" of the comparison.  */
6030   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6031     {
6032       enum rtx_code first_code;
6033       bool and_them;
6034
6035       rcode = reverse_condition_maybe_unordered (code);
6036       if (can_compare_p (rcode, mode, ccp_store_flag)
6037           && (code == ORDERED || code == UNORDERED
6038               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6039               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6040         {
6041           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6042                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6043
6044           /* For the reverse comparison, use either an addition or a XOR.  */
6045           if (want_add
6046               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6047                            optimize_insn_for_speed_p ()) == 0)
6048             {
6049               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6050                                        STORE_FLAG_VALUE, target_mode);
6051               if (tem)
6052                 return expand_binop (target_mode, add_optab, tem,
6053                                      gen_int_mode (normalizep, target_mode),
6054                                      target, 0, OPTAB_WIDEN);
6055             }
6056           else if (!want_add
6057                    && rtx_cost (trueval, mode, XOR, 1,
6058                                 optimize_insn_for_speed_p ()) == 0)
6059             {
6060               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6061                                        normalizep, target_mode);
6062               if (tem)
6063                 return expand_binop (target_mode, xor_optab, tem, trueval,
6064                                      target, INTVAL (trueval) >= 0,
6065                                      OPTAB_WIDEN);
6066             }
6067         }
6068
6069       delete_insns_since (last);
6070
6071       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6072       if (code == ORDERED || code == UNORDERED)
6073         return 0;
6074
6075       and_them = split_comparison (code, mode, &first_code, &code);
6076
6077       /* If there are no NaNs, the first comparison should always fall through.
6078          Effectively change the comparison to the other one.  */
6079       if (!HONOR_NANS (mode))
6080         {
6081           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6082           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6083                                     target_mode);
6084         }
6085
6086       if (!HAVE_conditional_move)
6087         return 0;
6088
6089       /* Do not turn a trapping comparison into a non-trapping one.  */
6090       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6091           && flag_trapping_math)
6092         return 0;
6093
6094       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6095          conditional move.  */
6096       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6097                                normalizep, target_mode);
6098       if (tem == 0)
6099         return 0;
6100
6101       if (and_them)
6102         tem = emit_conditional_move (target, code, op0, op1, mode,
6103                                      tem, const0_rtx, GET_MODE (tem), 0);
6104       else
6105         tem = emit_conditional_move (target, code, op0, op1, mode,
6106                                      trueval, tem, GET_MODE (tem), 0);
6107
6108       if (tem == 0)
6109         delete_insns_since (last);
6110       return tem;
6111     }
6112
6113   /* The remaining tricks only apply to integer comparisons.  */
6114
6115   scalar_int_mode int_mode;
6116   if (is_int_mode (mode, &int_mode))
6117     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6118                                 unsignedp, normalizep, trueval);
6119
6120   return 0;
6121 }
6122
6123 /* Like emit_store_flag, but always succeeds.  */
6124
6125 rtx
6126 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6127                        machine_mode mode, int unsignedp, int normalizep)
6128 {
6129   rtx tem;
6130   rtx_code_label *label;
6131   rtx trueval, falseval;
6132
6133   /* First see if emit_store_flag can do the job.  */
6134   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6135   if (tem != 0)
6136     return tem;
6137
6138   /* If one operand is constant, make it the second one.  Only do this
6139      if the other operand is not constant as well.  */
6140   if (swap_commutative_operands_p (op0, op1))
6141     {
6142       std::swap (op0, op1);
6143       code = swap_condition (code);
6144     }
6145
6146   if (mode == VOIDmode)
6147     mode = GET_MODE (op0);
6148
6149   if (!target)
6150     target = gen_reg_rtx (word_mode);
6151
6152   /* If this failed, we have to do this with set/compare/jump/set code.
6153      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6154   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6155   if (code == NE
6156       && GET_MODE_CLASS (mode) == MODE_INT
6157       && REG_P (target)
6158       && op0 == target
6159       && op1 == const0_rtx)
6160     {
6161       label = gen_label_rtx ();
6162       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6163                                NULL_RTX, NULL, label,
6164                                profile_probability::uninitialized ());
6165       emit_move_insn (target, trueval);
6166       emit_label (label);
6167       return target;
6168     }
6169
6170   if (!REG_P (target)
6171       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6172     target = gen_reg_rtx (GET_MODE (target));
6173
6174   /* Jump in the right direction if the target cannot implement CODE
6175      but can jump on its reverse condition.  */
6176   falseval = const0_rtx;
6177   if (! can_compare_p (code, mode, ccp_jump)
6178       && (! FLOAT_MODE_P (mode)
6179           || code == ORDERED || code == UNORDERED
6180           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6181           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6182     {
6183       enum rtx_code rcode;
6184       if (FLOAT_MODE_P (mode))
6185         rcode = reverse_condition_maybe_unordered (code);
6186       else
6187         rcode = reverse_condition (code);
6188
6189       /* Canonicalize to UNORDERED for the libcall.  */
6190       if (can_compare_p (rcode, mode, ccp_jump)
6191           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6192         {
6193           falseval = trueval;
6194           trueval = const0_rtx;
6195           code = rcode;
6196         }
6197     }
6198
6199   emit_move_insn (target, trueval);
6200   label = gen_label_rtx ();
6201   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6202                            label, profile_probability::uninitialized ());
6203
6204   emit_move_insn (target, falseval);
6205   emit_label (label);
6206
6207   return target;
6208 }
6209
6210 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6211    and exclusive ranges in order to create an equivalent comparison.  See
6212    canonicalize_cmp_for_target for the possible cases.  */
6213
6214 static enum rtx_code
6215 equivalent_cmp_code (enum rtx_code code)
6216 {
6217   switch (code)
6218     {
6219     case GT:
6220       return GE;
6221     case GE:
6222       return GT;
6223     case LT:
6224       return LE;
6225     case LE:
6226       return LT;
6227     case GTU:
6228       return GEU;
6229     case GEU:
6230       return GTU;
6231     case LTU:
6232       return LEU;
6233     case LEU:
6234       return LTU;
6235
6236     default:
6237       return code;
6238     }
6239 }
6240
6241 /* Choose the more appropiate immediate in scalar integer comparisons.  The
6242    purpose of this is to end up with an immediate which can be loaded into a
6243    register in fewer moves, if possible.
6244
6245    For each integer comparison there exists an equivalent choice:
6246      i)   a >  b or a >= b + 1
6247      ii)  a <= b or a <  b + 1
6248      iii) a >= b or a >  b - 1
6249      iv)  a <  b or a <= b - 1
6250
6251    MODE is the mode of the first operand.
6252    CODE points to the comparison code.
6253    IMM points to the rtx containing the immediate.  *IMM must satisfy
6254    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6255    on exit.  */
6256
6257 void
6258 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6259 {
6260   if (!SCALAR_INT_MODE_P (mode))
6261     return;
6262
6263   int to_add = 0;
6264   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6265
6266   /* Extract the immediate value from the rtx.  */
6267   wide_int imm_val = rtx_mode_t (*imm, mode);
6268
6269   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6270     to_add = 1;
6271   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6272     to_add = -1;
6273   else
6274     return;
6275
6276   /* Check for overflow/underflow in the case of signed values and
6277      wrapping around in the case of unsigned values.  If any occur
6278      cancel the optimization.  */
6279   wi::overflow_type overflow = wi::OVF_NONE;
6280   wide_int imm_modif;
6281
6282   if (to_add == 1)
6283     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6284   else
6285     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6286
6287   if (overflow)
6288     return;
6289
6290   /* The following creates a pseudo; if we cannot do that, bail out.  */
6291   if (!can_create_pseudo_p ())
6292     return;
6293
6294   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6295   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6296
6297   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6298   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6299
6300   /* Update the immediate and the code.  */
6301   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6302     {
6303       *code = equivalent_cmp_code (*code);
6304       *imm = new_imm;
6305     }
6306 }
6307
6308
6309 \f
6310 /* Perform possibly multi-word comparison and conditional jump to LABEL
6311    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6312    now a thin wrapper around do_compare_rtx_and_jump.  */
6313
6314 static void
6315 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6316                  rtx_code_label *label)
6317 {
6318   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6319   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6320                            NULL, label, profile_probability::uninitialized ());
6321 }