Add subreg_memory_offset helper functions
gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "emit-rtl.h"
35 #include "diagnostic-core.h"
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "dojump.h"
39 #include "explow.h"
40 #include "expr.h"
41 #include "langhooks.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx, scalar_int_mode, bool);
54 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx, scalar_int_mode, bool);
58 static void store_split_bit_field (rtx, opt_scalar_int_mode,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT,
63 rtx, scalar_int_mode, bool);
64 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
65 unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, rtx, int, bool);
67 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
68 unsigned HOST_WIDE_INT,
69 unsigned HOST_WIDE_INT, rtx, int, bool);
70 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
71 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT, int, bool);
74 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
75 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
76 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
77
78 /* Return a constant integer mask value of mode MODE with BITSIZE ones
79 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
80 The mask is truncated if necessary to the width of mode MODE. The
81 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
82
83 static inline rtx
84 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
85 {
86 return immed_wide_int_const
87 (wi::shifted_mask (bitpos, bitsize, complement,
88 GET_MODE_PRECISION (mode)), mode);
89 }
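
/* As a concrete illustration of the rule above (assuming a 32-bit
   SImode): mask_rtx (SImode, 4, 8, false) yields the constant 0xff0,
   i.e. eight ones above four zeros, and the same call with COMPLEMENT
   set yields 0xfffff00f.  */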
90
91 /* Test whether a value is zero or a power of two. */
92 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
93 (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
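
/* For instance, 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0 satisfy
   this predicate, whereas 12 & 11 == 8, so 12 does not.  */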
94
95 struct init_expmed_rtl
96 {
97 rtx reg;
98 rtx plus;
99 rtx neg;
100 rtx mult;
101 rtx sdiv;
102 rtx udiv;
103 rtx sdiv_32;
104 rtx smod_32;
105 rtx wide_mult;
106 rtx wide_lshr;
107 rtx wide_trunc;
108 rtx shift;
109 rtx shift_mult;
110 rtx shift_add;
111 rtx shift_sub0;
112 rtx shift_sub1;
113 rtx zext;
114 rtx trunc;
115
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
118 };
119
120 static void
121 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
122 scalar_int_mode from_mode, bool speed)
123 {
124 int to_size, from_size;
125 rtx which;
126
127 to_size = GET_MODE_PRECISION (to_mode);
128 from_size = GET_MODE_PRECISION (from_mode);
129
130 /* Most partial integers have a precision less than the "full"
131 integer they require for storage. If one doesn't (i.e. its precision
132 is itself a power of two), reduce its bit size by one here so that
133 it still compares as narrower than the corresponding full mode. */
134 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
135 && pow2p_hwi (to_size))
136 to_size --;
137 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
138 && pow2p_hwi (from_size))
139 from_size --;
140
141 /* Assume cost of zero-extend and sign-extend is the same. */
142 which = (to_size < from_size ? all->trunc : all->zext);
143
144 PUT_MODE (all->reg, from_mode);
145 set_convert_cost (to_mode, from_mode, speed,
146 set_src_cost (which, to_mode, speed));
147 }
148
149 static void
150 init_expmed_one_mode (struct init_expmed_rtl *all,
151 machine_mode mode, int speed)
152 {
153 int m, n, mode_bitsize;
154 machine_mode mode_from;
155
156 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
157
158 PUT_MODE (all->reg, mode);
159 PUT_MODE (all->plus, mode);
160 PUT_MODE (all->neg, mode);
161 PUT_MODE (all->mult, mode);
162 PUT_MODE (all->sdiv, mode);
163 PUT_MODE (all->udiv, mode);
164 PUT_MODE (all->sdiv_32, mode);
165 PUT_MODE (all->smod_32, mode);
166 PUT_MODE (all->wide_trunc, mode);
167 PUT_MODE (all->shift, mode);
168 PUT_MODE (all->shift_mult, mode);
169 PUT_MODE (all->shift_add, mode);
170 PUT_MODE (all->shift_sub0, mode);
171 PUT_MODE (all->shift_sub1, mode);
172 PUT_MODE (all->zext, mode);
173 PUT_MODE (all->trunc, mode);
174
175 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
176 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
177 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
178 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
179 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
180
181 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
182 <= 2 * add_cost (speed, mode)));
183 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
184 <= 4 * add_cost (speed, mode)));
185
186 set_shift_cost (speed, mode, 0, 0);
187 {
188 int cost = add_cost (speed, mode);
189 set_shiftadd_cost (speed, mode, 0, cost);
190 set_shiftsub0_cost (speed, mode, 0, cost);
191 set_shiftsub1_cost (speed, mode, 0, cost);
192 }
193
194 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
195 for (m = 1; m < n; m++)
196 {
197 XEXP (all->shift, 1) = all->cint[m];
198 XEXP (all->shift_mult, 1) = all->pow2[m];
199
200 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
201 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
202 speed));
203 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
204 speed));
205 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
206 speed));
207 }
208
209 scalar_int_mode int_mode_to;
210 if (is_a <scalar_int_mode> (mode, &int_mode_to))
211 {
212 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
213 mode_from = (machine_mode)(mode_from + 1))
214 init_expmed_one_conv (all, int_mode_to,
215 as_a <scalar_int_mode> (mode_from), speed);
216
217 scalar_int_mode wider_mode;
218 if (GET_MODE_CLASS (int_mode_to) == MODE_INT
219 && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
220 {
221 PUT_MODE (all->zext, wider_mode);
222 PUT_MODE (all->wide_mult, wider_mode);
223 PUT_MODE (all->wide_lshr, wider_mode);
224 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
225
226 set_mul_widen_cost (speed, wider_mode,
227 set_src_cost (all->wide_mult, wider_mode, speed));
228 set_mul_highpart_cost (speed, int_mode_to,
229 set_src_cost (all->wide_trunc,
230 int_mode_to, speed));
231 }
232 }
233 }
234
235 void
236 init_expmed (void)
237 {
238 struct init_expmed_rtl all;
239 machine_mode mode = QImode;
240 int m, speed;
241
242 memset (&all, 0, sizeof all);
243 for (m = 1; m < MAX_BITS_PER_WORD; m++)
244 {
245 all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
246 all.cint[m] = GEN_INT (m);
247 }
248
249 /* Avoid using hard regs in ways which may be unsupported. */
250 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
251 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
252 all.neg = gen_rtx_NEG (mode, all.reg);
253 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
254 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
255 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
256 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
257 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
258 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
259 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
260 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
261 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
262 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
263 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
264 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
265 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
266 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
267 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
268
269 for (speed = 0; speed < 2; speed++)
270 {
271 crtl->maybe_hot_insn_p = speed;
272 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
273
274 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
275 mode = (machine_mode)(mode + 1))
276 init_expmed_one_mode (&all, mode, speed);
277
278 if (MIN_MODE_PARTIAL_INT != VOIDmode)
279 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
280 mode = (machine_mode)(mode + 1))
281 init_expmed_one_mode (&all, mode, speed);
282
283 if (MIN_MODE_VECTOR_INT != VOIDmode)
284 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
285 mode = (machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
287 }
288
289 if (alg_hash_used_p ())
290 {
291 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
292 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
293 }
294 else
295 set_alg_hash_used_p (true);
296 default_rtl_profile ();
297
298 ggc_free (all.trunc);
299 ggc_free (all.shift_sub1);
300 ggc_free (all.shift_sub0);
301 ggc_free (all.shift_add);
302 ggc_free (all.shift_mult);
303 ggc_free (all.shift);
304 ggc_free (all.wide_trunc);
305 ggc_free (all.wide_lshr);
306 ggc_free (all.wide_mult);
307 ggc_free (all.zext);
308 ggc_free (all.smod_32);
309 ggc_free (all.sdiv_32);
310 ggc_free (all.udiv);
311 ggc_free (all.sdiv);
312 ggc_free (all.mult);
313 ggc_free (all.neg);
314 ggc_free (all.plus);
315 ggc_free (all.reg);
316 }
317
318 /* Return an rtx representing minus the value of X.
319 MODE is the intended mode of the result,
320 useful if X is a CONST_INT. */
321
322 rtx
323 negate_rtx (machine_mode mode, rtx x)
324 {
325 rtx result = simplify_unary_operation (NEG, mode, x, mode);
326
327 if (result == 0)
328 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
329
330 return result;
331 }
332
333 /* Whether reverse storage order is supported on the target. */
334 static int reverse_storage_order_supported = -1;
335
336 /* Check whether reverse storage order is supported on the target. */
337
338 static void
339 check_reverse_storage_order_support (void)
340 {
341 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
342 {
343 reverse_storage_order_supported = 0;
344 sorry ("reverse scalar storage order");
345 }
346 else
347 reverse_storage_order_supported = 1;
348 }
349
350 /* Whether reverse FP storage order is supported on the target. */
351 static int reverse_float_storage_order_supported = -1;
352
353 /* Check whether reverse FP storage order is supported on the target. */
354
355 static void
356 check_reverse_float_storage_order_support (void)
357 {
358 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
359 {
360 reverse_float_storage_order_supported = 0;
361 sorry ("reverse floating-point scalar storage order");
362 }
363 else
364 reverse_float_storage_order_supported = 1;
365 }
366
367 /* Return an rtx representing value of X with reverse storage order.
368 MODE is the intended mode of the result,
369 useful if X is a CONST_INT. */
370
371 rtx
372 flip_storage_order (machine_mode mode, rtx x)
373 {
374 scalar_int_mode int_mode;
375 rtx result;
376
377 if (mode == QImode)
378 return x;
379
380 if (COMPLEX_MODE_P (mode))
381 {
382 rtx real = read_complex_part (x, false);
383 rtx imag = read_complex_part (x, true);
384
385 real = flip_storage_order (GET_MODE_INNER (mode), real);
386 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
387
388 return gen_rtx_CONCAT (mode, real, imag);
389 }
390
391 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
392 check_reverse_storage_order_support ();
393
394 if (!is_a <scalar_int_mode> (mode, &int_mode))
395 {
396 if (FLOAT_MODE_P (mode)
397 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
398 check_reverse_float_storage_order_support ();
399
400 if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
401 {
402 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
403 return x;
404 }
405 x = gen_lowpart (int_mode, x);
406 }
407
408 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
409 if (result == 0)
410 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
411
412 if (int_mode != mode)
413 result = gen_lowpart (mode, result);
414
415 return result;
416 }
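
/* For example, on a target with 8-bit bytes and a 32-bit SImode,
   flip_storage_order (SImode, x) byte-swaps x, turning 0x12345678
   into 0x78563412; QImode values are returned unchanged.  */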
417
418 /* If MODE is set, adjust bitfield memory MEM so that it points to the
419 first unit of mode MODE that contains a bitfield of size BITSIZE at
420 bit position BITNUM. If MODE is not set, return a BLKmode reference
421 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
422 of the field within the new memory. */
423
424 static rtx
425 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
426 unsigned HOST_WIDE_INT bitsize,
427 unsigned HOST_WIDE_INT bitnum,
428 unsigned HOST_WIDE_INT *new_bitnum)
429 {
430 scalar_int_mode imode;
431 if (mode.exists (&imode))
432 {
433 unsigned int unit = GET_MODE_BITSIZE (imode);
434 *new_bitnum = bitnum % unit;
435 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
436 return adjust_bitfield_address (mem, imode, offset);
437 }
438 else
439 {
440 *new_bitnum = bitnum % BITS_PER_UNIT;
441 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
442 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
443 / BITS_PER_UNIT);
444 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
445 }
446 }
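
/* For example, assuming 8-bit units and a 16-bit HImode, narrowing a
   reference to an 8-bit field at bit 37 with MODE set to HImode rebases
   MEM to the HImode unit at byte offset 4 and sets *NEW_BITNUM to
   37 % 16 == 5.  */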
447
448 /* The caller wants to perform insertion or extraction PATTERN on a
449 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
450 BITREGION_START and BITREGION_END are as for store_bit_field
451 and FIELDMODE is the natural mode of the field.
452
453 Search for a mode that is compatible with the memory access
454 restrictions and (where applicable) with a register insertion or
455 extraction. Return the new memory on success, storing the adjusted
456 bit position in *NEW_BITNUM. Return null otherwise. */
457
458 static rtx
459 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
460 rtx op0, HOST_WIDE_INT bitsize,
461 HOST_WIDE_INT bitnum,
462 unsigned HOST_WIDE_INT bitregion_start,
463 unsigned HOST_WIDE_INT bitregion_end,
464 machine_mode fieldmode,
465 unsigned HOST_WIDE_INT *new_bitnum)
466 {
467 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
468 bitregion_end, MEM_ALIGN (op0),
469 MEM_VOLATILE_P (op0));
470 scalar_int_mode best_mode;
471 if (iter.next_mode (&best_mode))
472 {
473 /* We can use a memory in BEST_MODE. See whether this is true for
474 any wider modes. All other things being equal, we prefer to
475 use the widest mode possible because it tends to expose more
476 CSE opportunities. */
477 if (!iter.prefer_smaller_modes ())
478 {
479 /* Limit the search to the mode required by the corresponding
480 register insertion or extraction instruction, if any. */
481 scalar_int_mode limit_mode = word_mode;
482 extraction_insn insn;
483 if (get_best_reg_extraction_insn (&insn, pattern,
484 GET_MODE_BITSIZE (best_mode),
485 fieldmode))
486 limit_mode = insn.field_mode;
487
488 scalar_int_mode wider_mode;
489 while (iter.next_mode (&wider_mode)
490 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
491 best_mode = wider_mode;
492 }
493 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
494 new_bitnum);
495 }
496 return NULL_RTX;
497 }
498
499 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
500 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
501 offset is then BITNUM / BITS_PER_UNIT. */
502
503 static bool
504 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
505 unsigned HOST_WIDE_INT bitsize,
506 machine_mode struct_mode)
507 {
508 if (BYTES_BIG_ENDIAN)
509 return (bitnum % BITS_PER_UNIT == 0
510 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
511 || (bitnum + bitsize) % BITS_PER_WORD == 0));
512 else
513 return bitnum % BITS_PER_WORD == 0;
514 }
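
/* For example, on a little-endian target with 32-bit words, a 16-bit
   field at bit 32 of a DImode value is a lowpart (32 % BITS_PER_WORD
   == 0) with subreg offset 32 / BITS_PER_UNIT == 4, whereas a field
   starting at bit 8 is not.  */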
515
516 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
517 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
518 Return false if the access would touch memory outside the range
519 BITREGION_START to BITREGION_END for conformance to the C++ memory
520 model. */
521
522 static bool
523 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
524 unsigned HOST_WIDE_INT bitnum,
525 scalar_int_mode fieldmode,
526 unsigned HOST_WIDE_INT bitregion_start,
527 unsigned HOST_WIDE_INT bitregion_end)
528 {
529 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
530
531 /* -fstrict-volatile-bitfields must be enabled and we must have a
532 volatile MEM. */
533 if (!MEM_P (op0)
534 || !MEM_VOLATILE_P (op0)
535 || flag_strict_volatile_bitfields <= 0)
536 return false;
537
538 /* The bit size must not be larger than the field mode, and
539 the field mode must not be larger than a word. */
540 if (bitsize > modesize || modesize > BITS_PER_WORD)
541 return false;
542
543 /* Check for cases of unaligned fields that must be split. */
544 if (bitnum % modesize + bitsize > modesize)
545 return false;
546
547 /* The memory must be sufficiently aligned for a MODESIZE access.
548 This condition guarantees that the memory access will not
549 touch anything after the end of the structure. */
550 if (MEM_ALIGN (op0) < modesize)
551 return false;
552
553 /* Check for cases where the C++ memory model applies. */
554 if (bitregion_end != 0
555 && (bitnum - bitnum % modesize < bitregion_start
556 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
557 return false;
558
559 return true;
560 }
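
/* For example, assuming a 32-bit SImode and BITS_PER_WORD >= 32, a
   3-bit field at bit 13 of a 32-bit-aligned volatile MEM qualifies when
   FIELDMODE is SImode (given -fstrict-volatile-bitfields and no
   conflicting bit region): 13 % 32 + 3 == 16, so the access stays
   within a single SImode unit.  */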
561
562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
563 bit number BITNUM can be treated as a simple value of mode MODE. */
564
565 static bool
566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
567 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
568 {
569 return (MEM_P (op0)
570 && bitnum % BITS_PER_UNIT == 0
571 && bitsize == GET_MODE_BITSIZE (mode)
572 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
573 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
574 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
575 }
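
/* For example, assuming a 32-bit SImode, a 32-bit field starting at
   bit 64 of a suitably aligned MEM satisfies all three conditions: the
   field starts on a byte boundary and exactly fills the mode, so it can
   be accessed as one SImode value.  */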
576 \f
577 /* Try to use instruction INSV to store VALUE into a field of OP0.
578 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
579 BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM
580 are as for store_bit_field. */
581
582 static bool
583 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
584 opt_scalar_int_mode op0_mode,
585 unsigned HOST_WIDE_INT bitsize,
586 unsigned HOST_WIDE_INT bitnum,
587 rtx value, scalar_int_mode value_mode)
588 {
589 struct expand_operand ops[4];
590 rtx value1;
591 rtx xop0 = op0;
592 rtx_insn *last = get_last_insn ();
593 bool copy_back = false;
594
595 scalar_int_mode op_mode = insv->field_mode;
596 unsigned int unit = GET_MODE_BITSIZE (op_mode);
597 if (bitsize == 0 || bitsize > unit)
598 return false;
599
600 if (MEM_P (xop0))
601 /* Get a reference to the first byte of the field. */
602 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
603 &bitnum);
604 else
605 {
606 /* Convert from counting within OP0 to counting in OP_MODE. */
607 if (BYTES_BIG_ENDIAN)
608 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
609
610 /* If xop0 is a register, we need it in OP_MODE
611 to make it acceptable to the format of insv. */
612 if (GET_CODE (xop0) == SUBREG)
613 /* We can't just change the mode, because this might clobber op0,
614 and we will need the original value of op0 if insv fails. */
615 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
616 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
617 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
618 }
619
620 /* If the destination is a paradoxical subreg such that we need a
621 truncate to the inner mode, perform the insertion on a temporary and
622 truncate the result to the original destination. Note that we can't
623 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
624 X) 0)) is (reg:N X). */
625 if (GET_CODE (xop0) == SUBREG
626 && REG_P (SUBREG_REG (xop0))
627 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
628 op_mode))
629 {
630 rtx tem = gen_reg_rtx (op_mode);
631 emit_move_insn (tem, xop0);
632 xop0 = tem;
633 copy_back = true;
634 }
635
636 /* There is a similar overflow check at the start of
637 store_bit_field_1, but it only handles the case where the field
638 lies completely outside the register. Here the field may also lie
639 partially in the register, so we need to adjust BITSIZE for that
640 partial overflow. Without this fix, pr48335-2.c would break on
641 big-endian targets that have a bit insert instruction, such as
642 arm and aarch64. */
643 if (bitsize + bitnum > unit && bitnum < unit)
644 {
645 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
646 "destination object, data truncated into %wu-bit",
647 bitsize, unit - bitnum);
648 bitsize = unit - bitnum;
649 }
650
651 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
652 "backwards" from the size of the unit we are inserting into.
653 Otherwise, we count bits from the most significant on a
654 BYTES/BITS_BIG_ENDIAN machine. */
655
656 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
657 bitnum = unit - bitsize - bitnum;
658
659 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
660 value1 = value;
661 if (value_mode != op_mode)
662 {
663 if (GET_MODE_BITSIZE (value_mode) >= bitsize)
664 {
665 rtx tmp;
666 /* Optimization: Don't bother really extending VALUE
667 if it has all the bits we will actually use. However,
668 if we must narrow it, be sure we do it correctly. */
669
670 if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
671 {
672 tmp = simplify_subreg (op_mode, value1, value_mode, 0);
673 if (! tmp)
674 tmp = simplify_gen_subreg (op_mode,
675 force_reg (value_mode, value1),
676 value_mode, 0);
677 }
678 else
679 {
680 tmp = gen_lowpart_if_possible (op_mode, value1);
681 if (! tmp)
682 tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
683 }
684 value1 = tmp;
685 }
686 else if (CONST_INT_P (value))
687 value1 = gen_int_mode (INTVAL (value), op_mode);
688 else
689 /* The parsing phase is supposed to make VALUE's data type
690 match that of the component reference, which is a type at
691 least as wide as the field; so VALUE should have a mode
692 that corresponds to that type. */
693 gcc_assert (CONSTANT_P (value));
694 }
695
696 create_fixed_operand (&ops[0], xop0);
697 create_integer_operand (&ops[1], bitsize);
698 create_integer_operand (&ops[2], bitnum);
699 create_input_operand (&ops[3], value1, op_mode);
700 if (maybe_expand_insn (insv->icode, 4, ops))
701 {
702 if (copy_back)
703 convert_move (op0, xop0, true);
704 return true;
705 }
706 delete_insns_since (last);
707 return false;
708 }
709
710 /* A subroutine of store_bit_field, with the same arguments. Return true
711 if the operation could be implemented.
712
713 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
714 no other way of implementing the operation. If FALLBACK_P is false,
715 return false instead. */
716
717 static bool
718 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
719 unsigned HOST_WIDE_INT bitnum,
720 unsigned HOST_WIDE_INT bitregion_start,
721 unsigned HOST_WIDE_INT bitregion_end,
722 machine_mode fieldmode,
723 rtx value, bool reverse, bool fallback_p)
724 {
725 rtx op0 = str_rtx;
726 rtx orig_value;
727
728 while (GET_CODE (op0) == SUBREG)
729 {
730 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
731 op0 = SUBREG_REG (op0);
732 }
733
734 /* No action is needed if the target is a register and if the field
735 lies completely outside that register. This can occur if the source
736 code contains an out-of-bounds access to a small array. */
737 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
738 return true;
739
740 /* Use vec_set patterns for inserting parts of vectors whenever
741 available. */
742 machine_mode outermode = GET_MODE (op0);
743 scalar_mode innermode = GET_MODE_INNER (outermode);
744 if (VECTOR_MODE_P (outermode)
745 && !MEM_P (op0)
746 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
747 && fieldmode == innermode
748 && bitsize == GET_MODE_BITSIZE (innermode)
749 && !(bitnum % GET_MODE_BITSIZE (innermode)))
750 {
751 struct expand_operand ops[3];
752 enum insn_code icode = optab_handler (vec_set_optab, outermode);
753 int pos = bitnum / GET_MODE_BITSIZE (innermode);
754
755 create_fixed_operand (&ops[0], op0);
756 create_input_operand (&ops[1], value, innermode);
757 create_integer_operand (&ops[2], pos);
758 if (maybe_expand_insn (icode, 3, ops))
759 return true;
760 }
761
762 /* If the target is a register, overwriting the entire object, or storing
763 a full-word or multi-word field can be done with just a SUBREG. */
764 if (!MEM_P (op0)
765 && bitsize == GET_MODE_BITSIZE (fieldmode)
766 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
767 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
768 {
769 /* Use the subreg machinery either to narrow OP0 to the required
770 words or to cope with mode punning between equal-sized modes.
771 In the latter case, use subreg on the rhs side, not lhs. */
772 rtx sub;
773
774 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
775 {
776 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
777 if (sub)
778 {
779 if (reverse)
780 sub = flip_storage_order (GET_MODE (op0), sub);
781 emit_move_insn (op0, sub);
782 return true;
783 }
784 }
785 else
786 {
787 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
788 bitnum / BITS_PER_UNIT);
789 if (sub)
790 {
791 if (reverse)
792 value = flip_storage_order (fieldmode, value);
793 emit_move_insn (sub, value);
794 return true;
795 }
796 }
797 }
798
799 /* If the target is memory, storing any naturally aligned field can be
800 done with a simple store. For targets that support fast unaligned
801 memory, any naturally sized, unit aligned field can be done directly. */
802 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
803 {
804 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
805 if (reverse)
806 value = flip_storage_order (fieldmode, value);
807 emit_move_insn (op0, value);
808 return true;
809 }
810
811 /* Make sure we are playing with integral modes. Pun with subregs
812 if we aren't. This must come after the entire register case above,
813 since that case is valid for any mode. The following cases are only
814 valid for integral modes. */
815 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
816 scalar_int_mode imode;
817 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
818 {
819 if (MEM_P (op0))
820 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
821 0, MEM_SIZE (op0));
822 else
823 op0 = gen_lowpart (op0_mode.require (), op0);
824 }
825
826 /* Storing an lsb-aligned field in a register
827 can be done with a movstrict instruction. */
828
829 if (!MEM_P (op0)
830 && !reverse
831 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
832 && bitsize == GET_MODE_BITSIZE (fieldmode)
833 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
834 {
835 struct expand_operand ops[2];
836 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
837 rtx arg0 = op0;
838 unsigned HOST_WIDE_INT subreg_off;
839
840 if (GET_CODE (arg0) == SUBREG)
841 {
842 /* Else we've got some float mode source being extracted into
843 a different float mode destination -- this combination of
844 subregs results in Severe Tire Damage. */
845 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
846 || GET_MODE_CLASS (fieldmode) == MODE_INT
847 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
848 arg0 = SUBREG_REG (arg0);
849 }
850
851 subreg_off = bitnum / BITS_PER_UNIT;
852 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
853 {
854 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
855
856 create_fixed_operand (&ops[0], arg0);
857 /* Shrink the source operand to FIELDMODE. */
858 create_convert_operand_to (&ops[1], value, fieldmode, false);
859 if (maybe_expand_insn (icode, 2, ops))
860 return true;
861 }
862 }
863
864 /* Handle fields bigger than a word. */
865
866 if (bitsize > BITS_PER_WORD)
867 {
868 /* Here we transfer the words of the field
869 in the order least significant first.
870 This is because the most significant word is the one which may
871 be less than full.
872 However, only do that if the value is not BLKmode. */
873
874 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
875 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
876 unsigned int i;
877 rtx_insn *last;
878
879 /* This is the mode we must force value to, so that there will be enough
880 subwords to extract. Note that fieldmode will often (always?) be
881 VOIDmode, because that is what store_field uses to indicate that this
882 is a bit field, but passing VOIDmode to operand_subword_force
883 is not allowed. */
884 fieldmode = GET_MODE (value);
885 if (fieldmode == VOIDmode)
886 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
887
888 last = get_last_insn ();
889 for (i = 0; i < nwords; i++)
890 {
891 /* If I is 0, use the low-order word in both field and target;
892 if I is 1, use the next to lowest word; and so on. */
893 unsigned int wordnum = (backwards
894 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
895 - i - 1
896 : i);
897 unsigned int bit_offset = (backwards ^ reverse
898 ? MAX ((int) bitsize - ((int) i + 1)
899 * BITS_PER_WORD,
900 0)
901 : (int) i * BITS_PER_WORD);
902 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
903 unsigned HOST_WIDE_INT new_bitsize =
904 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
905
906 /* If the remaining chunk doesn't have full wordsize we have
907 to make sure that for big-endian machines the higher order
908 bits are used. */
909 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
910 value_word = simplify_expand_binop (word_mode, lshr_optab,
911 value_word,
912 GEN_INT (BITS_PER_WORD
913 - new_bitsize),
914 NULL_RTX, true,
915 OPTAB_LIB_WIDEN);
916
917 if (!store_bit_field_1 (op0, new_bitsize,
918 bitnum + bit_offset,
919 bitregion_start, bitregion_end,
920 word_mode,
921 value_word, reverse, fallback_p))
922 {
923 delete_insns_since (last);
924 return false;
925 }
926 }
927 return true;
928 }
929
930 /* If VALUE has a floating-point or complex mode, access it as an
931 integer of the corresponding size. This can occur on a machine
932 with 64 bit registers that uses SFmode for float. It can also
933 occur for unaligned float or complex fields. */
934 orig_value = value;
935 scalar_int_mode value_mode;
936 if (GET_MODE (value) == VOIDmode)
937 /* By this point we've dealt with values that are bigger than a word,
938 so word_mode is a conservatively correct choice. */
939 value_mode = word_mode;
940 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
941 {
942 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
943 value = gen_reg_rtx (value_mode);
944 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
945 }
946
947 /* If OP0 is a multi-word register, narrow it to the affected word.
948 If the region spans two words, defer to store_split_bit_field.
949 Don't do this if op0 is a single hard register wider than word
950 such as a float or vector register. */
951 if (!MEM_P (op0)
952 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
953 && (!REG_P (op0)
954 || !HARD_REGISTER_P (op0)
955 || HARD_REGNO_NREGS (REGNO (op0), op0_mode.require ()) != 1))
956 {
957 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
958 {
959 if (!fallback_p)
960 return false;
961
962 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
963 bitregion_start, bitregion_end,
964 value, value_mode, reverse);
965 return true;
966 }
967 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
968 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
969 gcc_assert (op0);
970 op0_mode = word_mode;
971 bitnum %= BITS_PER_WORD;
972 }
973
974 /* From here on we can assume that the field to be stored in fits
975 within a word. If the destination is a register, it too fits
976 in a word. */
977
978 extraction_insn insv;
979 if (!MEM_P (op0)
980 && !reverse
981 && get_best_reg_extraction_insn (&insv, EP_insv,
982 GET_MODE_BITSIZE (op0_mode.require ()),
983 fieldmode)
984 && store_bit_field_using_insv (&insv, op0, op0_mode,
985 bitsize, bitnum, value, value_mode))
986 return true;
987
988 /* If OP0 is a memory, try copying it to a register and seeing if a
989 cheap register alternative is available. */
990 if (MEM_P (op0) && !reverse)
991 {
992 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
993 fieldmode)
994 && store_bit_field_using_insv (&insv, op0, op0_mode,
995 bitsize, bitnum, value, value_mode))
996 return true;
997
998 rtx_insn *last = get_last_insn ();
999
1000 /* Try loading part of OP0 into a register, inserting the bitfield
1001 into that, and then copying the result back to OP0. */
1002 unsigned HOST_WIDE_INT bitpos;
1003 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1004 bitregion_start, bitregion_end,
1005 fieldmode, &bitpos);
1006 if (xop0)
1007 {
1008 rtx tempreg = copy_to_reg (xop0);
1009 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1010 bitregion_start, bitregion_end,
1011 fieldmode, orig_value, reverse, false))
1012 {
1013 emit_move_insn (xop0, tempreg);
1014 return true;
1015 }
1016 delete_insns_since (last);
1017 }
1018 }
1019
1020 if (!fallback_p)
1021 return false;
1022
1023 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1024 bitregion_end, value, value_mode, reverse);
1025 return true;
1026 }
1027
1028 /* Generate code to store value from rtx VALUE
1029 into a bit-field within structure STR_RTX
1030 containing BITSIZE bits starting at bit BITNUM.
1031
1032 BITREGION_START is the bit position of the first bit in this region.
1033 BITREGION_END is the bit position of the last bit in this region.
1034 These two fields are 0, if the C++ memory model does not apply,
1035 or we are not interested in keeping track of bitfield regions.
1036
1037 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1038
1039 If REVERSE is true, the store is to be done in reverse order. */
1040
1041 void
1042 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1043 unsigned HOST_WIDE_INT bitnum,
1044 unsigned HOST_WIDE_INT bitregion_start,
1045 unsigned HOST_WIDE_INT bitregion_end,
1046 machine_mode fieldmode,
1047 rtx value, bool reverse)
1048 {
1049 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1050 scalar_int_mode int_mode;
1051 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1052 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1053 bitregion_start, bitregion_end))
1054 {
1055 /* Storing of a full word can be done with a simple store.
1056 We know here that the field can be accessed with one single
1057 instruction. For targets that support unaligned memory,
1058 an unaligned access may be necessary. */
1059 if (bitsize == GET_MODE_BITSIZE (int_mode))
1060 {
1061 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1062 bitnum / BITS_PER_UNIT);
1063 if (reverse)
1064 value = flip_storage_order (int_mode, value);
1065 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1066 emit_move_insn (str_rtx, value);
1067 }
1068 else
1069 {
1070 rtx temp;
1071
1072 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1073 &bitnum);
1074 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1075 temp = copy_to_reg (str_rtx);
1076 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1077 int_mode, value, reverse, true))
1078 gcc_unreachable ();
1079
1080 emit_move_insn (str_rtx, temp);
1081 }
1082
1083 return;
1084 }
1085
1086 /* Under the C++0x memory model, we must not touch bits outside the
1087 bit region. Adjust the address to start at the beginning of the
1088 bit region. */
1089 if (MEM_P (str_rtx) && bitregion_start > 0)
1090 {
1091 scalar_int_mode best_mode;
1092 machine_mode addr_mode = VOIDmode;
1093 HOST_WIDE_INT offset, size;
1094
1095 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1096
1097 offset = bitregion_start / BITS_PER_UNIT;
1098 bitnum -= bitregion_start;
1099 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1100 bitregion_end -= bitregion_start;
1101 bitregion_start = 0;
1102 if (get_best_mode (bitsize, bitnum,
1103 bitregion_start, bitregion_end,
1104 MEM_ALIGN (str_rtx), INT_MAX,
1105 MEM_VOLATILE_P (str_rtx), &best_mode))
1106 addr_mode = best_mode;
1107 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1108 offset, size);
1109 }
1110
1111 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1112 bitregion_start, bitregion_end,
1113 fieldmode, value, reverse, true))
1114 gcc_unreachable ();
1115 }
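
/* A typical call, sketched assuming a little-endian target, an 8-bit
   QImode and no C++ memory model constraints:
   store_bit_field (mem, 3, 5, 0, 0, QImode, val, false)
   stores the low 3 bits of VAL into bits 5..7 of the byte addressed
   by MEM.  */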
1116 \f
1117 /* Use shifts and boolean operations to store VALUE into a bit field of
1118 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1119 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1120 the mode of VALUE.
1121
1122 If REVERSE is true, the store is to be done in reverse order. */
1123
1124 static void
1125 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1126 unsigned HOST_WIDE_INT bitsize,
1127 unsigned HOST_WIDE_INT bitnum,
1128 unsigned HOST_WIDE_INT bitregion_start,
1129 unsigned HOST_WIDE_INT bitregion_end,
1130 rtx value, scalar_int_mode value_mode, bool reverse)
1131 {
1132 /* There is a case not handled here:
1133 a structure with a known alignment of just a halfword
1134 and a field split across two aligned halfwords within the structure.
1135 Or likewise a structure with a known alignment of just a byte
1136 and a field split across two bytes.
1137 Such cases are not supposed to be able to occur. */
1138
1139 scalar_int_mode best_mode;
1140 if (MEM_P (op0))
1141 {
1142 unsigned int max_bitsize = BITS_PER_WORD;
1143 scalar_int_mode imode;
1144 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1145 max_bitsize = GET_MODE_BITSIZE (imode);
1146
1147 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1148 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1149 &best_mode))
1150 {
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1154 bitregion_start, bitregion_end,
1155 value, value_mode, reverse);
1156 return;
1157 }
1158
1159 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1160 }
1161 else
1162 best_mode = op0_mode.require ();
1163
1164 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1165 value, value_mode, reverse);
1166 }
1167
1168 /* Helper function for store_fixed_bit_field, stores
1169 the bit field always using MODE, which is the mode of OP0. The other
1170 arguments are as for store_fixed_bit_field. */
1171
1172 static void
1173 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1174 unsigned HOST_WIDE_INT bitsize,
1175 unsigned HOST_WIDE_INT bitnum,
1176 rtx value, scalar_int_mode value_mode, bool reverse)
1177 {
1178 rtx temp;
1179 int all_zero = 0;
1180 int all_one = 0;
1181
1182 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1183 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1184
1185 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1186 /* BITNUM is the distance between our msb
1187 and that of the containing datum.
1188 Convert it to the distance from the lsb. */
1189 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1190
1191 /* Now BITNUM is always the distance between our lsb
1192 and that of OP0. */
1193
1194 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1195 we must first convert its mode to MODE. */
1196
1197 if (CONST_INT_P (value))
1198 {
1199 unsigned HOST_WIDE_INT v = UINTVAL (value);
1200
1201 if (bitsize < HOST_BITS_PER_WIDE_INT)
1202 v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1203
1204 if (v == 0)
1205 all_zero = 1;
1206 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1207 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1208 || (bitsize == HOST_BITS_PER_WIDE_INT
1209 && v == HOST_WIDE_INT_M1U))
1210 all_one = 1;
1211
1212 value = lshift_value (mode, v, bitnum);
1213 }
1214 else
1215 {
1216 int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1217 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1218
1219 if (value_mode != mode)
1220 value = convert_to_mode (mode, value, 1);
1221
1222 if (must_and)
1223 value = expand_binop (mode, and_optab, value,
1224 mask_rtx (mode, 0, bitsize, 0),
1225 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1226 if (bitnum > 0)
1227 value = expand_shift (LSHIFT_EXPR, mode, value,
1228 bitnum, NULL_RTX, 1);
1229 }
1230
1231 if (reverse)
1232 value = flip_storage_order (mode, value);
1233
1234 /* Now clear the chosen bits in OP0,
1235 except that if VALUE is -1 we need not bother. */
1236 /* We keep the intermediates in registers to allow CSE to combine
1237 consecutive bitfield assignments. */
1238
1239 temp = force_reg (mode, op0);
1240
1241 if (! all_one)
1242 {
1243 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1244 if (reverse)
1245 mask = flip_storage_order (mode, mask);
1246 temp = expand_binop (mode, and_optab, temp, mask,
1247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1248 temp = force_reg (mode, temp);
1249 }
1250
1251 /* Now logical-or VALUE into OP0, unless it is zero. */
1252
1253 if (! all_zero)
1254 {
1255 temp = expand_binop (mode, ior_optab, temp, value,
1256 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1257 temp = force_reg (mode, temp);
1258 }
1259
1260 if (op0 != temp)
1261 {
1262 op0 = copy_rtx (op0);
1263 emit_move_insn (op0, temp);
1264 }
1265 }
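
/* As a worked example, assuming a little-endian target, no REVERSE and
   a 32-bit MODE: storing the 3-bit constant 5 at BITNUM 4 ANDs OP0
   with the complement mask ~0x70 (clearing bits 4..6) and then IORs in
   5 << 4 == 0x50.  */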
1266 \f
1267 /* Store a bit field that is split across multiple accessible memory objects.
1268
1269 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1270 BITSIZE is the field width; BITPOS the position of its first bit
1271 (within the word).
1272 VALUE is the value to store, which has mode VALUE_MODE.
1273 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1274 a BLKmode MEM.
1275
1276 If REVERSE is true, the store is to be done in reverse order.
1277
1278 This does not yet handle fields wider than BITS_PER_WORD. */
1279
1280 static void
1281 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1282 unsigned HOST_WIDE_INT bitsize,
1283 unsigned HOST_WIDE_INT bitpos,
1284 unsigned HOST_WIDE_INT bitregion_start,
1285 unsigned HOST_WIDE_INT bitregion_end,
1286 rtx value, scalar_int_mode value_mode, bool reverse)
1287 {
1288 unsigned int unit, total_bits, bitsdone = 0;
1289
1290 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1291 that much at a time. */
1292 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1293 unit = BITS_PER_WORD;
1294 else
1295 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1296
1297 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1298 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1299 again, and we will mutually recurse forever. */
1300 if (MEM_P (op0) && op0_mode.exists ())
1301 unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1302
1303 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1304 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1305 that VALUE might be a floating-point constant. */
1306 if (CONSTANT_P (value) && !CONST_INT_P (value))
1307 {
1308 rtx word = gen_lowpart_common (word_mode, value);
1309
1310 if (word && (value != word))
1311 value = word;
1312 else
1313 value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1314 value_mode = word_mode;
1315 }
1316
1317 total_bits = GET_MODE_BITSIZE (value_mode);
1318
1319 while (bitsdone < bitsize)
1320 {
1321 unsigned HOST_WIDE_INT thissize;
1322 unsigned HOST_WIDE_INT thispos;
1323 unsigned HOST_WIDE_INT offset;
1324 rtx part;
1325
1326 offset = (bitpos + bitsdone) / unit;
1327 thispos = (bitpos + bitsdone) % unit;
1328
1329 /* When the region of bytes we can touch is restricted, decrease
1330 UNIT near the end of the region as needed. If op0 is a REG or a
1331 SUBREG of a REG, don't do this, as there can't be data races on
1332 a register and we can expand shorter code in some cases. */
1333 if (bitregion_end
1334 && unit > BITS_PER_UNIT
1335 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1336 && !REG_P (op0)
1337 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1338 {
1339 unit = unit / 2;
1340 continue;
1341 }
1342
1343 /* THISSIZE must not overrun a word boundary. Otherwise,
1344 store_fixed_bit_field will call us again, and we will mutually
1345 recurse forever. */
1346 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1347 thissize = MIN (thissize, unit - thispos);
1348
1349 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1350 {
1351 /* Fetch successively less significant portions. */
1352 if (CONST_INT_P (value))
1353 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1354 >> (bitsize - bitsdone - thissize))
1355 & ((HOST_WIDE_INT_1 << thissize) - 1));
1356 /* Likewise, but the source is little-endian. */
1357 else if (reverse)
1358 part = extract_fixed_bit_field (word_mode, value, value_mode,
1359 thissize,
1360 bitsize - bitsdone - thissize,
1361 NULL_RTX, 1, false);
1362 else
1363 /* The args are chosen so that the last part includes the
1364 lsb. Give extract_bit_field the value it needs (with
1365 endianness compensation) to fetch the piece we want. */
1366 part = extract_fixed_bit_field (word_mode, value, value_mode,
1367 thissize,
1368 total_bits - bitsize + bitsdone,
1369 NULL_RTX, 1, false);
1370 }
1371 else
1372 {
1373 /* Fetch successively more significant portions. */
1374 if (CONST_INT_P (value))
1375 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1376 >> bitsdone)
1377 & ((HOST_WIDE_INT_1 << thissize) - 1));
1378 /* Likewise, but the source is big-endian. */
1379 else if (reverse)
1380 part = extract_fixed_bit_field (word_mode, value, value_mode,
1381 thissize,
1382 total_bits - bitsdone - thissize,
1383 NULL_RTX, 1, false);
1384 else
1385 part = extract_fixed_bit_field (word_mode, value, value_mode,
1386 thissize, bitsdone, NULL_RTX,
1387 1, false);
1388 }
1389
1390 /* If OP0 is a register, then handle OFFSET here. */
1391 rtx op0_piece = op0;
1392 opt_scalar_int_mode op0_piece_mode = op0_mode;
1393 if (SUBREG_P (op0) || REG_P (op0))
1394 {
1395 scalar_int_mode imode;
1396 if (op0_mode.exists (&imode)
1397 && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1398 {
1399 if (offset)
1400 op0_piece = const0_rtx;
1401 }
1402 else
1403 {
1404 op0_piece = operand_subword_force (op0,
1405 offset * unit / BITS_PER_WORD,
1406 GET_MODE (op0));
1407 op0_piece_mode = word_mode;
1408 }
1409 offset &= BITS_PER_WORD / unit - 1;
1410 }
1411
1412 /* OFFSET is in UNITs, and UNIT is in bits. If OP0_PIECE is const0_rtx,
1413 it is just an out-of-bounds access. Ignore it. */
1414 if (op0_piece != const0_rtx)
1415 store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1416 offset * unit + thispos, bitregion_start,
1417 bitregion_end, part, word_mode, reverse);
1418 bitsdone += thissize;
1419 }
1420 }
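
/* For example, assuming a little-endian target with 32-bit words,
   storing a 16-bit field at BITPOS 24 in a multi-word REG is done in
   two pieces: the first iteration stores 8 bits at bit 24 of word 0
   and the second stores the remaining 8 bits at bit 0 of word 1.  */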
1421 \f
1422 /* A subroutine of extract_bit_field_1 that converts return value X
1423 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1424 to extract_bit_field. */
1425
1426 static rtx
1427 convert_extracted_bit_field (rtx x, machine_mode mode,
1428 machine_mode tmode, bool unsignedp)
1429 {
1430 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1431 return x;
1432
1433 /* If TMODE is not a scalar integral mode, first convert to an
1434 integer mode of the same size and then view the result in TMODE
1435 via a lowpart SUBREG. */
1436 if (!SCALAR_INT_MODE_P (tmode))
1437 {
1438 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1439 x = convert_to_mode (int_mode, x, unsignedp);
1440 x = force_reg (int_mode, x);
1441 return gen_lowpart (tmode, x);
1442 }
1443
1444 return convert_to_mode (tmode, x, unsignedp);
1445 }
1446
1447 /* Try to use an ext(z)v pattern to extract a field from OP0.
1448 Return the extracted value on success, otherwise return null.
1449 EXTV describes the extraction instruction to use. If OP0_MODE
1450 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1451 The other arguments are as for extract_bit_field. */
1452
1453 static rtx
1454 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1455 opt_scalar_int_mode op0_mode,
1456 unsigned HOST_WIDE_INT bitsize,
1457 unsigned HOST_WIDE_INT bitnum,
1458 int unsignedp, rtx target,
1459 machine_mode mode, machine_mode tmode)
1460 {
1461 struct expand_operand ops[4];
1462 rtx spec_target = target;
1463 rtx spec_target_subreg = 0;
1464 scalar_int_mode ext_mode = extv->field_mode;
1465 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1466
1467 if (bitsize == 0 || unit < bitsize)
1468 return NULL_RTX;
1469
1470 if (MEM_P (op0))
1471 /* Get a reference to the first byte of the field. */
1472 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1473 &bitnum);
1474 else
1475 {
1476 /* Convert from counting within OP0 to counting in EXT_MODE. */
1477 if (BYTES_BIG_ENDIAN)
1478 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1479
1480 /* If op0 is a register, we need it in EXT_MODE to make it
1481 acceptable to the format of ext(z)v. */
1482 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1483 return NULL_RTX;
1484 if (REG_P (op0) && op0_mode.require () != ext_mode)
1485 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1486 }
1487
1488 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1489 "backwards" from the size of the unit we are extracting from.
1490 Otherwise, we count bits from the most significant on a
1491 BYTES/BITS_BIG_ENDIAN machine. */
1492
1493 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1494 bitnum = unit - bitsize - bitnum;
1495
1496 if (target == 0)
1497 target = spec_target = gen_reg_rtx (tmode);
1498
1499 if (GET_MODE (target) != ext_mode)
1500 {
1501 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1502 between the mode of the extraction (word_mode) and the target
1503 mode. Instead, create a temporary and use convert_move to set
1504 the target. */
1505 if (REG_P (target)
1506 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1507 {
1508 target = gen_lowpart (ext_mode, target);
1509 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1510 spec_target_subreg = target;
1511 }
1512 else
1513 target = gen_reg_rtx (ext_mode);
1514 }
1515
1516 create_output_operand (&ops[0], target, ext_mode);
1517 create_fixed_operand (&ops[1], op0);
1518 create_integer_operand (&ops[2], bitsize);
1519 create_integer_operand (&ops[3], bitnum);
1520 if (maybe_expand_insn (extv->icode, 4, ops))
1521 {
1522 target = ops[0].value;
1523 if (target == spec_target)
1524 return target;
1525 if (target == spec_target_subreg)
1526 return spec_target;
1527 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1528 }
1529 return NULL_RTX;
1530 }
1531
1532 /* A subroutine of extract_bit_field, with the same arguments.
1533 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1534 if we can find no other means of implementing the operation.
1535 If FALLBACK_P is false, return NULL instead. */
1536
1537 static rtx
1538 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1539 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1540 machine_mode mode, machine_mode tmode,
1541 bool reverse, bool fallback_p, rtx *alt_rtl)
1542 {
1543 rtx op0 = str_rtx;
1544 machine_mode mode1;
1545
1546 if (tmode == VOIDmode)
1547 tmode = mode;
1548
1549 while (GET_CODE (op0) == SUBREG)
1550 {
1551 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1552 op0 = SUBREG_REG (op0);
1553 }
1554
1555 /* If we have an out-of-bounds access to a register, just return an
1556 uninitialized register of the required mode. This can occur if the
1557 source code contains an out-of-bounds access to a small array. */
1558 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1559 return gen_reg_rtx (tmode);
1560
1561 if (REG_P (op0)
1562 && mode == GET_MODE (op0)
1563 && bitnum == 0
1564 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1565 {
1566 if (reverse)
1567 op0 = flip_storage_order (mode, op0);
1568 /* We're trying to extract a full register from itself. */
1569 return op0;
1570 }
1571
1572 /* First try to check for vector from vector extractions. */
1573 if (VECTOR_MODE_P (GET_MODE (op0))
1574 && !MEM_P (op0)
1575 && VECTOR_MODE_P (tmode)
1576 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1577 {
1578 machine_mode new_mode = GET_MODE (op0);
1579 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1580 {
1581 new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1582 GET_MODE_BITSIZE (GET_MODE (op0))
1583 / GET_MODE_UNIT_BITSIZE (tmode));
1584 if (!VECTOR_MODE_P (new_mode)
1585 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1586 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1587 || !targetm.vector_mode_supported_p (new_mode))
1588 new_mode = VOIDmode;
1589 }
1590 if (new_mode != VOIDmode
1591 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1592 != CODE_FOR_nothing)
1593 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1594 == bitnum / GET_MODE_BITSIZE (tmode)))
1595 {
1596 struct expand_operand ops[3];
1597 machine_mode outermode = new_mode;
1598 machine_mode innermode = tmode;
1599 enum insn_code icode
1600 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1601 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1602
1603 if (new_mode != GET_MODE (op0))
1604 op0 = gen_lowpart (new_mode, op0);
1605 create_output_operand (&ops[0], target, innermode);
1606 ops[0].target = 1;
1607 create_input_operand (&ops[1], op0, outermode);
1608 create_integer_operand (&ops[2], pos);
1609 if (maybe_expand_insn (icode, 3, ops))
1610 {
1611 if (alt_rtl && ops[0].target)
1612 *alt_rtl = target;
1613 target = ops[0].value;
1614 if (GET_MODE (target) != mode)
1615 return gen_lowpart (tmode, target);
1616 return target;
1617 }
1618 }
1619 }
1620
1621 /* See if we can get a better vector mode before extracting. */
1622 if (VECTOR_MODE_P (GET_MODE (op0))
1623 && !MEM_P (op0)
1624 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1625 {
1626 machine_mode new_mode;
1627
1628 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1629 new_mode = MIN_MODE_VECTOR_FLOAT;
1630 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1631 new_mode = MIN_MODE_VECTOR_FRACT;
1632 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1633 new_mode = MIN_MODE_VECTOR_UFRACT;
1634 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1635 new_mode = MIN_MODE_VECTOR_ACCUM;
1636 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1637 new_mode = MIN_MODE_VECTOR_UACCUM;
1638 else
1639 new_mode = MIN_MODE_VECTOR_INT;
1640
1641 FOR_EACH_MODE_FROM (new_mode, new_mode)
1642 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1643 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1644 && targetm.vector_mode_supported_p (new_mode))
1645 break;
1646 if (new_mode != VOIDmode)
1647 op0 = gen_lowpart (new_mode, op0);
1648 }
1649
1650 /* Use vec_extract patterns for extracting parts of vectors whenever
1651 available. */
1652 machine_mode outermode = GET_MODE (op0);
1653 scalar_mode innermode = GET_MODE_INNER (outermode);
1654 if (VECTOR_MODE_P (outermode)
1655 && !MEM_P (op0)
1656 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1657 != CODE_FOR_nothing)
1658 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1659 == bitnum / GET_MODE_BITSIZE (innermode)))
1660 {
1661 struct expand_operand ops[3];
1662 enum insn_code icode
1663 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1664 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1665
1666 create_output_operand (&ops[0], target, innermode);
1667 ops[0].target = 1;
1668 create_input_operand (&ops[1], op0, outermode);
1669 create_integer_operand (&ops[2], pos);
1670 if (maybe_expand_insn (icode, 3, ops))
1671 {
1672 if (alt_rtl && ops[0].target)
1673 *alt_rtl = target;
1674 target = ops[0].value;
1675 if (GET_MODE (target) != mode)
1676 return gen_lowpart (tmode, target);
1677 return target;
1678 }
1679 }
1680
1681 /* Make sure we are playing with integral modes. Pun with subregs
1682 if we aren't. */
1683 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1684 scalar_int_mode imode;
1685 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1686 {
1687 if (MEM_P (op0))
1688 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1689 0, MEM_SIZE (op0));
1690 else if (op0_mode.exists (&imode))
1691 {
1692 op0 = gen_lowpart (imode, op0);
1693
1694 /* If we got a SUBREG, force it into a register since we
1695 aren't going to be able to do another SUBREG on it. */
1696 if (GET_CODE (op0) == SUBREG)
1697 op0 = force_reg (imode, op0);
1698 }
1699 else
1700 {
1701 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1702 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1703 emit_move_insn (mem, op0);
1704 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1705 }
1706 }
1707
1708 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1709 If that's wrong, the solution is to test for it and set TARGET to 0
1710 if needed. */
1711
1712 /* Get the mode of the field to use for atomic access or subreg
1713 conversion. */
1714 mode1 = mode;
1715 if (SCALAR_INT_MODE_P (tmode))
1716 {
1717 machine_mode try_mode = mode_for_size (bitsize,
1718 GET_MODE_CLASS (tmode), 0);
1719 if (try_mode != BLKmode)
1720 mode1 = try_mode;
1721 }
1722 gcc_assert (mode1 != BLKmode);
1723
1724 /* Extraction of a full MODE1 value can be done with a subreg as long
1725 as the least significant bit of the value is the least significant
1726 bit of either OP0 or a word of OP0. */
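/* For example, on a little-endian target the low 32 bits of a DImode
register can simply be returned as (subreg:SI (reg:DI ...) 0). */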
1727 if (!MEM_P (op0)
1728 && !reverse
1729 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1730 && bitsize == GET_MODE_BITSIZE (mode1)
1731 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1732 {
1733 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1734 bitnum / BITS_PER_UNIT);
1735 if (sub)
1736 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1737 }
1738
1739 /* Extraction of a full MODE1 value can be done with a load as long as
1740 the field is on a byte boundary and is sufficiently aligned. */
1741 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1742 {
1743 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1744 if (reverse)
1745 op0 = flip_storage_order (mode1, op0);
1746 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1747 }
1748
1749 /* Handle fields bigger than a word. */
1750
1751 if (bitsize > BITS_PER_WORD)
1752 {
1753 /* Here we transfer the words of the field
1754 in the order least significant first.
1755 This is because the most significant word is the one which may
1756 be less than full. */
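/* For example, a 40-bit field on a 32-bit-word target is transferred as
a full low-order word followed by a partial 8-bit high-order word. */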
1757
1758 const bool backwards = WORDS_BIG_ENDIAN;
1759 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1760 unsigned int i;
1761 rtx_insn *last;
1762
1763 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1764 target = gen_reg_rtx (mode);
1765
1766 /* In case we're about to clobber a base register or something
1767 (see gcc.c-torture/execute/20040625-1.c). */
1768 if (reg_mentioned_p (target, str_rtx))
1769 target = gen_reg_rtx (mode);
1770
1771 /* Indicate for flow that the entire target reg is being set. */
1772 emit_clobber (target);
1773
1774 last = get_last_insn ();
1775 for (i = 0; i < nwords; i++)
1776 {
1777 /* If I is 0, use the low-order word in both field and target;
1778 if I is 1, use the next to lowest word; and so on. */
1779 /* Word number in TARGET to use. */
1780 unsigned int wordnum
1781 = (backwards
1782 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1783 : i);
1784 /* Offset from start of field in OP0. */
1785 unsigned int bit_offset = (backwards ^ reverse
1786 ? MAX ((int) bitsize - ((int) i + 1)
1787 * BITS_PER_WORD,
1788 0)
1789 : (int) i * BITS_PER_WORD);
1790 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1791 rtx result_part
1792 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1793 bitsize - i * BITS_PER_WORD),
1794 bitnum + bit_offset, 1, target_part,
1795 mode, word_mode, reverse, fallback_p, NULL);
1796
1797 gcc_assert (target_part);
1798 if (!result_part)
1799 {
1800 delete_insns_since (last);
1801 return NULL;
1802 }
1803
1804 if (result_part != target_part)
1805 emit_move_insn (target_part, result_part);
1806 }
1807
1808 if (unsignedp)
1809 {
1810 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1811 need to be zeroed out. */
1812 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1813 {
1814 unsigned int i, total_words;
1815
1816 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1817 for (i = nwords; i < total_words; i++)
1818 emit_move_insn
1819 (operand_subword (target,
1820 backwards ? total_words - i - 1 : i,
1821 1, VOIDmode),
1822 const0_rtx);
1823 }
1824 return target;
1825 }
1826
1827 /* Signed bit field: sign-extend with two arithmetic shifts. */
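/* For example, with a 64-bit MODE and a 40-bit field, shifting left by 24
and then arithmetically right by 24 replicates bit 39 of the field into
the upper 24 bits of the result. */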
1828 target = expand_shift (LSHIFT_EXPR, mode, target,
1829 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1830 return expand_shift (RSHIFT_EXPR, mode, target,
1831 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1832 }
1833
1834 /* If OP0 is a multi-word register, narrow it to the affected word.
1835 If the region spans two words, defer to extract_split_bit_field. */
1836 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1837 {
1838 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1839 {
1840 if (!fallback_p)
1841 return NULL_RTX;
1842 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1843 unsignedp, reverse);
1844 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1845 }
1846 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1847 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1848 op0_mode = word_mode;
1849 bitnum %= BITS_PER_WORD;
1850 }
1851
1852 /* From here on we know the desired field is smaller than a word.
1853 If OP0 is a register, it too fits within a word. */
1854 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1855 extraction_insn extv;
1856 if (!MEM_P (op0)
1857 && !reverse
1858 /* ??? We could limit the structure size to the part of OP0 that
1859 contains the field, with appropriate checks for endianness
1860 and TRULY_NOOP_TRUNCATION. */
1861 && get_best_reg_extraction_insn (&extv, pattern,
1862 GET_MODE_BITSIZE (op0_mode.require ()),
1863 tmode))
1864 {
1865 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1866 bitsize, bitnum,
1867 unsignedp, target, mode,
1868 tmode);
1869 if (result)
1870 return result;
1871 }
1872
1873 /* If OP0 is a memory, try copying it to a register and seeing if a
1874 cheap register alternative is available. */
1875 if (MEM_P (op0) && !reverse)
1876 {
1877 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1878 tmode))
1879 {
1880 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1881 bitsize, bitnum,
1882 unsignedp, target, mode,
1883 tmode);
1884 if (result)
1885 return result;
1886 }
1887
1888 rtx_insn *last = get_last_insn ();
1889
1890 /* Try loading part of OP0 into a register and extracting the
1891 bitfield from that. */
1892 unsigned HOST_WIDE_INT bitpos;
1893 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1894 0, 0, tmode, &bitpos);
1895 if (xop0)
1896 {
1897 xop0 = copy_to_reg (xop0);
1898 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1899 unsignedp, target,
1900 mode, tmode, reverse, false, NULL);
1901 if (result)
1902 return result;
1903 delete_insns_since (last);
1904 }
1905 }
1906
1907 if (!fallback_p)
1908 return NULL;
1909
1910 /* Find a correspondingly-sized integer field, so we can apply
1911 shifts and masks to it. */
1912 scalar_int_mode int_mode;
1913 if (!int_mode_for_mode (tmode).exists (&int_mode))
1914 /* If this fails, we should probably push op0 out to memory and then
1915 do a load. */
1916 int_mode = int_mode_for_mode (mode).require ();
1917
1918 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1919 bitnum, target, unsignedp, reverse);
1920
1921 /* Complex values must be reversed piecewise, so we need to undo the global
1922 reversal, convert to the complex mode and reverse again. */
1923 if (reverse && COMPLEX_MODE_P (tmode))
1924 {
1925 target = flip_storage_order (int_mode, target);
1926 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1927 target = flip_storage_order (tmode, target);
1928 }
1929 else
1930 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1931
1932 return target;
1933 }
1934
1935 /* Generate code to extract a bit-field from STR_RTX
1936 containing BITSIZE bits, starting at BITNUM,
1937 and put it in TARGET if possible (if TARGET is nonzero).
1938 Regardless of TARGET, we return the rtx for where the value is placed.
1939
1940 STR_RTX is the structure containing the byte (a REG or MEM).
1941 UNSIGNEDP is nonzero if this is an unsigned bit field.
1942 MODE is the natural mode of the field value once extracted.
1943 TMODE is the mode the caller would like the value to have;
1944 but the value may be returned with type MODE instead.
1945
1946 If REVERSE is true, the extraction is to be done in reverse order.
1947
1948 If a TARGET is specified and we can store in it at no extra cost,
1949 we do so, and return TARGET.
1950 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1951 if they are equally easy. */
1952
1953 rtx
1954 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1955 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1956 machine_mode mode, machine_mode tmode, bool reverse,
1957 rtx *alt_rtl)
1958 {
1959 machine_mode mode1;
1960
1961 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1962 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1963 mode1 = GET_MODE (str_rtx);
1964 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1965 mode1 = GET_MODE (target);
1966 else
1967 mode1 = tmode;
1968
1969 scalar_int_mode int_mode;
1970 if (is_a <scalar_int_mode> (mode1, &int_mode)
1971 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1972 {
1973 /* Extraction of a full INT_MODE value can be done with a simple load.
1974 We know here that the field can be accessed with one single
1975 instruction. For targets that support unaligned memory,
1976 an unaligned access may be necessary. */
1977 if (bitsize == GET_MODE_BITSIZE (int_mode))
1978 {
1979 rtx result = adjust_bitfield_address (str_rtx, int_mode,
1980 bitnum / BITS_PER_UNIT);
1981 if (reverse)
1982 result = flip_storage_order (int_mode, result);
1983 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1984 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1985 }
1986
1987 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1988 &bitnum);
1989 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1990 str_rtx = copy_to_reg (str_rtx);
1991 }
1992
1993 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1994 target, mode, tmode, reverse, true, alt_rtl);
1995 }
1996 \f
1997 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1998 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
1999 otherwise OP0 is a BLKmode MEM.
2000
2001 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2002 If REVERSE is true, the extraction is to be done in reverse order.
2003
2004 If TARGET is nonzero, attempts to store the value there
2005 and return TARGET, but this is not guaranteed.
2006 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2007
2008 static rtx
2009 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2010 opt_scalar_int_mode op0_mode,
2011 unsigned HOST_WIDE_INT bitsize,
2012 unsigned HOST_WIDE_INT bitnum, rtx target,
2013 int unsignedp, bool reverse)
2014 {
2015 scalar_int_mode mode;
2016 if (MEM_P (op0))
2017 {
2018 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2019 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2020 /* The only way this should occur is if the field spans word
2021 boundaries. */
2022 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2023 unsignedp, reverse);
2024
2025 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2026 }
2027 else
2028 mode = op0_mode.require ();
2029
2030 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2031 target, unsignedp, reverse);
2032 }
2033
2034 /* Helper function for extract_fixed_bit_field; it extracts
2035 the bit field always using MODE, which is the mode of OP0.
2036 The other arguments are as for extract_fixed_bit_field. */
2037
2038 static rtx
2039 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2040 unsigned HOST_WIDE_INT bitsize,
2041 unsigned HOST_WIDE_INT bitnum, rtx target,
2042 int unsignedp, bool reverse)
2043 {
2044 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2045 for invalid input, such as extract equivalent of f5 from
2046 gcc.dg/pr48335-2.c. */
2047
2048 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2049 /* BITNUM is the distance between our msb and that of OP0.
2050 Convert it to the distance from the lsb. */
2051 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
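/* For example, with a 32-bit MODE, an 8-bit field at big-endian BITNUM 0
(the most significant byte) becomes BITNUM 24 counted from the lsb. */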
2052
2053 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2054 We have reduced the big-endian case to the little-endian case. */
2055 if (reverse)
2056 op0 = flip_storage_order (mode, op0);
2057
2058 if (unsignedp)
2059 {
2060 if (bitnum)
2061 {
2062 /* If the field does not already start at the lsb,
2063 shift it so it does. */
2064 /* Maybe propagate the target for the shift. */
2065 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2066 if (tmode != mode)
2067 subtarget = 0;
2068 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2069 }
2070 /* Convert the value to the desired mode. TMODE must also be a
2071 scalar integer for this conversion to make sense, since we
2072 shouldn't reinterpret the bits. */
2073 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2074 if (mode != new_mode)
2075 op0 = convert_to_mode (new_mode, op0, 1);
2076
2077 /* Unless the msb of the field used to be the msb when we shifted,
2078 mask out the upper bits. */
2079
2080 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2081 return expand_binop (new_mode, and_optab, op0,
2082 mask_rtx (new_mode, 0, bitsize, 0),
2083 target, 1, OPTAB_LIB_WIDEN);
2084 return op0;
2085 }
2086
2087 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2088 then arithmetic-shift its lsb to the lsb of the word. */
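/* For example, a 5-bit signed field sitting in the low bits of a QImode
value is shifted left by 3 and then arithmetically right by 3, which
leaves the field sign-extended to the full byte. */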
2089 op0 = force_reg (mode, op0);
2090
2091 /* Find the narrowest integer mode that contains the field. */
2092
2093 opt_scalar_int_mode mode_iter;
2094 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2095 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2096 break;
2097
2098 mode = mode_iter.require ();
2099 op0 = convert_to_mode (mode, op0, 0);
2100
2101 if (mode != tmode)
2102 target = 0;
2103
2104 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2105 {
2106 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2107 /* Maybe propagate the target for the shift. */
2108 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2109 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2110 }
2111
2112 return expand_shift (RSHIFT_EXPR, mode, op0,
2113 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2114 }
2115
2116 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2117 VALUE << BITPOS. */
2118
2119 static rtx
2120 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2121 int bitpos)
2122 {
2123 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2124 }
2125 \f
2126 /* Extract a bit field that is split across two words
2127 and return an RTX for the result.
2128
2129 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2130 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2131 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2132 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2133 a BLKmode MEM.
2134
2135 If REVERSE is true, the extraction is to be done in reverse order. */
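/* For example, with 32-bit words, a 16-bit field starting at bit 24 covers
bits 24..39: 8 bits come from the first word and 8 from the second, and
the two parts are OR-ed together below. */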
2136
2137 static rtx
2138 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2139 unsigned HOST_WIDE_INT bitsize,
2140 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2141 bool reverse)
2142 {
2143 unsigned int unit;
2144 unsigned int bitsdone = 0;
2145 rtx result = NULL_RTX;
2146 int first = 1;
2147
2148 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2149 much at a time. */
2150 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2151 unit = BITS_PER_WORD;
2152 else
2153 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2154
2155 while (bitsdone < bitsize)
2156 {
2157 unsigned HOST_WIDE_INT thissize;
2158 rtx part;
2159 unsigned HOST_WIDE_INT thispos;
2160 unsigned HOST_WIDE_INT offset;
2161
2162 offset = (bitpos + bitsdone) / unit;
2163 thispos = (bitpos + bitsdone) % unit;
2164
2165 /* THISSIZE must not overrun a word boundary. Otherwise,
2166 extract_fixed_bit_field will call us again, and we will mutually
2167 recurse forever. */
2168 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2169 thissize = MIN (thissize, unit - thispos);
2170
2171 /* If OP0 is a register, then handle OFFSET here. */
2172 rtx op0_piece = op0;
2173 opt_scalar_int_mode op0_piece_mode = op0_mode;
2174 if (SUBREG_P (op0) || REG_P (op0))
2175 {
2176 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2177 op0_piece_mode = word_mode;
2178 offset = 0;
2179 }
2180
2181 /* Extract the parts in bit-counting order,
2182 whose meaning is determined by BYTES_PER_UNIT.
2183 OFFSET is in UNITs, and UNIT is in bits. */
2184 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2185 thissize, offset * unit + thispos,
2186 0, 1, reverse);
2187 bitsdone += thissize;
2188
2189 /* Shift this part into place for the result. */
2190 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2191 {
2192 if (bitsize != bitsdone)
2193 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2194 bitsize - bitsdone, 0, 1);
2195 }
2196 else
2197 {
2198 if (bitsdone != thissize)
2199 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2200 bitsdone - thissize, 0, 1);
2201 }
2202
2203 if (first)
2204 result = part;
2205 else
2206 /* Combine the parts with bitwise or. This works
2207 because we extracted each part as an unsigned bit field. */
2208 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2209 OPTAB_LIB_WIDEN);
2210
2211 first = 0;
2212 }
2213
2214 /* Unsigned bit field: we are done. */
2215 if (unsignedp)
2216 return result;
2217 /* Signed bit field: sign-extend with two arithmetic shifts. */
2218 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2219 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2220 return expand_shift (RSHIFT_EXPR, word_mode, result,
2221 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2222 }
2223 \f
2224 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2225 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2226 MODE, fill the upper bits with zeros. Fail if the layout of either
2227 mode is unknown (as for CC modes) or if the extraction would involve
2228 unprofitable mode punning. Return the value on success, otherwise
2229 return null.
2230
2231 This is different from gen_lowpart* in these respects:
2232
2233 - the returned value must always be considered an rvalue
2234
2235 - when MODE is wider than SRC_MODE, the extraction involves
2236 a zero extension
2237
2238 - when MODE is smaller than SRC_MODE, the extraction involves
2239 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2240
2241 In other words, this routine performs a computation, whereas the
2242 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2243 operations. */
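/* For example, reading a DImode value as SImode is a truncation to the
low bits, while reading an SImode value as DImode zero-extends it. */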
2244
2245 rtx
2246 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2247 {
2248 scalar_int_mode int_mode, src_int_mode;
2249
2250 if (mode == src_mode)
2251 return src;
2252
2253 if (CONSTANT_P (src))
2254 {
2255 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2256 fails, it will happily create (subreg (symbol_ref)) or similar
2257 invalid SUBREGs. */
2258 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2259 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2260 if (ret)
2261 return ret;
2262
2263 if (GET_MODE (src) == VOIDmode
2264 || !validate_subreg (mode, src_mode, src, byte))
2265 return NULL_RTX;
2266
2267 src = force_reg (GET_MODE (src), src);
2268 return gen_rtx_SUBREG (mode, src, byte);
2269 }
2270
2271 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2272 return NULL_RTX;
2273
2274 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2275 && MODES_TIEABLE_P (mode, src_mode))
2276 {
2277 rtx x = gen_lowpart_common (mode, src);
2278 if (x)
2279 return x;
2280 }
2281
2282 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2283 || !int_mode_for_mode (mode).exists (&int_mode))
2284 return NULL_RTX;
2285
2286 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2287 return NULL_RTX;
2288 if (!MODES_TIEABLE_P (int_mode, mode))
2289 return NULL_RTX;
2290
2291 src = gen_lowpart (src_int_mode, src);
2292 src = convert_modes (int_mode, src_int_mode, src, true);
2293 src = gen_lowpart (mode, src);
2294 return src;
2295 }
2296 \f
2297 /* Add INC into TARGET. */
2298
2299 void
2300 expand_inc (rtx target, rtx inc)
2301 {
2302 rtx value = expand_binop (GET_MODE (target), add_optab,
2303 target, inc,
2304 target, 0, OPTAB_LIB_WIDEN);
2305 if (value != target)
2306 emit_move_insn (target, value);
2307 }
2308
2309 /* Subtract DEC from TARGET. */
2310
2311 void
2312 expand_dec (rtx target, rtx dec)
2313 {
2314 rtx value = expand_binop (GET_MODE (target), sub_optab,
2315 target, dec,
2316 target, 0, OPTAB_LIB_WIDEN);
2317 if (value != target)
2318 emit_move_insn (target, value);
2319 }
2320 \f
2321 /* Output a shift instruction for expression code CODE,
2322 with SHIFTED being the rtx for the value to shift,
2323 and AMOUNT the rtx for the amount to shift by.
2324 Store the result in the rtx TARGET, if that is convenient.
2325 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2326 Return the rtx for where the value is.
2327 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2328 in which case 0 is returned. */
2329
2330 static rtx
2331 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2332 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2333 {
2334 rtx op1, temp = 0;
2335 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2336 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2337 optab lshift_optab = ashl_optab;
2338 optab rshift_arith_optab = ashr_optab;
2339 optab rshift_uns_optab = lshr_optab;
2340 optab lrotate_optab = rotl_optab;
2341 optab rrotate_optab = rotr_optab;
2342 machine_mode op1_mode;
2343 machine_mode scalar_mode = mode;
2344 int attempt;
2345 bool speed = optimize_insn_for_speed_p ();
2346
2347 if (VECTOR_MODE_P (mode))
2348 scalar_mode = GET_MODE_INNER (mode);
2349 op1 = amount;
2350 op1_mode = GET_MODE (op1);
2351
2352 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2353 shift amount is a vector, use the vector/vector shift patterns. */
2354 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2355 {
2356 lshift_optab = vashl_optab;
2357 rshift_arith_optab = vashr_optab;
2358 rshift_uns_optab = vlshr_optab;
2359 lrotate_optab = vrotl_optab;
2360 rrotate_optab = vrotr_optab;
2361 }
2362
2363 /* Previously detected shift-counts computed by NEGATE_EXPR
2364 and shifted in the other direction; but that does not work
2365 on all machines. */
2366
2367 if (SHIFT_COUNT_TRUNCATED)
2368 {
2369 if (CONST_INT_P (op1)
2370 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2371 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2372 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2373 % GET_MODE_BITSIZE (scalar_mode));
2374 else if (GET_CODE (op1) == SUBREG
2375 && subreg_lowpart_p (op1)
2376 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2377 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2378 op1 = SUBREG_REG (op1);
2379 }
2380
2381 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2382 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2383 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2384 amount instead. */
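/* For example, a left rotation of a 32-bit value by 24 is rewritten as a
right rotation by 8. */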
2385 if (rotate
2386 && CONST_INT_P (op1)
2387 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2388 GET_MODE_BITSIZE (scalar_mode) - 1))
2389 {
2390 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2391 left = !left;
2392 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2393 }
2394
2395 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2396 Note that this is not the case for bigger values. For instance a rotation
2397 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2398 0x04030201 (bswapsi). */
2399 if (rotate
2400 && CONST_INT_P (op1)
2401 && INTVAL (op1) == BITS_PER_UNIT
2402 && GET_MODE_SIZE (scalar_mode) == 2
2403 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2404 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2405 unsignedp);
2406
2407 if (op1 == const0_rtx)
2408 return shifted;
2409
2410 /* Check whether it's cheaper to implement a left shift by a constant
2411 bit count by a sequence of additions. */
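/* For example, a left shift by 2 can be done as two self-additions
(x + x, then that sum added to itself) when two adds are cheaper than
the shift. */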
2412 if (code == LSHIFT_EXPR
2413 && CONST_INT_P (op1)
2414 && INTVAL (op1) > 0
2415 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2416 && INTVAL (op1) < MAX_BITS_PER_WORD
2417 && (shift_cost (speed, mode, INTVAL (op1))
2418 > INTVAL (op1) * add_cost (speed, mode))
2419 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2420 {
2421 int i;
2422 for (i = 0; i < INTVAL (op1); i++)
2423 {
2424 temp = force_reg (mode, shifted);
2425 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2426 unsignedp, OPTAB_LIB_WIDEN);
2427 }
2428 return shifted;
2429 }
2430
2431 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2432 {
2433 enum optab_methods methods;
2434
2435 if (attempt == 0)
2436 methods = OPTAB_DIRECT;
2437 else if (attempt == 1)
2438 methods = OPTAB_WIDEN;
2439 else
2440 methods = OPTAB_LIB_WIDEN;
2441
2442 if (rotate)
2443 {
2444 /* Widening does not work for rotation. */
2445 if (methods == OPTAB_WIDEN)
2446 continue;
2447 else if (methods == OPTAB_LIB_WIDEN)
2448 {
2449 /* If we have been unable to open-code this by a rotation,
2450 do it as the IOR of two shifts. I.e., to rotate A
2451 by N bits, compute
2452 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2453 where C is the bitsize of A.
2454
2455 It is theoretically possible that the target machine might
2456 not be able to perform either shift and hence we would
2457 be making two libcalls rather than just the one for the
2458 shift (similarly if IOR could not be done). We will allow
2459 this extremely unlikely lossage to avoid complicating the
2460 code below. */
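/* For a variable amount N, the (-N) & (C - 1) form keeps the second shift
count in range: it yields C - N for 1 <= N < C and 0 when N is 0, so no
shift by the full bitsize C is ever requested. */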
2461
2462 rtx subtarget = target == shifted ? 0 : target;
2463 rtx new_amount, other_amount;
2464 rtx temp1;
2465
2466 new_amount = op1;
2467 if (op1 == const0_rtx)
2468 return shifted;
2469 else if (CONST_INT_P (op1))
2470 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2471 - INTVAL (op1));
2472 else
2473 {
2474 other_amount
2475 = simplify_gen_unary (NEG, GET_MODE (op1),
2476 op1, GET_MODE (op1));
2477 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2478 other_amount
2479 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2480 gen_int_mode (mask, GET_MODE (op1)));
2481 }
2482
2483 shifted = force_reg (mode, shifted);
2484
2485 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2486 mode, shifted, new_amount, 0, 1);
2487 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2488 mode, shifted, other_amount,
2489 subtarget, 1);
2490 return expand_binop (mode, ior_optab, temp, temp1, target,
2491 unsignedp, methods);
2492 }
2493
2494 temp = expand_binop (mode,
2495 left ? lrotate_optab : rrotate_optab,
2496 shifted, op1, target, unsignedp, methods);
2497 }
2498 else if (unsignedp)
2499 temp = expand_binop (mode,
2500 left ? lshift_optab : rshift_uns_optab,
2501 shifted, op1, target, unsignedp, methods);
2502
2503 /* Do arithmetic shifts.
2504 Also, if we are going to widen the operand, we can just as well
2505 use an arithmetic right-shift instead of a logical one. */
2506 if (temp == 0 && ! rotate
2507 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2508 {
2509 enum optab_methods methods1 = methods;
2510
2511 /* If trying to widen a log shift to an arithmetic shift,
2512 don't accept an arithmetic shift of the same size. */
2513 if (unsignedp)
2514 methods1 = OPTAB_MUST_WIDEN;
2515
2516 /* Arithmetic shift */
2517
2518 temp = expand_binop (mode,
2519 left ? lshift_optab : rshift_arith_optab,
2520 shifted, op1, target, unsignedp, methods1);
2521 }
2522
2523 /* We used to try extzv here for logical right shifts, but that was
2524 only useful for one machine, the VAX, and caused poor code
2525 generation there for lshrdi3, so the code was deleted and a
2526 define_expand for lshrsi3 was added to vax.md. */
2527 }
2528
2529 gcc_assert (temp != NULL_RTX || may_fail);
2530 return temp;
2531 }
2532
2533 /* Output a shift instruction for expression code CODE,
2534 with SHIFTED being the rtx for the value to shift,
2535 and AMOUNT the amount to shift by.
2536 Store the result in the rtx TARGET, if that is convenient.
2537 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2538 Return the rtx for where the value is. */
2539
2540 rtx
2541 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2542 int amount, rtx target, int unsignedp)
2543 {
2544 return expand_shift_1 (code, mode,
2545 shifted, GEN_INT (amount), target, unsignedp);
2546 }
2547
2548 /* Likewise, but return 0 if that cannot be done. */
2549
2550 static rtx
2551 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2552 int amount, rtx target, int unsignedp)
2553 {
2554 return expand_shift_1 (code, mode,
2555 shifted, GEN_INT (amount), target, unsignedp, true);
2556 }
2557
2558 /* Output a shift instruction for expression code CODE,
2559 with SHIFTED being the rtx for the value to shift,
2560 and AMOUNT the tree for the amount to shift by.
2561 Store the result in the rtx TARGET, if that is convenient.
2562 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2563 Return the rtx for where the value is. */
2564
2565 rtx
2566 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2567 tree amount, rtx target, int unsignedp)
2568 {
2569 return expand_shift_1 (code, mode,
2570 shifted, expand_normal (amount), target, unsignedp);
2571 }
2572
2573 \f
2574 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2575 const struct mult_cost *, machine_mode mode);
2576 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2577 const struct algorithm *, enum mult_variant);
2578 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2579 static rtx extract_high_half (scalar_int_mode, rtx);
2580 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2581 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2582 int, int);
2583 /* Compute and return the best algorithm for multiplying by T.
2584 The algorithm must cost less than COST_LIMIT.
2585 If retval.cost >= COST_LIMIT, no algorithm was found and all
2586 other fields of the returned struct are undefined.
2587 MODE is the machine mode of the multiplication. */
2588
2589 static void
2590 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2591 const struct mult_cost *cost_limit, machine_mode mode)
2592 {
2593 int m;
2594 struct algorithm *alg_in, *best_alg;
2595 struct mult_cost best_cost;
2596 struct mult_cost new_limit;
2597 int op_cost, op_latency;
2598 unsigned HOST_WIDE_INT orig_t = t;
2599 unsigned HOST_WIDE_INT q;
2600 int maxm, hash_index;
2601 bool cache_hit = false;
2602 enum alg_code cache_alg = alg_zero;
2603 bool speed = optimize_insn_for_speed_p ();
2604 scalar_int_mode imode;
2605 struct alg_hash_entry *entry_ptr;
2606
2607 /* Indicate that no algorithm is yet found. If no algorithm
2608 is found, this value will be returned and indicate failure. */
2609 alg_out->cost.cost = cost_limit->cost + 1;
2610 alg_out->cost.latency = cost_limit->latency + 1;
2611
2612 if (cost_limit->cost < 0
2613 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2614 return;
2615
2616 /* Be prepared for vector modes. */
2617 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2618
2619 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2620
2621 /* Restrict the bits of "t" to the multiplication's mode. */
2622 t &= GET_MODE_MASK (imode);
2623
2624 /* t == 1 can be done in zero cost. */
2625 if (t == 1)
2626 {
2627 alg_out->ops = 1;
2628 alg_out->cost.cost = 0;
2629 alg_out->cost.latency = 0;
2630 alg_out->op[0] = alg_m;
2631 return;
2632 }
2633
2634 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2635 fail now. */
2636 if (t == 0)
2637 {
2638 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2639 return;
2640 else
2641 {
2642 alg_out->ops = 1;
2643 alg_out->cost.cost = zero_cost (speed);
2644 alg_out->cost.latency = zero_cost (speed);
2645 alg_out->op[0] = alg_zero;
2646 return;
2647 }
2648 }
2649
2650 /* We'll be needing a couple extra algorithm structures now. */
2651
2652 alg_in = XALLOCA (struct algorithm);
2653 best_alg = XALLOCA (struct algorithm);
2654 best_cost = *cost_limit;
2655
2656 /* Compute the hash index. */
2657 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2658
2659 /* See if we already know what to do for T. */
2660 entry_ptr = alg_hash_entry_ptr (hash_index);
2661 if (entry_ptr->t == t
2662 && entry_ptr->mode == mode
2663 && entry_ptr->speed == speed
2664 && entry_ptr->alg != alg_unknown)
2665 {
2666 cache_alg = entry_ptr->alg;
2667
2668 if (cache_alg == alg_impossible)
2669 {
2670 /* The cache tells us that it's impossible to synthesize
2671 multiplication by T within entry_ptr->cost. */
2672 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2673 /* COST_LIMIT is at least as restrictive as the one
2674 recorded in the hash table, in which case we have no
2675 hope of synthesizing a multiplication. Just
2676 return. */
2677 return;
2678
2679 /* If we get here, COST_LIMIT is less restrictive than the
2680 one recorded in the hash table, so we may be able to
2681 synthesize a multiplication. Proceed as if we didn't
2682 have the cache entry. */
2683 }
2684 else
2685 {
2686 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2687 /* The cached algorithm shows that this multiplication
2688 requires more cost than COST_LIMIT. Just return. This
2689 way, we don't clobber this cache entry with
2690 alg_impossible but retain useful information. */
2691 return;
2692
2693 cache_hit = true;
2694
2695 switch (cache_alg)
2696 {
2697 case alg_shift:
2698 goto do_alg_shift;
2699
2700 case alg_add_t_m2:
2701 case alg_sub_t_m2:
2702 goto do_alg_addsub_t_m2;
2703
2704 case alg_add_factor:
2705 case alg_sub_factor:
2706 goto do_alg_addsub_factor;
2707
2708 case alg_add_t2_m:
2709 goto do_alg_add_t2_m;
2710
2711 case alg_sub_t2_m:
2712 goto do_alg_sub_t2_m;
2713
2714 default:
2715 gcc_unreachable ();
2716 }
2717 }
2718 }
2719
2720 /* If we have a group of zero bits at the low-order part of T, try
2721 multiplying by the remaining bits and then doing a shift. */
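/* For example, T = 20 is handled as a multiplication by 5 followed by a
left shift of 2 (or an equivalent sequence of additions, whichever
expand_shift finds cheaper). */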
2722
2723 if ((t & 1) == 0)
2724 {
2725 do_alg_shift:
2726 m = ctz_or_zero (t); /* m = number of low zero bits */
2727 if (m < maxm)
2728 {
2729 q = t >> m;
2730 /* The function expand_shift will choose between a shift and
2731 a sequence of additions, so the observed cost is given as
2732 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2733 op_cost = m * add_cost (speed, mode);
2734 if (shift_cost (speed, mode, m) < op_cost)
2735 op_cost = shift_cost (speed, mode, m);
2736 new_limit.cost = best_cost.cost - op_cost;
2737 new_limit.latency = best_cost.latency - op_cost;
2738 synth_mult (alg_in, q, &new_limit, mode);
2739
2740 alg_in->cost.cost += op_cost;
2741 alg_in->cost.latency += op_cost;
2742 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2743 {
2744 best_cost = alg_in->cost;
2745 std::swap (alg_in, best_alg);
2746 best_alg->log[best_alg->ops] = m;
2747 best_alg->op[best_alg->ops] = alg_shift;
2748 }
2749
2750 /* See if treating ORIG_T as a signed number yields a better
2751 sequence. Try this sequence only for a negative ORIG_T
2752 as it would be useless for a non-negative ORIG_T. */
2753 if ((HOST_WIDE_INT) orig_t < 0)
2754 {
2755 /* Shift ORIG_T as follows because a right shift of a
2756 negative-valued signed type is
2757 implementation-defined. */
2758 q = ~(~orig_t >> m);
2759 /* The function expand_shift will choose between a shift
2760 and a sequence of additions, so the observed cost is
2761 given as MIN (m * add_cost(speed, mode),
2762 shift_cost(speed, mode, m)). */
2763 op_cost = m * add_cost (speed, mode);
2764 if (shift_cost (speed, mode, m) < op_cost)
2765 op_cost = shift_cost (speed, mode, m);
2766 new_limit.cost = best_cost.cost - op_cost;
2767 new_limit.latency = best_cost.latency - op_cost;
2768 synth_mult (alg_in, q, &new_limit, mode);
2769
2770 alg_in->cost.cost += op_cost;
2771 alg_in->cost.latency += op_cost;
2772 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2773 {
2774 best_cost = alg_in->cost;
2775 std::swap (alg_in, best_alg);
2776 best_alg->log[best_alg->ops] = m;
2777 best_alg->op[best_alg->ops] = alg_shift;
2778 }
2779 }
2780 }
2781 if (cache_hit)
2782 goto done;
2783 }
2784
2785 /* If we have an odd number, add or subtract one. */
2786 if ((t & 1) != 0)
2787 {
2788 unsigned HOST_WIDE_INT w;
2789
2790 do_alg_addsub_t_m2:
2791 for (w = 1; (w & t) != 0; w <<= 1)
2792 ;
2793 /* If T was -1, then W will be zero after the loop. This is another
2794 case where T ends with ...111. Handling this with (T + 1) and
2795 subtract 1 produces slightly better code and results in algorithm
2796 selection much faster than treating it like the ...0111 case
2797 below. */
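/* For example, T = 7 ends in ...111, so we synthesize T + 1 = 8 (a single
shift) and subtract: x * 7 = (x << 3) - x. */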
2798 if (w == 0
2799 || (w > 2
2800 /* Reject the case where t is 3.
2801 Thus we prefer addition in that case. */
2802 && t != 3))
2803 {
2804 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2805
2806 op_cost = add_cost (speed, mode);
2807 new_limit.cost = best_cost.cost - op_cost;
2808 new_limit.latency = best_cost.latency - op_cost;
2809 synth_mult (alg_in, t + 1, &new_limit, mode);
2810
2811 alg_in->cost.cost += op_cost;
2812 alg_in->cost.latency += op_cost;
2813 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2814 {
2815 best_cost = alg_in->cost;
2816 std::swap (alg_in, best_alg);
2817 best_alg->log[best_alg->ops] = 0;
2818 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2819 }
2820 }
2821 else
2822 {
2823 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2824
2825 op_cost = add_cost (speed, mode);
2826 new_limit.cost = best_cost.cost - op_cost;
2827 new_limit.latency = best_cost.latency - op_cost;
2828 synth_mult (alg_in, t - 1, &new_limit, mode);
2829
2830 alg_in->cost.cost += op_cost;
2831 alg_in->cost.latency += op_cost;
2832 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2833 {
2834 best_cost = alg_in->cost;
2835 std::swap (alg_in, best_alg);
2836 best_alg->log[best_alg->ops] = 0;
2837 best_alg->op[best_alg->ops] = alg_add_t_m2;
2838 }
2839 }
2840
2841 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2842 quickly with a - a * n for some appropriate constant n. */
2843 m = exact_log2 (-orig_t + 1);
2844 if (m >= 0 && m < maxm)
2845 {
2846 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2847 /* If the target has a cheap shift-and-subtract insn use
2848 that in preference to a shift insn followed by a sub insn.
2849 Assume that the shift-and-sub is "atomic" with a latency
2850 equal to its cost, otherwise assume that on superscalar
2851 hardware the shift may be executed concurrently with the
2852 earlier steps in the algorithm. */
2853 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2854 {
2855 op_cost = shiftsub1_cost (speed, mode, m);
2856 op_latency = op_cost;
2857 }
2858 else
2859 op_latency = add_cost (speed, mode);
2860
2861 new_limit.cost = best_cost.cost - op_cost;
2862 new_limit.latency = best_cost.latency - op_latency;
2863 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2864 &new_limit, mode);
2865
2866 alg_in->cost.cost += op_cost;
2867 alg_in->cost.latency += op_latency;
2868 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869 {
2870 best_cost = alg_in->cost;
2871 std::swap (alg_in, best_alg);
2872 best_alg->log[best_alg->ops] = m;
2873 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2874 }
2875 }
2876
2877 if (cache_hit)
2878 goto done;
2879 }
2880
2881 /* Look for factors of t of the form
2882 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2883 If we find such a factor, we can multiply by t using an algorithm that
2884 multiplies by q, shift the result by m and add/subtract it to itself.
2885
2886 We search for large factors first and loop down, even if large factors
2887 are less probable than small; if we find a large factor we will find a
2888 good sequence quickly, and therefore be able to prune (by decreasing
2889 COST_LIMIT) the search. */
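/* For example, T = 45 factors as 9 * (2**2 + 1): synthesize x * 9 first,
then add that result shifted left by 2 to itself, giving
x * 45 = ((x * 9) << 2) + x * 9. */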
2890
2891 do_alg_addsub_factor:
2892 for (m = floor_log2 (t - 1); m >= 2; m--)
2893 {
2894 unsigned HOST_WIDE_INT d;
2895
2896 d = (HOST_WIDE_INT_1U << m) + 1;
2897 if (t % d == 0 && t > d && m < maxm
2898 && (!cache_hit || cache_alg == alg_add_factor))
2899 {
2900 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2901 if (shiftadd_cost (speed, mode, m) <= op_cost)
2902 op_cost = shiftadd_cost (speed, mode, m);
2903
2904 op_latency = op_cost;
2905
2906
2907 new_limit.cost = best_cost.cost - op_cost;
2908 new_limit.latency = best_cost.latency - op_latency;
2909 synth_mult (alg_in, t / d, &new_limit, mode);
2910
2911 alg_in->cost.cost += op_cost;
2912 alg_in->cost.latency += op_latency;
2913 if (alg_in->cost.latency < op_cost)
2914 alg_in->cost.latency = op_cost;
2915 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2916 {
2917 best_cost = alg_in->cost;
2918 std::swap (alg_in, best_alg);
2919 best_alg->log[best_alg->ops] = m;
2920 best_alg->op[best_alg->ops] = alg_add_factor;
2921 }
2922 /* Other factors will have been taken care of in the recursion. */
2923 break;
2924 }
2925
2926 d = (HOST_WIDE_INT_1U << m) - 1;
2927 if (t % d == 0 && t > d && m < maxm
2928 && (!cache_hit || cache_alg == alg_sub_factor))
2929 {
2930 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2931 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2932 op_cost = shiftsub0_cost (speed, mode, m);
2933
2934 op_latency = op_cost;
2935
2936 new_limit.cost = best_cost.cost - op_cost;
2937 new_limit.latency = best_cost.latency - op_latency;
2938 synth_mult (alg_in, t / d, &new_limit, mode);
2939
2940 alg_in->cost.cost += op_cost;
2941 alg_in->cost.latency += op_latency;
2942 if (alg_in->cost.latency < op_cost)
2943 alg_in->cost.latency = op_cost;
2944 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2945 {
2946 best_cost = alg_in->cost;
2947 std::swap (alg_in, best_alg);
2948 best_alg->log[best_alg->ops] = m;
2949 best_alg->op[best_alg->ops] = alg_sub_factor;
2950 }
2951 break;
2952 }
2953 }
2954 if (cache_hit)
2955 goto done;
2956
2957 /* Try shift-and-add (load effective address) instructions,
2958 i.e. do a*3, a*5, a*9. */
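/* For example, T = 9 gives q = 8, m = 3: the recursive multiplication is
by 1, and the final step forms x * 9 = (x << 3) + x, which many targets
implement as a single shift-and-add (lea-like) instruction. */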
2959 if ((t & 1) != 0)
2960 {
2961 do_alg_add_t2_m:
2962 q = t - 1;
2963 m = ctz_hwi (q);
2964 if (q && m < maxm)
2965 {
2966 op_cost = shiftadd_cost (speed, mode, m);
2967 new_limit.cost = best_cost.cost - op_cost;
2968 new_limit.latency = best_cost.latency - op_cost;
2969 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2970
2971 alg_in->cost.cost += op_cost;
2972 alg_in->cost.latency += op_cost;
2973 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2974 {
2975 best_cost = alg_in->cost;
2976 std::swap (alg_in, best_alg);
2977 best_alg->log[best_alg->ops] = m;
2978 best_alg->op[best_alg->ops] = alg_add_t2_m;
2979 }
2980 }
2981 if (cache_hit)
2982 goto done;
2983
2984 do_alg_sub_t2_m:
2985 q = t + 1;
2986 m = ctz_hwi (q);
2987 if (q && m < maxm)
2988 {
2989 op_cost = shiftsub0_cost (speed, mode, m);
2990 new_limit.cost = best_cost.cost - op_cost;
2991 new_limit.latency = best_cost.latency - op_cost;
2992 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2993
2994 alg_in->cost.cost += op_cost;
2995 alg_in->cost.latency += op_cost;
2996 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2997 {
2998 best_cost = alg_in->cost;
2999 std::swap (alg_in, best_alg);
3000 best_alg->log[best_alg->ops] = m;
3001 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3002 }
3003 }
3004 if (cache_hit)
3005 goto done;
3006 }
3007
3008 done:
3009 /* If best_cost has not decreased, we have not found any algorithm. */
3010 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3011 {
3012 /* We failed to find an algorithm. Record alg_impossible for
3013 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3014 we are asked to find an algorithm for T within the same or
3015 lower COST_LIMIT, we can immediately return to the
3016 caller. */
3017 entry_ptr->t = t;
3018 entry_ptr->mode = mode;
3019 entry_ptr->speed = speed;
3020 entry_ptr->alg = alg_impossible;
3021 entry_ptr->cost = *cost_limit;
3022 return;
3023 }
3024
3025 /* Cache the result. */
3026 if (!cache_hit)
3027 {
3028 entry_ptr->t = t;
3029 entry_ptr->mode = mode;
3030 entry_ptr->speed = speed;
3031 entry_ptr->alg = best_alg->op[best_alg->ops];
3032 entry_ptr->cost.cost = best_cost.cost;
3033 entry_ptr->cost.latency = best_cost.latency;
3034 }
3035
3036 /* If we are getting too long a sequence for `struct algorithm'
3037 to record, make this search fail. */
3038 if (best_alg->ops == MAX_BITS_PER_WORD)
3039 return;
3040
3041 /* Copy the algorithm from temporary space to the space at alg_out.
3042 We avoid using structure assignment because the majority of
3043 best_alg is normally undefined, and this is a critical function. */
3044 alg_out->ops = best_alg->ops + 1;
3045 alg_out->cost = best_cost;
3046 memcpy (alg_out->op, best_alg->op,
3047 alg_out->ops * sizeof *alg_out->op);
3048 memcpy (alg_out->log, best_alg->log,
3049 alg_out->ops * sizeof *alg_out->log);
3050 }
3051 \f
3052 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3053 Try three variations:
3054
3055 - a shift/add sequence based on VAL itself
3056 - a shift/add sequence based on -VAL, followed by a negation
3057 - a shift/add sequence based on VAL - 1, followed by an addition.
3058
3059 Return true if the cheapest of these cost less than MULT_COST,
3060 describing the algorithm in *ALG and final fixup in *VARIANT. */
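/* For example, for VAL = -3 it is usually cheaper to synthesize a
multiplication by 3 (x + (x << 1)) and negate the result than to operate
on the wide unsigned bit pattern of -3 directly. */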
3061
3062 bool
3063 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3064 struct algorithm *alg, enum mult_variant *variant,
3065 int mult_cost)
3066 {
3067 struct algorithm alg2;
3068 struct mult_cost limit;
3069 int op_cost;
3070 bool speed = optimize_insn_for_speed_p ();
3071
3072 /* Fail quickly for impossible bounds. */
3073 if (mult_cost < 0)
3074 return false;
3075
3076 /* Ensure that mult_cost provides a reasonable upper bound.
3077 Any constant multiplication can be performed with less
3078 than 2 * bits additions. */
3079 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3080 if (mult_cost > op_cost)
3081 mult_cost = op_cost;
3082
3083 *variant = basic_variant;
3084 limit.cost = mult_cost;
3085 limit.latency = mult_cost;
3086 synth_mult (alg, val, &limit, mode);
3087
3088 /* This works only if the inverted value actually fits in an
3089 `unsigned int'. */
3090 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3091 {
3092 op_cost = neg_cost (speed, mode);
3093 if (MULT_COST_LESS (&alg->cost, mult_cost))
3094 {
3095 limit.cost = alg->cost.cost - op_cost;
3096 limit.latency = alg->cost.latency - op_cost;
3097 }
3098 else
3099 {
3100 limit.cost = mult_cost - op_cost;
3101 limit.latency = mult_cost - op_cost;
3102 }
3103
3104 synth_mult (&alg2, -val, &limit, mode);
3105 alg2.cost.cost += op_cost;
3106 alg2.cost.latency += op_cost;
3107 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3108 *alg = alg2, *variant = negate_variant;
3109 }
3110
3111 /* This proves very useful for division-by-constant. */
3112 op_cost = add_cost (speed, mode);
3113 if (MULT_COST_LESS (&alg->cost, mult_cost))
3114 {
3115 limit.cost = alg->cost.cost - op_cost;
3116 limit.latency = alg->cost.latency - op_cost;
3117 }
3118 else
3119 {
3120 limit.cost = mult_cost - op_cost;
3121 limit.latency = mult_cost - op_cost;
3122 }
3123
3124 synth_mult (&alg2, val - 1, &limit, mode);
3125 alg2.cost.cost += op_cost;
3126 alg2.cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3128 *alg = alg2, *variant = add_variant;
3129
3130 return MULT_COST_LESS (&alg->cost, mult_cost);
3131 }
3132
3133 /* A subroutine of expand_mult, used for constant multiplications.
3134 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3135 convenient. Use the shift/add sequence described by ALG and apply
3136 the final fixup specified by VARIANT. */
3137
3138 static rtx
3139 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3140 rtx target, const struct algorithm *alg,
3141 enum mult_variant variant)
3142 {
3143 unsigned HOST_WIDE_INT val_so_far;
3144 rtx_insn *insn;
3145 rtx accum, tem;
3146 int opno;
3147 machine_mode nmode;
3148
3149 /* Avoid referencing memory over and over and invalid sharing
3150 on SUBREGs. */
3151 op0 = force_reg (mode, op0);
3152
3153 /* ACCUM starts out either as OP0 or as a zero, depending on
3154 the first operation. */
3155
3156 if (alg->op[0] == alg_zero)
3157 {
3158 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3159 val_so_far = 0;
3160 }
3161 else if (alg->op[0] == alg_m)
3162 {
3163 accum = copy_to_mode_reg (mode, op0);
3164 val_so_far = 1;
3165 }
3166 else
3167 gcc_unreachable ();
3168
3169 for (opno = 1; opno < alg->ops; opno++)
3170 {
3171 int log = alg->log[opno];
3172 rtx shift_subtarget = optimize ? 0 : accum;
3173 rtx add_target
3174 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3175 && !optimize)
3176 ? target : 0;
3177 rtx accum_target = optimize ? 0 : accum;
3178 rtx accum_inner;
3179
3180 switch (alg->op[opno])
3181 {
3182 case alg_shift:
3183 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3184 /* REG_EQUAL note will be attached to the following insn. */
3185 emit_move_insn (accum, tem);
3186 val_so_far <<= log;
3187 break;
3188
3189 case alg_add_t_m2:
3190 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3191 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3192 add_target ? add_target : accum_target);
3193 val_so_far += HOST_WIDE_INT_1U << log;
3194 break;
3195
3196 case alg_sub_t_m2:
3197 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3198 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3199 add_target ? add_target : accum_target);
3200 val_so_far -= HOST_WIDE_INT_1U << log;
3201 break;
3202
3203 case alg_add_t2_m:
3204 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3205 log, shift_subtarget, 0);
3206 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3207 add_target ? add_target : accum_target);
3208 val_so_far = (val_so_far << log) + 1;
3209 break;
3210
3211 case alg_sub_t2_m:
3212 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3213 log, shift_subtarget, 0);
3214 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3215 add_target ? add_target : accum_target);
3216 val_so_far = (val_so_far << log) - 1;
3217 break;
3218
3219 case alg_add_factor:
3220 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3221 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3222 add_target ? add_target : accum_target);
3223 val_so_far += val_so_far << log;
3224 break;
3225
3226 case alg_sub_factor:
3227 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3228 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3229 (add_target
3230 ? add_target : (optimize ? 0 : tem)));
3231 val_so_far = (val_so_far << log) - val_so_far;
3232 break;
3233
3234 default:
3235 gcc_unreachable ();
3236 }
3237
3238 if (SCALAR_INT_MODE_P (mode))
3239 {
3240 /* Write a REG_EQUAL note on the last insn so that we can cse
3241 multiplication sequences. Note that if ACCUM is a SUBREG,
3242 we've set the inner register and must properly indicate that. */
3243 tem = op0, nmode = mode;
3244 accum_inner = accum;
3245 if (GET_CODE (accum) == SUBREG)
3246 {
3247 accum_inner = SUBREG_REG (accum);
3248 nmode = GET_MODE (accum_inner);
3249 tem = gen_lowpart (nmode, op0);
3250 }
3251
3252 insn = get_last_insn ();
3253 set_dst_reg_note (insn, REG_EQUAL,
3254 gen_rtx_MULT (nmode, tem,
3255 gen_int_mode (val_so_far, nmode)),
3256 accum_inner);
3257 }
3258 }
3259
3260 if (variant == negate_variant)
3261 {
3262 val_so_far = -val_so_far;
3263 accum = expand_unop (mode, neg_optab, accum, target, 0);
3264 }
3265 else if (variant == add_variant)
3266 {
3267 val_so_far = val_so_far + 1;
3268 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3269 }
3270
3271 /* Compare only the bits of val and val_so_far that are significant
3272 in the result mode, to avoid sign-/zero-extension confusion. */
3273 nmode = GET_MODE_INNER (mode);
3274 val &= GET_MODE_MASK (nmode);
3275 val_so_far &= GET_MODE_MASK (nmode);
3276 gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3277
3278 return accum;
3279 }
3280
3281 /* Perform a multiplication and return an rtx for the result.
3282 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3283 TARGET is a suggestion for where to store the result (an rtx).
3284
3285 We check specially for a constant integer as OP1.
3286 If you want this check for OP0 as well, then before calling
3287 you should swap the two operands if OP0 would be constant. */
3288
3289 rtx
3290 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3291 int unsignedp)
3292 {
3293 enum mult_variant variant;
3294 struct algorithm algorithm;
3295 rtx scalar_op1;
3296 int max_cost;
3297 bool speed = optimize_insn_for_speed_p ();
3298 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3299
3300 if (CONSTANT_P (op0))
3301 std::swap (op0, op1);
3302
3303 /* For vectors, there are several simplifications that can be made if
3304 all elements of the vector constant are identical. */
3305 scalar_op1 = unwrap_const_vec_duplicate (op1);
3306
3307 if (INTEGRAL_MODE_P (mode))
3308 {
3309 rtx fake_reg;
3310 HOST_WIDE_INT coeff;
3311 bool is_neg;
3312 int mode_bitsize;
3313
3314 if (op1 == CONST0_RTX (mode))
3315 return op1;
3316 if (op1 == CONST1_RTX (mode))
3317 return op0;
3318 if (op1 == CONSTM1_RTX (mode))
3319 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3320 op0, target, 0);
3321
3322 if (do_trapv)
3323 goto skip_synth;
3324
3325 /* If mode is integer vector mode, check if the backend supports
3326 vector lshift (by scalar or vector) at all. If not, we can't use
3327 synthesized multiply. */
3328 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3329 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3330 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3331 goto skip_synth;
3332
3333 /* These are the operations that are potentially turned into
3334 a sequence of shifts and additions. */
3335 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3336
3337 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3338 less than or equal in size to `unsigned int' this doesn't matter.
3339 If the mode is larger than `unsigned int', then synth_mult works
3340 only if the constant value exactly fits in an `unsigned int' without
3341 any truncation. This means that multiplying by negative values does
3342 not work; results are off by 2^32 on a 32 bit machine. */
3343 if (CONST_INT_P (scalar_op1))
3344 {
3345 coeff = INTVAL (scalar_op1);
3346 is_neg = coeff < 0;
3347 }
3348 #if TARGET_SUPPORTS_WIDE_INT
3349 else if (CONST_WIDE_INT_P (scalar_op1))
3350 #else
3351 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3352 #endif
3353 {
3354 int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3355 /* Perfect power of 2 (other than 1, which is handled above). */
3356 if (shift > 0)
3357 return expand_shift (LSHIFT_EXPR, mode, op0,
3358 shift, target, unsignedp);
3359 else
3360 goto skip_synth;
3361 }
3362 else
3363 goto skip_synth;
3364
3365 /* We used to test optimize here, on the grounds that it's better to
3366 produce a smaller program when -O is not used. But this causes
3367 such a terrible slowdown sometimes that it seems better to always
3368 use synth_mult. */
3369
3370 /* Special case powers of two. */
3371 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3372 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3373 return expand_shift (LSHIFT_EXPR, mode, op0,
3374 floor_log2 (coeff), target, unsignedp);
3375
3376 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3377
3378 /* Attempt to handle multiplication of DImode values by negative
3379 coefficients, by performing the multiplication by a positive
3380 multiplier and then negating the result. */
3381 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3382 {
3383 /* It's safe to use -coeff even for INT_MIN, as the
3384 result is interpreted as an unsigned coefficient.
3385 Exclude cost of op0 from max_cost to match the cost
3386 calculation of the synth_mult. */
3387 coeff = -(unsigned HOST_WIDE_INT) coeff;
3388 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3389 mode, speed)
3390 - neg_cost (speed, mode));
3391 if (max_cost <= 0)
3392 goto skip_synth;
3393
3394 /* Special case powers of two. */
3395 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3396 {
3397 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3398 floor_log2 (coeff), target, unsignedp);
3399 return expand_unop (mode, neg_optab, temp, target, 0);
3400 }
3401
3402 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3403 max_cost))
3404 {
3405 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3406 &algorithm, variant);
3407 return expand_unop (mode, neg_optab, temp, target, 0);
3408 }
3409 goto skip_synth;
3410 }
3411
3412 /* Exclude cost of op0 from max_cost to match the cost
3413 calculation of the synth_mult. */
3414 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3415 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3416 return expand_mult_const (mode, op0, coeff, target,
3417 &algorithm, variant);
3418 }
3419 skip_synth:
3420
3421 /* Expand x*2.0 as x+x. */
3422 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3423 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3424 {
3425 op0 = force_reg (GET_MODE (op0), op0);
3426 return expand_binop (mode, add_optab, op0, op0,
3427 target, unsignedp, OPTAB_LIB_WIDEN);
3428 }
3429
3430 /* This used to use umul_optab if unsigned, but for non-widening multiply
3431 there is no difference between signed and unsigned. */
3432 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3433 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3434 gcc_assert (op0);
3435 return op0;
3436 }
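
/* Illustrative sketch (not part of the compiler): the kind of shift-and-add
   sequence that choose_mult_variant/expand_mult_const arrange for a small
   constant multiplier.  For example x * 10 becomes ((x << 2) + x) << 1,
   an add-with-shift step followed by a final shift, which is usually cheaper
   than a general multiply on targets with slow multipliers.  */

static inline unsigned int
sketch_mult_by_10 (unsigned int x)
{
  unsigned int t = (x << 2) + x;	/* t = 5 * x */
  return t << 1;			/* 10 * x */
}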
3437
3438 /* Return a cost estimate for multiplying a register by the given
3439 COEFFicient in the given MODE and SPEED. */
3440
3441 int
3442 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3443 {
3444 int max_cost;
3445 struct algorithm algorithm;
3446 enum mult_variant variant;
3447
3448 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3449 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3450 mode, speed);
3451 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3452 return algorithm.cost.cost;
3453 else
3454 return max_cost;
3455 }
3456
3457 /* Perform a widening multiplication and return an rtx for the result.
3458 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3459 TARGET is a suggestion for where to store the result (an rtx).
3460 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3461 or smul_widen_optab.
3462
3463 We check specially for a constant integer as OP1, comparing the
3464 cost of a widening multiply against the cost of a sequence of shifts
3465 and adds. */
3466
3467 rtx
3468 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3469 int unsignedp, optab this_optab)
3470 {
3471 bool speed = optimize_insn_for_speed_p ();
3472 rtx cop1;
3473
3474 if (CONST_INT_P (op1)
3475 && GET_MODE (op0) != VOIDmode
3476 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3477 this_optab == umul_widen_optab))
3478 && CONST_INT_P (cop1)
3479 && (INTVAL (cop1) >= 0
3480 || HWI_COMPUTABLE_MODE_P (mode)))
3481 {
3482 HOST_WIDE_INT coeff = INTVAL (cop1);
3483 int max_cost;
3484 enum mult_variant variant;
3485 struct algorithm algorithm;
3486
3487 if (coeff == 0)
3488 return CONST0_RTX (mode);
3489
3490 /* Special case powers of two. */
3491 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3492 {
3493 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3494 return expand_shift (LSHIFT_EXPR, mode, op0,
3495 floor_log2 (coeff), target, unsignedp);
3496 }
3497
3498 /* Exclude cost of op0 from max_cost to match the cost
3499 calculation of the synth_mult. */
3500 max_cost = mul_widen_cost (speed, mode);
3501 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3502 max_cost))
3503 {
3504 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3505 return expand_mult_const (mode, op0, coeff, target,
3506 &algorithm, variant);
3507 }
3508 }
3509 return expand_binop (mode, this_optab, op0, op1, target,
3510 unsignedp, OPTAB_LIB_WIDEN);
3511 }
3512 \f
3513 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3514 replace division by D, and put the least significant N bits of the result
3515 in *MULTIPLIER_PTR and return the most significant bit.
3516
3517 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3518 needed precision is in PRECISION (should be <= N).
3519
3520 PRECISION should be as small as possible so this function can choose
3521 the multiplier more freely.
3522
3523 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3524 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3525
3526 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3527 where m is the full N + 1 bit multiplier. */
3528
3529 unsigned HOST_WIDE_INT
3530 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3531 unsigned HOST_WIDE_INT *multiplier_ptr,
3532 int *post_shift_ptr, int *lgup_ptr)
3533 {
3534 int lgup, post_shift;
3535 int pow, pow2;
3536
3537 /* lgup = ceil(log2(divisor)); */
3538 lgup = ceil_log2 (d);
3539
3540 gcc_assert (lgup <= n);
3541
3542 pow = n + lgup;
3543 pow2 = n + lgup - precision;
3544
3545 /* mlow = 2^(N + lgup)/d */
3546 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3547 wide_int mlow = wi::udiv_trunc (val, d);
3548
3549 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3550 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3551 wide_int mhigh = wi::udiv_trunc (val, d);
3552
3553 /* If precision == N, then mlow, mhigh exceed 2^N
3554 (but they do not exceed 2^(N+1)). */
3555
3556 /* Reduce to lowest terms. */
3557 for (post_shift = lgup; post_shift > 0; post_shift--)
3558 {
3559 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3560 HOST_BITS_PER_WIDE_INT);
3561 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3562 HOST_BITS_PER_WIDE_INT);
3563 if (ml_lo >= mh_lo)
3564 break;
3565
3566 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3567 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3568 }
3569
3570 *post_shift_ptr = post_shift;
3571 *lgup_ptr = lgup;
3572 if (n < HOST_BITS_PER_WIDE_INT)
3573 {
3574 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3575 *multiplier_ptr = mhigh.to_uhwi () & mask;
3576 return mhigh.to_uhwi () >= mask;
3577 }
3578 else
3579 {
3580 *multiplier_ptr = mhigh.to_uhwi ();
3581 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3582 }
3583 }
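
/* Illustrative sketch (not part of the compiler): how the values computed by
   choose_multiplier are used.  For a 32-bit unsigned division by 3,
   choose_multiplier (3, 32, 32, ...) yields the multiplier 0xAAAAAAAB with a
   post-shift of 1, so x / 3 is the high half of the 32x32->64 product,
   shifted right once more.  Assuming a 64-bit unsigned long long:  */

static inline unsigned int
sketch_udiv_by_3 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0xAAAAAAABULL;
  return (unsigned int) (prod >> 33);	/* high half, then >> post_shift */
}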
3584
3585 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3586 congruent to 1 (mod 2**N). */
3587
3588 static unsigned HOST_WIDE_INT
3589 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3590 {
3591 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3592
3593 /* The algorithm notes that the choice y = x satisfies
3594 x*y == 1 mod 2^3, since x is assumed odd.
3595 Each iteration doubles the number of bits of significance in y. */
3596
3597 unsigned HOST_WIDE_INT mask;
3598 unsigned HOST_WIDE_INT y = x;
3599 int nbit = 3;
3600
3601 mask = (n == HOST_BITS_PER_WIDE_INT
3602 ? HOST_WIDE_INT_M1U
3603 : (HOST_WIDE_INT_1U << n) - 1);
3604
3605 while (nbit < n)
3606 {
3607 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3608 nbit *= 2;
3609 }
3610 return y;
3611 }
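
/* Illustrative sketch (not part of the compiler): invert_mod2n at work.  The
   inverse of 3 modulo 2^32 is 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 0x200000001
   == 1 (mod 2^32).  The EXACT_DIV_EXPR case in expand_divmod below relies on
   this: when X is known to be a multiple of 3, X / 3 is just a modular
   multiply, with no division at all.  Assuming a 32-bit unsigned int:  */

static inline unsigned int
sketch_exact_div_by_3 (unsigned int x)
{
  /* Only valid when x % 3 == 0.  */
  return x * 0xAAAAAAABu;
}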
3612
3613 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3614 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3615 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3616 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3617 become signed.
3618
3619 The result is put in TARGET if that is convenient.
3620
3621 MODE is the mode of operation. */
3622
3623 rtx
3624 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3625 rtx op1, rtx target, int unsignedp)
3626 {
3627 rtx tem;
3628 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3629
3630 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3631 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3632 tem = expand_and (mode, tem, op1, NULL_RTX);
3633 adj_operand
3634 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3635 adj_operand);
3636
3637 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3638 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3639 tem = expand_and (mode, tem, op0, NULL_RTX);
3640 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3641 target);
3642
3643 return target;
3644 }
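
/* Illustrative sketch (not part of the compiler) of the adjustment above:
   for N-bit operands the unsigned and signed high halves of a product differ
   by the terms conditionally added in under the sign masks, i.e.
     high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)
   taken modulo 2^N.  A 16-bit demonstration, assuming 16-bit short, 32-bit
   int and arithmetic right shifts of negative values:  */

static inline unsigned short
sketch_umulh16_from_smulh16 (short a, short b)
{
  unsigned short adj = (unsigned short) (((int) a * (int) b) >> 16);
  adj += (unsigned short) ((a >> 15) & b);	/* add b if a < 0 */
  adj += (unsigned short) ((b >> 15) & a);	/* add a if b < 0 */
  return adj;
}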
3645
3646 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3647
3648 static rtx
3649 extract_high_half (scalar_int_mode mode, rtx op)
3650 {
3651 if (mode == word_mode)
3652 return gen_highpart (mode, op);
3653
3654 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3655
3656 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3657 GET_MODE_BITSIZE (mode), 0, 1);
3658 return convert_modes (mode, wider_mode, op, 0);
3659 }
3660
3661 /* Like expmed_mult_highpart, but only consider using a multiplication
3662 optab. OP1 is an rtx for the constant operand. */
3663
3664 static rtx
3665 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3666 rtx target, int unsignedp, int max_cost)
3667 {
3668 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3669 optab moptab;
3670 rtx tem;
3671 int size;
3672 bool speed = optimize_insn_for_speed_p ();
3673
3674 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3675
3676 size = GET_MODE_BITSIZE (mode);
3677
3678 /* Firstly, try using a multiplication insn that only generates the needed
3679 high part of the product, and in the sign flavor of unsignedp. */
3680 if (mul_highpart_cost (speed, mode) < max_cost)
3681 {
3682 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3683 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3684 unsignedp, OPTAB_DIRECT);
3685 if (tem)
3686 return tem;
3687 }
3688
3689 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3690 Need to adjust the result after the multiplication. */
3691 if (size - 1 < BITS_PER_WORD
3692 && (mul_highpart_cost (speed, mode)
3693 + 2 * shift_cost (speed, mode, size-1)
3694 + 4 * add_cost (speed, mode) < max_cost))
3695 {
3696 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3697 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3698 unsignedp, OPTAB_DIRECT);
3699 if (tem)
3700 /* We used the wrong signedness. Adjust the result. */
3701 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3702 tem, unsignedp);
3703 }
3704
3705 /* Try widening multiplication. */
3706 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3707 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3708 && mul_widen_cost (speed, wider_mode) < max_cost)
3709 {
3710 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3711 unsignedp, OPTAB_WIDEN);
3712 if (tem)
3713 return extract_high_half (mode, tem);
3714 }
3715
3716 /* Try widening the mode and perform a non-widening multiplication. */
3717 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3718 && size - 1 < BITS_PER_WORD
3719 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3720 < max_cost))
3721 {
3722 rtx_insn *insns;
3723 rtx wop0, wop1;
3724
3725 /* We need to widen the operands, for example to ensure the
3726 constant multiplier is correctly sign or zero extended.
3727 Use a sequence to clean up any instructions emitted by
3728 the conversions if things don't work out. */
3729 start_sequence ();
3730 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3731 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3732 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3733 unsignedp, OPTAB_WIDEN);
3734 insns = get_insns ();
3735 end_sequence ();
3736
3737 if (tem)
3738 {
3739 emit_insn (insns);
3740 return extract_high_half (mode, tem);
3741 }
3742 }
3743
3744 /* Try widening multiplication of opposite signedness, and adjust. */
3745 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3746 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3747 && size - 1 < BITS_PER_WORD
3748 && (mul_widen_cost (speed, wider_mode)
3749 + 2 * shift_cost (speed, mode, size-1)
3750 + 4 * add_cost (speed, mode) < max_cost))
3751 {
3752 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3753 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3754 if (tem != 0)
3755 {
3756 tem = extract_high_half (mode, tem);
3757 /* We used the wrong signedness. Adjust the result. */
3758 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3759 target, unsignedp);
3760 }
3761 }
3762
3763 return 0;
3764 }
3765
3766 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3767 putting the high half of the result in TARGET if that is convenient,
3768 and returning where the result is. If the operation cannot be performed,
3769 0 is returned.
3770
3771 MODE is the mode of operation and result.
3772
3773 UNSIGNEDP nonzero means unsigned multiply.
3774
3775 MAX_COST is the total allowed cost for the expanded RTL. */
3776
3777 static rtx
3778 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3779 rtx target, int unsignedp, int max_cost)
3780 {
3781 unsigned HOST_WIDE_INT cnst1;
3782 int extra_cost;
3783 bool sign_adjust = false;
3784 enum mult_variant variant;
3785 struct algorithm alg;
3786 rtx tem;
3787 bool speed = optimize_insn_for_speed_p ();
3788
3789 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3790 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3791
3792 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3793
3794 /* We can't optimize modes wider than BITS_PER_WORD.
3795 ??? We might be able to perform double-word arithmetic if
3796 mode == word_mode, however all the cost calculations in
3797 synth_mult etc. assume single-word operations. */
3798 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3799 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3800 return expmed_mult_highpart_optab (mode, op0, op1, target,
3801 unsignedp, max_cost);
3802
3803 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3804
3805 /* Check whether we try to multiply by a negative constant. */
3806 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3807 {
3808 sign_adjust = true;
3809 extra_cost += add_cost (speed, mode);
3810 }
3811
3812 /* See whether shift/add multiplication is cheap enough. */
3813 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3814 max_cost - extra_cost))
3815 {
3816 /* See whether the specialized multiplication optabs are
3817 cheaper than the shift/add version. */
3818 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3819 alg.cost.cost + extra_cost);
3820 if (tem)
3821 return tem;
3822
3823 tem = convert_to_mode (wider_mode, op0, unsignedp);
3824 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3825 tem = extract_high_half (mode, tem);
3826
3827 /* Adjust result for signedness. */
3828 if (sign_adjust)
3829 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3830
3831 return tem;
3832 }
3833 return expmed_mult_highpart_optab (mode, op0, op1, target,
3834 unsignedp, max_cost);
3835 }
3836
3837
3838 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3839
3840 static rtx
3841 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3842 {
3843 rtx result, temp, shift;
3844 rtx_code_label *label;
3845 int logd;
3846 int prec = GET_MODE_PRECISION (mode);
3847
3848 logd = floor_log2 (d);
3849 result = gen_reg_rtx (mode);
3850
3851 /* Avoid conditional branches when they're expensive. */
3852 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3853 && optimize_insn_for_speed_p ())
3854 {
3855 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3856 mode, 0, -1);
3857 if (signmask)
3858 {
3859 HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3860 signmask = force_reg (mode, signmask);
3861 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3862
3863 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3864 which instruction sequence to use. If logical right shifts
3865 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3866 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3867
3868 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3869 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3870 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3871 > COSTS_N_INSNS (2)))
3872 {
3873 temp = expand_binop (mode, xor_optab, op0, signmask,
3874 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3875 temp = expand_binop (mode, sub_optab, temp, signmask,
3876 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877 temp = expand_binop (mode, and_optab, temp,
3878 gen_int_mode (masklow, mode),
3879 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3880 temp = expand_binop (mode, xor_optab, temp, signmask,
3881 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3882 temp = expand_binop (mode, sub_optab, temp, signmask,
3883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3884 }
3885 else
3886 {
3887 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3888 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3889 signmask = force_reg (mode, signmask);
3890
3891 temp = expand_binop (mode, add_optab, op0, signmask,
3892 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3893 temp = expand_binop (mode, and_optab, temp,
3894 gen_int_mode (masklow, mode),
3895 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3896 temp = expand_binop (mode, sub_optab, temp, signmask,
3897 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3898 }
3899 return temp;
3900 }
3901 }
3902
3903 /* Mask contains the mode's signbit and the significant bits of the
3904 modulus. By including the signbit in the operation, many targets
3905 can avoid an explicit compare operation in the following comparison
3906 against zero. */
3907 wide_int mask = wi::mask (logd, false, prec);
3908 mask = wi::set_bit (mask, prec - 1);
3909
3910 temp = expand_binop (mode, and_optab, op0,
3911 immed_wide_int_const (mask, mode),
3912 result, 1, OPTAB_LIB_WIDEN);
3913 if (temp != result)
3914 emit_move_insn (result, temp);
3915
3916 label = gen_label_rtx ();
3917 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3918
3919 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3920 0, OPTAB_LIB_WIDEN);
3921
3922 mask = wi::mask (logd, true, prec);
3923 temp = expand_binop (mode, ior_optab, temp,
3924 immed_wide_int_const (mask, mode),
3925 result, 1, OPTAB_LIB_WIDEN);
3926 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3927 0, OPTAB_LIB_WIDEN);
3928 if (temp != result)
3929 emit_move_insn (result, temp);
3930 emit_label (label);
3931 return result;
3932 }
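
/* Illustrative sketch (not part of the compiler) of the branch-free signed
   modulus emitted above (the cheap-shift variant): bias the dividend by
   D - 1 when it is negative, mask, then remove the bias again.  Assuming
   32-bit int and arithmetic right shifts, x % 8 becomes:  */

static inline int
sketch_smod_by_8 (int x)
{
  int signmask = x >> 31;				/* -1 if x < 0, else 0 */
  int bias = (int) ((unsigned int) signmask >> (32 - 3));	/* 7 or 0 */
  return ((x + bias) & 7) - bias;			/* truncating x % 8 */
}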
3933
3934 /* Expand signed division of OP0 by a power of two D in mode MODE.
3935 This routine is only called for positive values of D. */
3936
3937 static rtx
3938 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3939 {
3940 rtx temp;
3941 rtx_code_label *label;
3942 int logd;
3943
3944 logd = floor_log2 (d);
3945
3946 if (d == 2
3947 && BRANCH_COST (optimize_insn_for_speed_p (),
3948 false) >= 1)
3949 {
3950 temp = gen_reg_rtx (mode);
3951 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3952 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3953 0, OPTAB_LIB_WIDEN);
3954 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3955 }
3956
3957 if (HAVE_conditional_move
3958 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3959 {
3960 rtx temp2;
3961
3962 start_sequence ();
3963 temp2 = copy_to_mode_reg (mode, op0);
3964 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3965 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3966 temp = force_reg (mode, temp);
3967
3968 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3969 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3970 mode, temp, temp2, mode, 0);
3971 if (temp2)
3972 {
3973 rtx_insn *seq = get_insns ();
3974 end_sequence ();
3975 emit_insn (seq);
3976 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3977 }
3978 end_sequence ();
3979 }
3980
3981 if (BRANCH_COST (optimize_insn_for_speed_p (),
3982 false) >= 2)
3983 {
3984 int ushift = GET_MODE_BITSIZE (mode) - logd;
3985
3986 temp = gen_reg_rtx (mode);
3987 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3988 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3989 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3990 > COSTS_N_INSNS (1))
3991 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3992 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3993 else
3994 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3995 ushift, NULL_RTX, 1);
3996 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3997 0, OPTAB_LIB_WIDEN);
3998 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3999 }
4000
4001 label = gen_label_rtx ();
4002 temp = copy_to_mode_reg (mode, op0);
4003 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4004 expand_inc (temp, gen_int_mode (d - 1, mode));
4005 emit_label (label);
4006 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4007 }
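
/* Illustrative sketch (not part of the compiler) of the branch-free signed
   division by a power of two generated above: add D - 1 to negative
   dividends so the arithmetic shift truncates toward zero.  Assuming 32-bit
   int and arithmetic right shifts, x / 4 becomes:  */

static inline int
sketch_sdiv_by_4 (int x)
{
  int bias = (x >> 31) & 3;	/* 3 if x < 0, else 0 */
  return (x + bias) >> 2;	/* truncating x / 4 */
}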
4008 \f
4009 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4010 if that is convenient, and returning where the result is.
4011 You may request either the quotient or the remainder as the result;
4012 specify REM_FLAG nonzero to get the remainder.
4013
4014 CODE is the expression code for which kind of division this is;
4015 it controls how rounding is done. MODE is the machine mode to use.
4016 UNSIGNEDP nonzero means do unsigned division. */
4017
4018 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4019 and then correct it by or'ing in missing high bits
4020 if result of ANDI is nonzero.
4021 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4022 This could optimize to a bfexts instruction.
4023 But C doesn't use these operations, so their optimizations are
4024 left for later. */
4025 /* ??? For modulo, we don't actually need the highpart of the first product,
4026 the low part will do nicely. And for small divisors, the second multiply
4027 can also be a low-part only multiply or even be completely left out.
4028 E.g. to calculate the remainder of a division by 3 with a 32 bit
4029 multiply, multiply with 0x55555556 and extract the upper two bits;
4030 the result is exact for inputs up to 0x1fffffff.
4031 The input range can be reduced by using cross-sum rules.
4032 For odd divisors >= 3, the following table gives right shift counts
4033 so that if a number is shifted by an integer multiple of the given
4034 amount, the remainder stays the same:
4035 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4036 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4037 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4038 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4039 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4040
4041 Cross-sum rules for even numbers can be derived by leaving as many bits
4042 to the right alone as the divisor has zeros to the right.
4043 E.g. if x is an unsigned 32 bit number:
4044 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4045 */
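
/* Illustrative sketch (not part of the compiler) of the low-part-only
   remainder trick described above: for unsigned X <= 0x1fffffff, X % 3 is
   the top two bits of the 32-bit product X * 0x55555556.  Assuming a
   32-bit unsigned int:  */

static inline unsigned int
sketch_urem_by_3_lowmul (unsigned int x)
{
  return (x * 0x55555556u) >> 30;	/* exact only for x <= 0x1fffffff */
}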
4046
4047 rtx
4048 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4049 rtx op0, rtx op1, rtx target, int unsignedp)
4050 {
4051 machine_mode compute_mode;
4052 rtx tquotient;
4053 rtx quotient = 0, remainder = 0;
4054 rtx_insn *last;
4055 rtx_insn *insn;
4056 optab optab1, optab2;
4057 int op1_is_constant, op1_is_pow2 = 0;
4058 int max_cost, extra_cost;
4059 static HOST_WIDE_INT last_div_const = 0;
4060 bool speed = optimize_insn_for_speed_p ();
4061
4062 op1_is_constant = CONST_INT_P (op1);
4063 if (op1_is_constant)
4064 {
4065 wide_int ext_op1 = rtx_mode_t (op1, mode);
4066 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4067 || (! unsignedp
4068 && wi::popcount (wi::neg (ext_op1)) == 1));
4069 }
4070
4071 /*
4072 This is the structure of expand_divmod:
4073
4074 First comes code to fix up the operands so we can perform the operations
4075 correctly and efficiently.
4076
4077 Second comes a switch statement with code specific for each rounding mode.
4078 For some special operands this code emits all RTL for the desired
4079 operation, for other cases, it generates only a quotient and stores it in
4080 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4081 to indicate that it has not done anything.
4082
4083 Last comes code that finishes the operation. If QUOTIENT is set and
4084 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4085 QUOTIENT is not set, it is computed using trunc rounding.
4086
4087 We try to generate special code for division and remainder when OP1 is a
4088 constant. If |OP1| = 2**n we can use shifts and some other fast
4089 operations. For other values of OP1, we compute a carefully selected
4090 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4091 by m.
4092
4093 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4094 half of the product. Different strategies for generating the product are
4095 implemented in expmed_mult_highpart.
4096
4097 If what we actually want is the remainder, we generate that by another
4098 by-constant multiplication and a subtraction. */
4099
4100 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4101 code below will malfunction if we are, so check here and handle
4102 the special case if so. */
4103 if (op1 == const1_rtx)
4104 return rem_flag ? const0_rtx : op0;
4105
4106 /* When dividing by -1, we could get an overflow.
4107 negv_optab can handle overflows. */
4108 if (! unsignedp && op1 == constm1_rtx)
4109 {
4110 if (rem_flag)
4111 return const0_rtx;
4112 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4113 ? negv_optab : neg_optab, op0, target, 0);
4114 }
4115
4116 if (target
4117 /* Don't use the function value register as a target
4118 since we have to read it as well as write it,
4119 and function-inlining gets confused by this. */
4120 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4121 /* Don't clobber an operand while doing a multi-step calculation. */
4122 || ((rem_flag || op1_is_constant)
4123 && (reg_mentioned_p (target, op0)
4124 || (MEM_P (op0) && MEM_P (target))))
4125 || reg_mentioned_p (target, op1)
4126 || (MEM_P (op1) && MEM_P (target))))
4127 target = 0;
4128
4129 /* Get the mode in which to perform this computation. Normally it will
4130 be MODE, but sometimes we can't do the desired operation in MODE.
4131 If so, pick a wider mode in which we can do the operation. Convert
4132 to that mode at the start to avoid repeated conversions.
4133
4134 First see what operations we need. These depend on the expression
4135 we are evaluating. (We assume that divxx3 insns exist under the
4136 same conditions as modxx3 insns do, and that these insns don't normally
4137 fail. If these assumptions are not correct, we may generate less
4138 efficient code in some cases.)
4139
4140 Then see if we find a mode in which we can open-code that operation
4141 (either a division, modulus, or shift). Finally, check for the smallest
4142 mode for which we can do the operation with a library call. */
4143
4144 /* We might want to refine this now that we have division-by-constant
4145 optimization. Since expmed_mult_highpart tries so many variants, it is
4146 not straightforward to generalize this. Maybe we should make an array
4147 of possible modes in init_expmed? Save this for GCC 2.7. */
4148
4149 optab1 = (op1_is_pow2
4150 ? (unsignedp ? lshr_optab : ashr_optab)
4151 : (unsignedp ? udiv_optab : sdiv_optab));
4152 optab2 = (op1_is_pow2 ? optab1
4153 : (unsignedp ? udivmod_optab : sdivmod_optab));
4154
4155 FOR_EACH_MODE_FROM (compute_mode, mode)
4156 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4157 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4158 break;
4159
4160 if (compute_mode == VOIDmode)
4161 FOR_EACH_MODE_FROM (compute_mode, mode)
4162 if (optab_libfunc (optab1, compute_mode)
4163 || optab_libfunc (optab2, compute_mode))
4164 break;
4165
4166 /* If we still couldn't find a mode, use MODE, but expand_binop will
4167 probably die. */
4168 if (compute_mode == VOIDmode)
4169 compute_mode = mode;
4170
4171 if (target && GET_MODE (target) == compute_mode)
4172 tquotient = target;
4173 else
4174 tquotient = gen_reg_rtx (compute_mode);
4175
4176 #if 0
4177 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4178 (mode), and thereby get better code when OP1 is a constant. Do that
4179 later. It will require going over all usages of SIZE below. */
4180 size = GET_MODE_BITSIZE (mode);
4181 #endif
4182
4183 /* Only deduct something for a REM if the last divide done was
4184 for a different constant. Then set the constant of the last
4185 divide. */
4186 max_cost = (unsignedp
4187 ? udiv_cost (speed, compute_mode)
4188 : sdiv_cost (speed, compute_mode));
4189 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4190 && INTVAL (op1) == last_div_const))
4191 max_cost -= (mul_cost (speed, compute_mode)
4192 + add_cost (speed, compute_mode));
4193
4194 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4195
4196 /* Now convert to the best mode to use. */
4197 if (compute_mode != mode)
4198 {
4199 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4200 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4201
4202 /* convert_modes may have placed op1 into a register, so we
4203 must recompute the following. */
4204 op1_is_constant = CONST_INT_P (op1);
4205 if (op1_is_constant)
4206 {
4207 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4208 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4209 || (! unsignedp
4210 && wi::popcount (wi::neg (ext_op1)) == 1));
4211 }
4212 else
4213 op1_is_pow2 = 0;
4214 }
4215
4216 /* If one of the operands is a volatile MEM, copy it into a register. */
4217
4218 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4219 op0 = force_reg (compute_mode, op0);
4220 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4221 op1 = force_reg (compute_mode, op1);
4222
4223 /* If we need the remainder or if OP1 is constant, we need to
4224 put OP0 in a register in case it has any queued subexpressions. */
4225 if (rem_flag || op1_is_constant)
4226 op0 = force_reg (compute_mode, op0);
4227
4228 last = get_last_insn ();
4229
4230 /* Promote floor rounding to trunc rounding for unsigned operations. */
4231 if (unsignedp)
4232 {
4233 if (code == FLOOR_DIV_EXPR)
4234 code = TRUNC_DIV_EXPR;
4235 if (code == FLOOR_MOD_EXPR)
4236 code = TRUNC_MOD_EXPR;
4237 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4238 code = TRUNC_DIV_EXPR;
4239 }
4240
4241 if (op1 != const0_rtx)
4242 switch (code)
4243 {
4244 case TRUNC_MOD_EXPR:
4245 case TRUNC_DIV_EXPR:
4246 if (op1_is_constant)
4247 {
4248 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4249 int size = GET_MODE_BITSIZE (int_mode);
4250 if (unsignedp)
4251 {
4252 unsigned HOST_WIDE_INT mh, ml;
4253 int pre_shift, post_shift;
4254 int dummy;
4255 wide_int wd = rtx_mode_t (op1, int_mode);
4256 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4257
4258 if (wi::popcount (wd) == 1)
4259 {
4260 pre_shift = floor_log2 (d);
4261 if (rem_flag)
4262 {
4263 unsigned HOST_WIDE_INT mask
4264 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4265 remainder
4266 = expand_binop (int_mode, and_optab, op0,
4267 gen_int_mode (mask, int_mode),
4268 remainder, 1,
4269 OPTAB_LIB_WIDEN);
4270 if (remainder)
4271 return gen_lowpart (mode, remainder);
4272 }
4273 quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4274 pre_shift, tquotient, 1);
4275 }
4276 else if (size <= HOST_BITS_PER_WIDE_INT)
4277 {
4278 if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4279 {
4280 /* Most significant bit of divisor is set; emit an scc
4281 insn. */
4282 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4283 int_mode, 1, 1);
4284 }
4285 else
4286 {
4287 /* Find a suitable multiplier and right shift count
4288 instead of multiplying with D. */
4289
4290 mh = choose_multiplier (d, size, size,
4291 &ml, &post_shift, &dummy);
4292
4293 /* If the suggested multiplier is more than SIZE bits,
4294 we can do better for even divisors, using an
4295 initial right shift. */
4296 if (mh != 0 && (d & 1) == 0)
4297 {
4298 pre_shift = ctz_or_zero (d);
4299 mh = choose_multiplier (d >> pre_shift, size,
4300 size - pre_shift,
4301 &ml, &post_shift, &dummy);
4302 gcc_assert (!mh);
4303 }
4304 else
4305 pre_shift = 0;
4306
4307 if (mh != 0)
4308 {
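		    /* The multiplier needs SIZE + 1 bits (MH is set), so the
		       product op0 * (2^SIZE + ml) cannot be formed directly.
		       Use the identity
			 q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1),
		       where t1 is the high half of op0 * ml; this evaluates
		       floor (op0 * (2^SIZE + ml) / 2^(SIZE + post_shift))
		       using only SIZE-bit intermediate values.  */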
4309 rtx t1, t2, t3, t4;
4310
4311 if (post_shift - 1 >= BITS_PER_WORD)
4312 goto fail1;
4313
4314 extra_cost
4315 = (shift_cost (speed, int_mode, post_shift - 1)
4316 + shift_cost (speed, int_mode, 1)
4317 + 2 * add_cost (speed, int_mode));
4318 t1 = expmed_mult_highpart
4319 (int_mode, op0, gen_int_mode (ml, int_mode),
4320 NULL_RTX, 1, max_cost - extra_cost);
4321 if (t1 == 0)
4322 goto fail1;
4323 t2 = force_operand (gen_rtx_MINUS (int_mode,
4324 op0, t1),
4325 NULL_RTX);
4326 t3 = expand_shift (RSHIFT_EXPR, int_mode,
4327 t2, 1, NULL_RTX, 1);
4328 t4 = force_operand (gen_rtx_PLUS (int_mode,
4329 t1, t3),
4330 NULL_RTX);
4331 quotient = expand_shift
4332 (RSHIFT_EXPR, int_mode, t4,
4333 post_shift - 1, tquotient, 1);
4334 }
4335 else
4336 {
4337 rtx t1, t2;
4338
4339 if (pre_shift >= BITS_PER_WORD
4340 || post_shift >= BITS_PER_WORD)
4341 goto fail1;
4342
4343 t1 = expand_shift
4344 (RSHIFT_EXPR, int_mode, op0,
4345 pre_shift, NULL_RTX, 1);
4346 extra_cost
4347 = (shift_cost (speed, int_mode, pre_shift)
4348 + shift_cost (speed, int_mode, post_shift));
4349 t2 = expmed_mult_highpart
4350 (int_mode, t1,
4351 gen_int_mode (ml, int_mode),
4352 NULL_RTX, 1, max_cost - extra_cost);
4353 if (t2 == 0)
4354 goto fail1;
4355 quotient = expand_shift
4356 (RSHIFT_EXPR, int_mode, t2,
4357 post_shift, tquotient, 1);
4358 }
4359 }
4360 }
4361 else /* Too wide mode to use tricky code */
4362 break;
4363
4364 insn = get_last_insn ();
4365 if (insn != last)
4366 set_dst_reg_note (insn, REG_EQUAL,
4367 gen_rtx_UDIV (int_mode, op0, op1),
4368 quotient);
4369 }
4370 else /* TRUNC_DIV, signed */
4371 {
4372 unsigned HOST_WIDE_INT ml;
4373 int lgup, post_shift;
4374 rtx mlr;
4375 HOST_WIDE_INT d = INTVAL (op1);
4376 unsigned HOST_WIDE_INT abs_d;
4377
4378 /* Since d might be INT_MIN, we have to cast to
4379 unsigned HOST_WIDE_INT before negating to avoid
4380 undefined signed overflow. */
4381 abs_d = (d >= 0
4382 ? (unsigned HOST_WIDE_INT) d
4383 : - (unsigned HOST_WIDE_INT) d);
4384
4385 /* n rem d = n rem -d */
4386 if (rem_flag && d < 0)
4387 {
4388 d = abs_d;
4389 op1 = gen_int_mode (abs_d, int_mode);
4390 }
4391
4392 if (d == 1)
4393 quotient = op0;
4394 else if (d == -1)
4395 quotient = expand_unop (int_mode, neg_optab, op0,
4396 tquotient, 0);
4397 else if (size <= HOST_BITS_PER_WIDE_INT
4398 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4399 {
4400 /* This case is not handled correctly below. */
4401 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4402 int_mode, 1, 1);
4403 if (quotient == 0)
4404 goto fail1;
4405 }
4406 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4407 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4408 && (rem_flag
4409 ? smod_pow2_cheap (speed, int_mode)
4410 : sdiv_pow2_cheap (speed, int_mode))
4411 /* We assume that cheap metric is true if the
4412 optab has an expander for this mode. */
4413 && ((optab_handler ((rem_flag ? smod_optab
4414 : sdiv_optab),
4415 int_mode)
4416 != CODE_FOR_nothing)
4417 || (optab_handler (sdivmod_optab, int_mode)
4418 != CODE_FOR_nothing)))
4419 ;
4420 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4421 && (size <= HOST_BITS_PER_WIDE_INT
4422 || abs_d != (unsigned HOST_WIDE_INT) d))
4423 {
4424 if (rem_flag)
4425 {
4426 remainder = expand_smod_pow2 (int_mode, op0, d);
4427 if (remainder)
4428 return gen_lowpart (mode, remainder);
4429 }
4430
4431 if (sdiv_pow2_cheap (speed, int_mode)
4432 && ((optab_handler (sdiv_optab, int_mode)
4433 != CODE_FOR_nothing)
4434 || (optab_handler (sdivmod_optab, int_mode)
4435 != CODE_FOR_nothing)))
4436 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4437 int_mode, op0,
4438 gen_int_mode (abs_d,
4439 int_mode),
4440 NULL_RTX, 0);
4441 else
4442 quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4443
4444 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4445 negate the quotient. */
4446 if (d < 0)
4447 {
4448 insn = get_last_insn ();
4449 if (insn != last
4450 && abs_d < (HOST_WIDE_INT_1U
4451 << (HOST_BITS_PER_WIDE_INT - 1)))
4452 set_dst_reg_note (insn, REG_EQUAL,
4453 gen_rtx_DIV (int_mode, op0,
4454 gen_int_mode
4455 (abs_d,
4456 int_mode)),
4457 quotient);
4458
4459 quotient = expand_unop (int_mode, neg_optab,
4460 quotient, quotient, 0);
4461 }
4462 }
4463 else if (size <= HOST_BITS_PER_WIDE_INT)
4464 {
4465 choose_multiplier (abs_d, size, size - 1,
4466 &ml, &post_shift, &lgup);
4467 if (ml < HOST_WIDE_INT_1U << (size - 1))
4468 {
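		    /* The multiplier fits in SIZE - 1 bits.  The quotient is
		       the signed high half of op0 * ml, shifted right by
		       POST_SHIFT, minus the sign word of op0 (-1 or 0); the
		       subtraction supplies the +1 needed to truncate toward
		       zero for negative dividends, and for negative D the
		       operands are swapped so the result is negated.  */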
4469 rtx t1, t2, t3;
4470
4471 if (post_shift >= BITS_PER_WORD
4472 || size - 1 >= BITS_PER_WORD)
4473 goto fail1;
4474
4475 extra_cost = (shift_cost (speed, int_mode, post_shift)
4476 + shift_cost (speed, int_mode, size - 1)
4477 + add_cost (speed, int_mode));
4478 t1 = expmed_mult_highpart
4479 (int_mode, op0, gen_int_mode (ml, int_mode),
4480 NULL_RTX, 0, max_cost - extra_cost);
4481 if (t1 == 0)
4482 goto fail1;
4483 t2 = expand_shift
4484 (RSHIFT_EXPR, int_mode, t1,
4485 post_shift, NULL_RTX, 0);
4486 t3 = expand_shift
4487 (RSHIFT_EXPR, int_mode, op0,
4488 size - 1, NULL_RTX, 0);
4489 if (d < 0)
4490 quotient
4491 = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4492 tquotient);
4493 else
4494 quotient
4495 = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4496 tquotient);
4497 }
4498 else
4499 {
4500 rtx t1, t2, t3, t4;
4501
4502 if (post_shift >= BITS_PER_WORD
4503 || size - 1 >= BITS_PER_WORD)
4504 goto fail1;
4505
4506 ml |= HOST_WIDE_INT_M1U << (size - 1);
4507 mlr = gen_int_mode (ml, int_mode);
4508 extra_cost = (shift_cost (speed, int_mode, post_shift)
4509 + shift_cost (speed, int_mode, size - 1)
4510 + 2 * add_cost (speed, int_mode));
4511 t1 = expmed_mult_highpart (int_mode, op0, mlr,
4512 NULL_RTX, 0,
4513 max_cost - extra_cost);
4514 if (t1 == 0)
4515 goto fail1;
4516 t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4517 NULL_RTX);
4518 t3 = expand_shift
4519 (RSHIFT_EXPR, int_mode, t2,
4520 post_shift, NULL_RTX, 0);
4521 t4 = expand_shift
4522 (RSHIFT_EXPR, int_mode, op0,
4523 size - 1, NULL_RTX, 0);
4524 if (d < 0)
4525 quotient
4526 = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4527 tquotient);
4528 else
4529 quotient
4530 = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4531 tquotient);
4532 }
4533 }
4534 else /* Too wide mode to use tricky code */
4535 break;
4536
4537 insn = get_last_insn ();
4538 if (insn != last)
4539 set_dst_reg_note (insn, REG_EQUAL,
4540 gen_rtx_DIV (int_mode, op0, op1),
4541 quotient);
4542 }
4543 break;
4544 }
4545 fail1:
4546 delete_insns_since (last);
4547 break;
4548
4549 case FLOOR_DIV_EXPR:
4550 case FLOOR_MOD_EXPR:
4551 /* We will come here only for signed operations. */
4552 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4553 {
4554 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4555 int size = GET_MODE_BITSIZE (int_mode);
4556 unsigned HOST_WIDE_INT mh, ml;
4557 int pre_shift, lgup, post_shift;
4558 HOST_WIDE_INT d = INTVAL (op1);
4559
4560 if (d > 0)
4561 {
4562 /* We could just as easily deal with negative constants here,
4563 but it does not seem worth the trouble for GCC 2.6. */
4564 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4565 {
4566 pre_shift = floor_log2 (d);
4567 if (rem_flag)
4568 {
4569 unsigned HOST_WIDE_INT mask
4570 = (HOST_WIDE_INT_1U << pre_shift) - 1;
4571 remainder = expand_binop
4572 (int_mode, and_optab, op0,
4573 gen_int_mode (mask, int_mode),
4574 remainder, 0, OPTAB_LIB_WIDEN);
4575 if (remainder)
4576 return gen_lowpart (mode, remainder);
4577 }
4578 quotient = expand_shift
4579 (RSHIFT_EXPR, int_mode, op0,
4580 pre_shift, tquotient, 0);
4581 }
4582 else
4583 {
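		  /* D is positive but not a power of two.  Use the identity
		     floor (N / D) == ~((~N) / D) for negative N (and plain
		     N / D otherwise): XOR-ing with the sign mask T1 maps N
		     to ~N, the unsigned multiply-and-shift computes that
		     quotient, and the final XOR with T1 undoes the mapping,
		     giving floor rounding without a branch.  */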
4584 rtx t1, t2, t3, t4;
4585
4586 mh = choose_multiplier (d, size, size - 1,
4587 &ml, &post_shift, &lgup);
4588 gcc_assert (!mh);
4589
4590 if (post_shift < BITS_PER_WORD
4591 && size - 1 < BITS_PER_WORD)
4592 {
4593 t1 = expand_shift
4594 (RSHIFT_EXPR, int_mode, op0,
4595 size - 1, NULL_RTX, 0);
4596 t2 = expand_binop (int_mode, xor_optab, op0, t1,
4597 NULL_RTX, 0, OPTAB_WIDEN);
4598 extra_cost = (shift_cost (speed, int_mode, post_shift)
4599 + shift_cost (speed, int_mode, size - 1)
4600 + 2 * add_cost (speed, int_mode));
4601 t3 = expmed_mult_highpart
4602 (int_mode, t2, gen_int_mode (ml, int_mode),
4603 NULL_RTX, 1, max_cost - extra_cost);
4604 if (t3 != 0)
4605 {
4606 t4 = expand_shift
4607 (RSHIFT_EXPR, int_mode, t3,
4608 post_shift, NULL_RTX, 1);
4609 quotient = expand_binop (int_mode, xor_optab,
4610 t4, t1, tquotient, 0,
4611 OPTAB_WIDEN);
4612 }
4613 }
4614 }
4615 }
4616 else
4617 {
4618 rtx nsign, t1, t2, t3, t4;
4619 t1 = force_operand (gen_rtx_PLUS (int_mode,
4620 op0, constm1_rtx), NULL_RTX);
4621 t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4622 0, OPTAB_WIDEN);
4623 nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4624 size - 1, NULL_RTX, 0);
4625 t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4626 NULL_RTX);
4627 t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4628 NULL_RTX, 0);
4629 if (t4)
4630 {
4631 rtx t5;
4632 t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4633 NULL_RTX, 0);
4634 quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4635 tquotient);
4636 }
4637 }
4638 }
4639
4640 if (quotient != 0)
4641 break;
4642 delete_insns_since (last);
4643
4644 /* Try using an instruction that produces both the quotient and
4645 remainder, using truncation. We can easily compensate the quotient
4646 or remainder to get floor rounding, once we have the remainder.
4647 Notice that we also compute the final remainder value here,
4648 and return the result right away. */
4649 if (target == 0 || GET_MODE (target) != compute_mode)
4650 target = gen_reg_rtx (compute_mode);
4651
4652 if (rem_flag)
4653 {
4654 remainder
4655 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4656 quotient = gen_reg_rtx (compute_mode);
4657 }
4658 else
4659 {
4660 quotient
4661 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4662 remainder = gen_reg_rtx (compute_mode);
4663 }
4664
4665 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4666 quotient, remainder, 0))
4667 {
4668 /* This could be computed with a branch-less sequence.
4669 Save that for later. */
4670 rtx tem;
4671 rtx_code_label *label = gen_label_rtx ();
4672 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4673 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4674 NULL_RTX, 0, OPTAB_WIDEN);
4675 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4676 expand_dec (quotient, const1_rtx);
4677 expand_inc (remainder, op1);
4678 emit_label (label);
4679 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4680 }
4681
4682 /* No luck with division elimination or divmod. Have to do it
4683 by conditionally adjusting op0 *and* the result. */
4684 {
4685 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4686 rtx adjusted_op0;
4687 rtx tem;
4688
4689 quotient = gen_reg_rtx (compute_mode);
4690 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4691 label1 = gen_label_rtx ();
4692 label2 = gen_label_rtx ();
4693 label3 = gen_label_rtx ();
4694 label4 = gen_label_rtx ();
4695 label5 = gen_label_rtx ();
4696 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4697 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4698 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 quotient, 0, OPTAB_LIB_WIDEN);
4700 if (tem != quotient)
4701 emit_move_insn (quotient, tem);
4702 emit_jump_insn (targetm.gen_jump (label5));
4703 emit_barrier ();
4704 emit_label (label1);
4705 expand_inc (adjusted_op0, const1_rtx);
4706 emit_jump_insn (targetm.gen_jump (label4));
4707 emit_barrier ();
4708 emit_label (label2);
4709 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4710 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4711 quotient, 0, OPTAB_LIB_WIDEN);
4712 if (tem != quotient)
4713 emit_move_insn (quotient, tem);
4714 emit_jump_insn (targetm.gen_jump (label5));
4715 emit_barrier ();
4716 emit_label (label3);
4717 expand_dec (adjusted_op0, const1_rtx);
4718 emit_label (label4);
4719 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4720 quotient, 0, OPTAB_LIB_WIDEN);
4721 if (tem != quotient)
4722 emit_move_insn (quotient, tem);
4723 expand_dec (quotient, const1_rtx);
4724 emit_label (label5);
4725 }
4726 break;
4727
4728 case CEIL_DIV_EXPR:
4729 case CEIL_MOD_EXPR:
4730 if (unsignedp)
4731 {
4732 if (op1_is_constant
4733 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4734 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4735 || INTVAL (op1) >= 0))
4736 {
4737 scalar_int_mode int_mode
4738 = as_a <scalar_int_mode> (compute_mode);
4739 rtx t1, t2, t3;
4740 unsigned HOST_WIDE_INT d = INTVAL (op1);
4741 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4742 floor_log2 (d), tquotient, 1);
4743 t2 = expand_binop (int_mode, and_optab, op0,
4744 gen_int_mode (d - 1, int_mode),
4745 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4746 t3 = gen_reg_rtx (int_mode);
4747 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4748 if (t3 == 0)
4749 {
4750 rtx_code_label *lab;
4751 lab = gen_label_rtx ();
4752 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4753 expand_inc (t1, const1_rtx);
4754 emit_label (lab);
4755 quotient = t1;
4756 }
4757 else
4758 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4759 tquotient);
4760 break;
4761 }
4762
4763 /* Try using an instruction that produces both the quotient and
4764 remainder, using truncation. We can easily compensate the
4765 quotient or remainder to get ceiling rounding, once we have the
4766 remainder. Notice that we also compute the final remainder
4767 value here, and return the result right away. */
4768 if (target == 0 || GET_MODE (target) != compute_mode)
4769 target = gen_reg_rtx (compute_mode);
4770
4771 if (rem_flag)
4772 {
4773 remainder = (REG_P (target)
4774 ? target : gen_reg_rtx (compute_mode));
4775 quotient = gen_reg_rtx (compute_mode);
4776 }
4777 else
4778 {
4779 quotient = (REG_P (target)
4780 ? target : gen_reg_rtx (compute_mode));
4781 remainder = gen_reg_rtx (compute_mode);
4782 }
4783
4784 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4785 remainder, 1))
4786 {
4787 /* This could be computed with a branch-less sequence.
4788 Save that for later. */
4789 rtx_code_label *label = gen_label_rtx ();
4790 do_cmp_and_jump (remainder, const0_rtx, EQ,
4791 compute_mode, label);
4792 expand_inc (quotient, const1_rtx);
4793 expand_dec (remainder, op1);
4794 emit_label (label);
4795 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4796 }
4797
4798 /* No luck with division elimination or divmod. Have to do it
4799 by conditionally adjusting op0 *and* the result. */
4800 {
4801 rtx_code_label *label1, *label2;
4802 rtx adjusted_op0, tem;
4803
4804 quotient = gen_reg_rtx (compute_mode);
4805 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4806 label1 = gen_label_rtx ();
4807 label2 = gen_label_rtx ();
4808 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4809 compute_mode, label1);
4810 emit_move_insn (quotient, const0_rtx);
4811 emit_jump_insn (targetm.gen_jump (label2));
4812 emit_barrier ();
4813 emit_label (label1);
4814 expand_dec (adjusted_op0, const1_rtx);
4815 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4816 quotient, 1, OPTAB_LIB_WIDEN);
4817 if (tem != quotient)
4818 emit_move_insn (quotient, tem);
4819 expand_inc (quotient, const1_rtx);
4820 emit_label (label2);
4821 }
4822 }
4823 else /* signed */
4824 {
4825 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4826 && INTVAL (op1) >= 0)
4827 {
4828 /* This is extremely similar to the code for the unsigned case
4829 above. For 2.7 we should merge these variants, but for
4830 2.6.1 I don't want to touch the code for unsigned since that
4831 gets used in C. The signed case will only be used by other
4832 languages (Ada). */
4833
4834 rtx t1, t2, t3;
4835 unsigned HOST_WIDE_INT d = INTVAL (op1);
4836 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4837 floor_log2 (d), tquotient, 0);
4838 t2 = expand_binop (compute_mode, and_optab, op0,
4839 gen_int_mode (d - 1, compute_mode),
4840 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4841 t3 = gen_reg_rtx (compute_mode);
4842 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4843 compute_mode, 1, 1);
4844 if (t3 == 0)
4845 {
4846 rtx_code_label *lab;
4847 lab = gen_label_rtx ();
4848 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4849 expand_inc (t1, const1_rtx);
4850 emit_label (lab);
4851 quotient = t1;
4852 }
4853 else
4854 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4855 t1, t3),
4856 tquotient);
4857 break;
4858 }
4859
4860 /* Try using an instruction that produces both the quotient and
4861 remainder, using truncation. We can easily compensate the
4862 quotient or remainder to get ceiling rounding, once we have the
4863 remainder. Notice that we also compute the final remainder
4864 value here, and return the result right away. */
4865 if (target == 0 || GET_MODE (target) != compute_mode)
4866 target = gen_reg_rtx (compute_mode);
4867 if (rem_flag)
4868 {
4869 remainder = (REG_P (target)
4870 ? target : gen_reg_rtx (compute_mode));
4871 quotient = gen_reg_rtx (compute_mode);
4872 }
4873 else
4874 {
4875 quotient = (REG_P (target)
4876 ? target : gen_reg_rtx (compute_mode));
4877 remainder = gen_reg_rtx (compute_mode);
4878 }
4879
4880 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4881 remainder, 0))
4882 {
4883 /* This could be computed with a branch-less sequence.
4884 Save that for later. */
4885 rtx tem;
4886 rtx_code_label *label = gen_label_rtx ();
4887 do_cmp_and_jump (remainder, const0_rtx, EQ,
4888 compute_mode, label);
4889 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4890 NULL_RTX, 0, OPTAB_WIDEN);
4891 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4892 expand_inc (quotient, const1_rtx);
4893 expand_dec (remainder, op1);
4894 emit_label (label);
4895 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4896 }
4897
4898 /* No luck with division elimination or divmod. Have to do it
4899 by conditionally adjusting op0 *and* the result. */
4900 {
4901 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4902 rtx adjusted_op0;
4903 rtx tem;
4904
4905 quotient = gen_reg_rtx (compute_mode);
4906 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4907 label1 = gen_label_rtx ();
4908 label2 = gen_label_rtx ();
4909 label3 = gen_label_rtx ();
4910 label4 = gen_label_rtx ();
4911 label5 = gen_label_rtx ();
4912 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4913 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4914 compute_mode, label1);
4915 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4916 quotient, 0, OPTAB_LIB_WIDEN);
4917 if (tem != quotient)
4918 emit_move_insn (quotient, tem);
4919 emit_jump_insn (targetm.gen_jump (label5));
4920 emit_barrier ();
4921 emit_label (label1);
4922 expand_dec (adjusted_op0, const1_rtx);
4923 emit_jump_insn (targetm.gen_jump (label4));
4924 emit_barrier ();
4925 emit_label (label2);
4926 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4927 compute_mode, label3);
4928 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4929 quotient, 0, OPTAB_LIB_WIDEN);
4930 if (tem != quotient)
4931 emit_move_insn (quotient, tem);
4932 emit_jump_insn (targetm.gen_jump (label5));
4933 emit_barrier ();
4934 emit_label (label3);
4935 expand_inc (adjusted_op0, const1_rtx);
4936 emit_label (label4);
4937 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4938 quotient, 0, OPTAB_LIB_WIDEN);
4939 if (tem != quotient)
4940 emit_move_insn (quotient, tem);
4941 expand_inc (quotient, const1_rtx);
4942 emit_label (label5);
4943 }
4944 }
4945 break;
4946
4947 case EXACT_DIV_EXPR:
4948 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4949 {
4950 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4951 int size = GET_MODE_BITSIZE (int_mode);
4952 HOST_WIDE_INT d = INTVAL (op1);
4953 unsigned HOST_WIDE_INT ml;
4954 int pre_shift;
4955 rtx t1;
4956
4957 pre_shift = ctz_or_zero (d);
4958 ml = invert_mod2n (d >> pre_shift, size);
4959 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4960 pre_shift, NULL_RTX, unsignedp);
4961 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
4962 NULL_RTX, 1);
4963
4964 insn = get_last_insn ();
4965 set_dst_reg_note (insn, REG_EQUAL,
4966 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4967 int_mode, op0, op1),
4968 quotient);
4969 }
4970 break;
4971
4972 case ROUND_DIV_EXPR:
4973 case ROUND_MOD_EXPR:
4974 if (unsignedp)
4975 {
4976 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4977 rtx tem;
4978 rtx_code_label *label;
4979 label = gen_label_rtx ();
4980 quotient = gen_reg_rtx (int_mode);
4981 remainder = gen_reg_rtx (int_mode);
4982 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4983 {
4984 rtx tem;
4985 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
4986 quotient, 1, OPTAB_LIB_WIDEN);
4987 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
4988 remainder = expand_binop (int_mode, sub_optab, op0, tem,
4989 remainder, 1, OPTAB_LIB_WIDEN);
4990 }
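	      /* Round the truncating quotient to nearest: unless the
		 remainder is at most (op1 - 1) / 2, increment the quotient
		 and subtract op1 from the remainder, so halves round up.  */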
4991 tem = plus_constant (int_mode, op1, -1);
4992 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
4993 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
4994 expand_inc (quotient, const1_rtx);
4995 expand_dec (remainder, op1);
4996 emit_label (label);
4997 }
4998 else
4999 {
5000 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5001 int size = GET_MODE_BITSIZE (int_mode);
5002 rtx abs_rem, abs_op1, tem, mask;
5003 rtx_code_label *label;
5004 label = gen_label_rtx ();
5005 quotient = gen_reg_rtx (int_mode);
5006 remainder = gen_reg_rtx (int_mode);
5007 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5008 {
5009 rtx tem;
5010 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5011 quotient, 0, OPTAB_LIB_WIDEN);
5012 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5013 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5014 remainder, 0, OPTAB_LIB_WIDEN);
5015 }
5016 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5017 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5018 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5019 1, NULL_RTX, 1);
5020 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5021 tem = expand_binop (int_mode, xor_optab, op0, op1,
5022 NULL_RTX, 0, OPTAB_WIDEN);
5023 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5024 size - 1, NULL_RTX, 0);
5025 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5026 NULL_RTX, 0, OPTAB_WIDEN);
5027 tem = expand_binop (int_mode, sub_optab, tem, mask,
5028 NULL_RTX, 0, OPTAB_WIDEN);
5029 expand_inc (quotient, tem);
5030 tem = expand_binop (int_mode, xor_optab, mask, op1,
5031 NULL_RTX, 0, OPTAB_WIDEN);
5032 tem = expand_binop (int_mode, sub_optab, tem, mask,
5033 NULL_RTX, 0, OPTAB_WIDEN);
5034 expand_dec (remainder, tem);
5035 emit_label (label);
5036 }
5037 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5038
5039 default:
5040 gcc_unreachable ();
5041 }
5042
5043   if (quotient == 0)
5044 {
5045 if (target && GET_MODE (target) != compute_mode)
5046 target = 0;
5047
5048 if (rem_flag)
5049 {
5050 /* Try to produce the remainder without producing the quotient.
5051 If we seem to have a divmod pattern that does not require widening,
5052 don't try widening here. We should really have a WIDEN argument
5053 to expand_twoval_binop, since what we'd really like to do here is
5054 1) try a mod insn in compute_mode
5055 2) try a divmod insn in compute_mode
5056 3) try a div insn in compute_mode and multiply-subtract to get
5057 remainder
5058 4) try the same things with widening allowed. */
5059 remainder
5060 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5061 op0, op1, target,
5062 unsignedp,
5063 ((optab_handler (optab2, compute_mode)
5064 != CODE_FOR_nothing)
5065 ? OPTAB_DIRECT : OPTAB_WIDEN));
5066 if (remainder == 0)
5067 {
5068 /* No luck there. Can we do remainder and divide at once
5069 without a library call? */
5070 remainder = gen_reg_rtx (compute_mode);
5071 if (! expand_twoval_binop ((unsignedp
5072 ? udivmod_optab
5073 : sdivmod_optab),
5074 op0, op1,
5075 NULL_RTX, remainder, unsignedp))
5076 remainder = 0;
5077 }
5078
5079 if (remainder)
5080 return gen_lowpart (mode, remainder);
5081 }
5082
5083 /* Produce the quotient. Try a quotient insn, but not a library call.
5084 If we have a divmod in this mode, use it in preference to widening
5085 the div (for this test we assume it will not fail). Note that optab2
5086	 is set to whichever of the two optabs the call below will use. */
5087 quotient
5088 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5089 op0, op1, rem_flag ? NULL_RTX : target,
5090 unsignedp,
5091 ((optab_handler (optab2, compute_mode)
5092 != CODE_FOR_nothing)
5093 ? OPTAB_DIRECT : OPTAB_WIDEN));
5094
5095 if (quotient == 0)
5096 {
5097 /* No luck there. Try a quotient-and-remainder insn,
5098 keeping the quotient alone. */
5099 quotient = gen_reg_rtx (compute_mode);
5100 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5101 op0, op1,
5102 quotient, NULL_RTX, unsignedp))
5103 {
5104 quotient = 0;
5105 if (! rem_flag)
5106 /* Still no luck. If we are not computing the remainder,
5107 use a library call for the quotient. */
5108 quotient = sign_expand_binop (compute_mode,
5109 udiv_optab, sdiv_optab,
5110 op0, op1, target,
5111 unsignedp, OPTAB_LIB_WIDEN);
5112 }
5113 }
5114 }
5115
5116 if (rem_flag)
5117 {
5118 if (target && GET_MODE (target) != compute_mode)
5119 target = 0;
5120
5121 if (quotient == 0)
5122 {
5123 /* No divide instruction either. Use library for remainder. */
5124 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5125 op0, op1, target,
5126 unsignedp, OPTAB_LIB_WIDEN);
5127 /* No remainder function. Try a quotient-and-remainder
5128 function, keeping the remainder. */
5129 if (!remainder)
5130 {
5131 remainder = gen_reg_rtx (compute_mode);
5132 if (!expand_twoval_binop_libfunc
5133 (unsignedp ? udivmod_optab : sdivmod_optab,
5134 op0, op1,
5135 NULL_RTX, remainder,
5136 unsignedp ? UMOD : MOD))
5137 remainder = NULL_RTX;
5138 }
5139 }
5140 else
5141 {
5142 /* We divided. Now finish doing X - Y * (X / Y). */
5143 remainder = expand_mult (compute_mode, quotient, op1,
5144 NULL_RTX, unsignedp);
5145 remainder = expand_binop (compute_mode, sub_optab, op0,
5146 remainder, target, unsignedp,
5147 OPTAB_LIB_WIDEN);
5148 }
5149 }
5150
5151 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5152 }
5153 \f
5154 /* Return a tree node with data type TYPE, describing the value of X.
5155    Usually this is a VAR_DECL, if there is no obvious better choice.
5156    X may be an expression; however, we only support those expressions
5157 generated by loop.c. */
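/* For instance, (plus:SI (reg:SI 100) (const_int 9)) would become
   PLUS_EXPR <D, 9>, where D is a VAR_DECL of type TYPE whose rtl is the
   pseudo register (the register number here is only an example).  */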
5158
5159 tree
5160 make_tree (tree type, rtx x)
5161 {
5162 tree t;
5163
5164 switch (GET_CODE (x))
5165 {
5166 case CONST_INT:
5167 case CONST_WIDE_INT:
5168 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5169 return t;
5170
5171 case CONST_DOUBLE:
5172 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5173 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5174 t = wide_int_to_tree (type,
5175 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5176 HOST_BITS_PER_WIDE_INT * 2));
5177 else
5178 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5179
5180 return t;
5181
5182 case CONST_VECTOR:
5183 {
5184 int units = CONST_VECTOR_NUNITS (x);
5185 tree itype = TREE_TYPE (type);
5186 tree *elts;
5187 int i;
5188
5189 /* Build a tree with vector elements. */
5190 elts = XALLOCAVEC (tree, units);
5191 for (i = units - 1; i >= 0; --i)
5192 {
5193 rtx elt = CONST_VECTOR_ELT (x, i);
5194 elts[i] = make_tree (itype, elt);
5195 }
5196
5197 return build_vector (type, elts);
5198 }
5199
5200 case PLUS:
5201 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5202 make_tree (type, XEXP (x, 1)));
5203
5204 case MINUS:
5205 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5206 make_tree (type, XEXP (x, 1)));
5207
5208 case NEG:
5209 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5210
5211 case MULT:
5212 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5213 make_tree (type, XEXP (x, 1)));
5214
5215 case ASHIFT:
5216 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5217 make_tree (type, XEXP (x, 1)));
5218
5219 case LSHIFTRT:
5220 t = unsigned_type_for (type);
5221 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5222 make_tree (t, XEXP (x, 0)),
5223 make_tree (type, XEXP (x, 1))));
5224
5225 case ASHIFTRT:
5226 t = signed_type_for (type);
5227 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5228 make_tree (t, XEXP (x, 0)),
5229 make_tree (type, XEXP (x, 1))));
5230
5231 case DIV:
5232 if (TREE_CODE (type) != REAL_TYPE)
5233 t = signed_type_for (type);
5234 else
5235 t = type;
5236
5237 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5238 make_tree (t, XEXP (x, 0)),
5239 make_tree (t, XEXP (x, 1))));
5240 case UDIV:
5241 t = unsigned_type_for (type);
5242 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5243 make_tree (t, XEXP (x, 0)),
5244 make_tree (t, XEXP (x, 1))));
5245
5246 case SIGN_EXTEND:
5247 case ZERO_EXTEND:
5248 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5249 GET_CODE (x) == ZERO_EXTEND);
5250 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5251
5252 case CONST:
5253 return make_tree (type, XEXP (x, 0));
5254
5255 case SYMBOL_REF:
5256 t = SYMBOL_REF_DECL (x);
5257 if (t)
5258 return fold_convert (type, build_fold_addr_expr (t));
5259 /* fall through. */
5260
5261 default:
5262 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5263
5264 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5265 address mode to pointer mode. */
5266 if (POINTER_TYPE_P (type))
5267 x = convert_memory_address_addr_space
5268 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5269
5270 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5271 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5272 t->decl_with_rtl.rtl = x;
5273
5274 return t;
5275 }
5276 }
5277 \f
5278 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5279 and returning TARGET.
5280
5281 If TARGET is 0, a pseudo-register or constant is returned. */
5282
5283 rtx
5284 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5285 {
5286 rtx tem = 0;
5287
5288 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5289 tem = simplify_binary_operation (AND, mode, op0, op1);
5290 if (tem == 0)
5291 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5292
5293 if (target == 0)
5294 target = tem;
5295 else if (tem != target)
5296 emit_move_insn (target, tem);
5297 return target;
5298 }
5299
5300 /* Helper function for emit_store_flag. */
5301 rtx
5302 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5303 machine_mode mode, machine_mode compare_mode,
5304 int unsignedp, rtx x, rtx y, int normalizep,
5305 machine_mode target_mode)
5306 {
5307 struct expand_operand ops[4];
5308 rtx op0, comparison, subtarget;
5309 rtx_insn *last;
5310 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5311 scalar_int_mode int_target_mode;
5312
5313 last = get_last_insn ();
5314 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5315 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5316 if (!x || !y)
5317 {
5318 delete_insns_since (last);
5319 return NULL_RTX;
5320 }
5321
5322 if (target_mode == VOIDmode)
5323 int_target_mode = result_mode;
5324 else
5325 int_target_mode = as_a <scalar_int_mode> (target_mode);
5326 if (!target)
5327 target = gen_reg_rtx (int_target_mode);
5328
5329 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5330
5331 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5332 create_fixed_operand (&ops[1], comparison);
5333 create_fixed_operand (&ops[2], x);
5334 create_fixed_operand (&ops[3], y);
5335 if (!maybe_expand_insn (icode, 4, ops))
5336 {
5337 delete_insns_since (last);
5338 return NULL_RTX;
5339 }
5340 subtarget = ops[0].value;
5341
5342 /* If we are converting to a wider mode, first convert to
5343 INT_TARGET_MODE, then normalize. This produces better combining
5344 opportunities on machines that have a SIGN_EXTRACT when we are
5345 testing a single bit. This mostly benefits the 68k.
5346
5347 If STORE_FLAG_VALUE does not have the sign bit set when
5348 interpreted in MODE, we can do this conversion as unsigned, which
5349 is usually more efficient. */
5350 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5351 {
5352 convert_move (target, subtarget,
5353 val_signbit_known_clear_p (result_mode,
5354 STORE_FLAG_VALUE));
5355 op0 = target;
5356 result_mode = int_target_mode;
5357 }
5358 else
5359 op0 = subtarget;
5360
5361 /* If we want to keep subexpressions around, don't reuse our last
5362 target. */
5363 if (optimize)
5364 subtarget = 0;
5365
5366 /* Now normalize to the proper value in MODE. Sometimes we don't
5367 have to do anything. */
5368 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5369 ;
5370 /* STORE_FLAG_VALUE might be the most negative number, so write
5371      the comparison this way to avoid a compile-time warning. */
5372 else if (- normalizep == STORE_FLAG_VALUE)
5373 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5374
5375 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5376 it hard to use a value of just the sign bit due to ANSI integer
5377 constant typing rules. */
5378 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5379 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5380 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5381 normalizep == 1);
5382 else
5383 {
5384 gcc_assert (STORE_FLAG_VALUE & 1);
5385
5386 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5387 if (normalizep == -1)
5388 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5389 }
5390
5391 /* If we were converting to a smaller mode, do the conversion now. */
5392 if (int_target_mode != result_mode)
5393 {
5394 convert_move (target, op0, 0);
5395 return target;
5396 }
5397 else
5398 return op0;
5399 }
5400
5401
5402 /* A subroutine of emit_store_flag only including "tricks" that do not
5403 need a recursive call. These are kept separate to avoid infinite
5404 loops. */
5405
5406 static rtx
5407 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5408 machine_mode mode, int unsignedp, int normalizep,
5409 machine_mode target_mode)
5410 {
5411 rtx subtarget;
5412 enum insn_code icode;
5413 machine_mode compare_mode;
5414 enum mode_class mclass;
5415 enum rtx_code scode;
5416
5417 if (unsignedp)
5418 code = unsigned_condition (code);
5419 scode = swap_condition (code);
5420
5421 /* If one operand is constant, make it the second one. Only do this
5422 if the other operand is not constant as well. */
5423
5424 if (swap_commutative_operands_p (op0, op1))
5425 {
5426 std::swap (op0, op1);
5427 code = swap_condition (code);
5428 }
5429
5430 if (mode == VOIDmode)
5431 mode = GET_MODE (op0);
5432
5433 /* For some comparisons with 1 and -1, we can convert this to
5434 comparisons with zero. This will often produce more opportunities for
5435 store-flag insns. */
5436
5437 switch (code)
5438 {
5439 case LT:
5440 if (op1 == const1_rtx)
5441 op1 = const0_rtx, code = LE;
5442 break;
5443 case LE:
5444 if (op1 == constm1_rtx)
5445 op1 = const0_rtx, code = LT;
5446 break;
5447 case GE:
5448 if (op1 == const1_rtx)
5449 op1 = const0_rtx, code = GT;
5450 break;
5451 case GT:
5452 if (op1 == constm1_rtx)
5453 op1 = const0_rtx, code = GE;
5454 break;
5455 case GEU:
5456 if (op1 == const1_rtx)
5457 op1 = const0_rtx, code = NE;
5458 break;
5459 case LTU:
5460 if (op1 == const1_rtx)
5461 op1 = const0_rtx, code = EQ;
5462 break;
5463 default:
5464 break;
5465 }
5466
5467 /* If we are comparing a double-word integer with zero or -1, we can
5468 convert the comparison into one involving a single word. */
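  /* For instance, with 32-bit words and a 64-bit operand made of words LO
     and HI: op0 == 0 exactly when (LO | HI) == 0, op0 == -1 exactly when
     (LO & HI) == -1, and op0 < 0 or op0 >= 0 depends only on the sign
     of HI.  */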
5469 scalar_int_mode int_mode;
5470 if (is_int_mode (mode, &int_mode)
5471 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5472 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5473 {
5474 rtx tem;
5475 if ((code == EQ || code == NE)
5476 && (op1 == const0_rtx || op1 == constm1_rtx))
5477 {
5478 rtx op00, op01;
5479
5480 /* Do a logical OR or AND of the two words and compare the
5481 result. */
5482 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5483 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5484 tem = expand_binop (word_mode,
5485 op1 == const0_rtx ? ior_optab : and_optab,
5486 op00, op01, NULL_RTX, unsignedp,
5487 OPTAB_DIRECT);
5488
5489 if (tem != 0)
5490 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5491 unsignedp, normalizep);
5492 }
5493 else if ((code == LT || code == GE) && op1 == const0_rtx)
5494 {
5495 rtx op0h;
5496
5497 /* If testing the sign bit, can just test on high word. */
5498 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5499 subreg_highpart_offset (word_mode,
5500 int_mode));
5501 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5502 unsignedp, normalizep);
5503 }
5504 else
5505 tem = NULL_RTX;
5506
5507 if (tem)
5508 {
5509 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5510 return tem;
5511 if (!target)
5512 target = gen_reg_rtx (target_mode);
5513
5514 convert_move (target, tem,
5515 !val_signbit_known_set_p (word_mode,
5516 (normalizep ? normalizep
5517 : STORE_FLAG_VALUE)));
5518 return target;
5519 }
5520 }
5521
5522   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5523 complement of A (for GE) and shifting the sign bit to the low bit. */
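  /* For example, with a 32-bit mode and STORE_FLAG_VALUE == 1, "A < 0"
     becomes "(unsigned) A >> 31" and "A >= 0" becomes
     "(unsigned) ~A >> 31"; for a 0/-1 result an arithmetic shift is used
     instead.  */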
5524 if (op1 == const0_rtx && (code == LT || code == GE)
5525 && is_int_mode (mode, &int_mode)
5526 && (normalizep || STORE_FLAG_VALUE == 1
5527 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5528 {
5529 scalar_int_mode int_target_mode;
5530 subtarget = target;
5531
5532 if (!target)
5533 int_target_mode = int_mode;
5534 else
5535 {
5536 /* If the result is to be wider than OP0, it is best to convert it
5537 first. If it is to be narrower, it is *incorrect* to convert it
5538 first. */
5539 int_target_mode = as_a <scalar_int_mode> (target_mode);
5540 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5541 {
5542 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5543 int_mode = int_target_mode;
5544 }
5545 }
5546
5547 if (int_target_mode != int_mode)
5548 subtarget = 0;
5549
5550 if (code == GE)
5551 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5552 ((STORE_FLAG_VALUE == 1 || normalizep)
5553 ? 0 : subtarget), 0);
5554
5555 if (STORE_FLAG_VALUE == 1 || normalizep)
5556 /* If we are supposed to produce a 0/1 value, we want to do
5557 a logical shift from the sign bit to the low-order bit; for
5558 a -1/0 value, we do an arithmetic shift. */
5559 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5560 GET_MODE_BITSIZE (int_mode) - 1,
5561 subtarget, normalizep != -1);
5562
5563 if (int_mode != int_target_mode)
5564 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5565
5566 return op0;
5567 }
5568
5569 mclass = GET_MODE_CLASS (mode);
5570 FOR_EACH_MODE_FROM (compare_mode, mode)
5571 {
5572 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5573 icode = optab_handler (cstore_optab, optab_mode);
5574 if (icode != CODE_FOR_nothing)
5575 {
5576 do_pending_stack_adjust ();
5577 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5578 unsignedp, op0, op1, normalizep, target_mode);
5579 if (tem)
5580 return tem;
5581
5582 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5583 {
5584 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5585 unsignedp, op1, op0, normalizep, target_mode);
5586 if (tem)
5587 return tem;
5588 }
5589 break;
5590 }
5591 }
5592
5593 return 0;
5594 }
5595
5596 /* Subroutine of emit_store_flag that handles cases in which the operands
5597 are scalar integers. SUBTARGET is the target to use for temporary
5598 operations and TRUEVAL is the value to store when the condition is
5599 true. All other arguments are as for emit_store_flag. */
5600
5601 rtx
5602 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5603 rtx op1, scalar_int_mode mode, int unsignedp,
5604 int normalizep, rtx trueval)
5605 {
5606 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5607 rtx_insn *last = get_last_insn ();
5608 rtx tem;
5609
5610 /* If this is an equality comparison of integers, we can try to exclusive-or
5611 (or subtract) the two operands and use a recursive call to try the
5612 comparison with zero. Don't do any of these cases if branches are
5613 very cheap. */
5614
5615 if ((code == EQ || code == NE) && op1 != const0_rtx)
5616 {
5617 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5618 OPTAB_WIDEN);
5619
5620 if (tem == 0)
5621 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5622 OPTAB_WIDEN);
5623 if (tem != 0)
5624 tem = emit_store_flag (target, code, tem, const0_rtx,
5625 mode, unsignedp, normalizep);
5626 if (tem != 0)
5627 return tem;
5628
5629 delete_insns_since (last);
5630 }
5631
5632 /* For integer comparisons, try the reverse comparison. However, for
5633      small X, and if we'd have to extend anyway, implementing "X != 0"
5634 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5635 rtx_code rcode = reverse_condition (code);
5636 if (can_compare_p (rcode, mode, ccp_store_flag)
5637 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5638 && code == NE
5639 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5640 && op1 == const0_rtx))
5641 {
5642 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5643 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5644
5645 /* Again, for the reverse comparison, use either an addition or a XOR. */
5646 if (want_add
5647 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5648 optimize_insn_for_speed_p ()) == 0)
5649 {
5650 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5651 STORE_FLAG_VALUE, target_mode);
5652 if (tem != 0)
5653 tem = expand_binop (target_mode, add_optab, tem,
5654 gen_int_mode (normalizep, target_mode),
5655 target, 0, OPTAB_WIDEN);
5656 }
5657 else if (!want_add
5658 && rtx_cost (trueval, mode, XOR, 1,
5659 optimize_insn_for_speed_p ()) == 0)
5660 {
5661 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5662 normalizep, target_mode);
5663 if (tem != 0)
5664 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5665 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5666 }
5667
5668 if (tem != 0)
5669 return tem;
5670 delete_insns_since (last);
5671 }
5672
5673 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5674 the constant zero. Reject all other comparisons at this point. Only
5675 do LE and GT if branches are expensive since they are expensive on
5676 2-operand machines. */
5677
5678 if (op1 != const0_rtx
5679 || (code != EQ && code != NE
5680 && (BRANCH_COST (optimize_insn_for_speed_p (),
5681 false) <= 1 || (code != LE && code != GT))))
5682 return 0;
5683
5684 /* Try to put the result of the comparison in the sign bit. Assume we can't
5685 do the necessary operation below. */
5686
5687 tem = 0;
5688
5689 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5690 the sign bit set. */
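  /* For example: for A == 0, A | (A - 1) is 0 | -1 == -1 (sign bit set);
     for A == 1 it is 1 | 0 == 1 (sign bit clear); and for negative A the
     sign bit of A itself is already set.  */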
5691
5692 if (code == LE)
5693 {
5694 /* This is destructive, so SUBTARGET can't be OP0. */
5695 if (rtx_equal_p (subtarget, op0))
5696 subtarget = 0;
5697
5698 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5699 OPTAB_WIDEN);
5700 if (tem)
5701 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5702 OPTAB_WIDEN);
5703 }
5704
5705   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5706 number of bits in the mode of OP0, minus one. */
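  /* With 32-bit A, for example: for A == 5 this is 0 - 5 == -5 (sign bit
     set); for A == 0 it is 0; and for A == -3 it is -1 - (-3) == 2
     (sign bit clear).  */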
5707
5708 if (code == GT)
5709 {
5710 if (rtx_equal_p (subtarget, op0))
5711 subtarget = 0;
5712
5713 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5714 GET_MODE_BITSIZE (mode) - 1,
5715 subtarget, 0);
5716 if (tem)
5717 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5718 OPTAB_WIDEN);
5719 }
5720
5721 if (code == EQ || code == NE)
5722 {
5723 /* For EQ or NE, one way to do the comparison is to apply an operation
5724 that converts the operand into a positive number if it is nonzero
5725 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5726 for NE we negate. This puts the result in the sign bit. Then we
5727 normalize with a shift, if needed.
5728
5729 Two operations that can do the above actions are ABS and FFS, so try
5730 them. If that doesn't work, and MODE is smaller than a full word,
5731 we can use zero-extension to the wider mode (an unsigned conversion)
5732 as the operation. */
5733
5734 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5735 that is compensated by the subsequent overflow when subtracting
5736 one / negating. */
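      /* For example, with ABS and code == EQ: abs (A) - 1 is -1 for A == 0
	 (sign bit set) and nonnegative for every other A, including
	 A == INT_MIN, where the subtraction wraps to INT_MAX.  */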
5737
5738 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5739 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5740 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5741 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5742 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5743 {
5744 tem = convert_modes (word_mode, mode, op0, 1);
5745 mode = word_mode;
5746 }
5747
5748 if (tem != 0)
5749 {
5750 if (code == EQ)
5751 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5752 0, OPTAB_WIDEN);
5753 else
5754 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5755 }
5756
5757 /* If we couldn't do it that way, for NE we can "or" the two's complement
5758 of the value with itself. For EQ, we take the one's complement of
5759 that "or", which is an extra insn, so we only handle EQ if branches
5760 are expensive. */
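      /* For example, (-A) | A has the sign bit set exactly when A is
	 nonzero, since for nonzero A at least one of A and -A is negative
	 (including A == INT_MIN, where -A == A).  */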
5761
5762 if (tem == 0
5763 && (code == NE
5764 || BRANCH_COST (optimize_insn_for_speed_p (),
5765 false) > 1))
5766 {
5767 if (rtx_equal_p (subtarget, op0))
5768 subtarget = 0;
5769
5770 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5771 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5772 OPTAB_WIDEN);
5773
5774 if (tem && code == EQ)
5775 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5776 }
5777 }
5778
5779 if (tem && normalizep)
5780 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5781 GET_MODE_BITSIZE (mode) - 1,
5782 subtarget, normalizep == 1);
5783
5784 if (tem)
5785 {
5786 if (!target)
5787 ;
5788 else if (GET_MODE (tem) != target_mode)
5789 {
5790 convert_move (target, tem, 0);
5791 tem = target;
5792 }
5793 else if (!subtarget)
5794 {
5795 emit_move_insn (target, tem);
5796 tem = target;
5797 }
5798 }
5799 else
5800 delete_insns_since (last);
5801
5802 return tem;
5803 }
5804
5805 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5806 and storing in TARGET. Normally return TARGET.
5807 Return 0 if that cannot be done.
5808
5809 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5810 it is VOIDmode, they cannot both be CONST_INT.
5811
5812 UNSIGNEDP is for the case where we have to widen the operands
5813 to perform the operation. It says to use zero-extension.
5814
5815 NORMALIZEP is 1 if we should convert the result to be either zero
5816    or one.  NORMALIZEP is -1 if we should convert the result to be
5817 either zero or -1. If NORMALIZEP is zero, the result will be left
5818 "raw" out of the scc insn. */
5819
5820 rtx
5821 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5822 machine_mode mode, int unsignedp, int normalizep)
5823 {
5824 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5825 enum rtx_code rcode;
5826 rtx subtarget;
5827 rtx tem, trueval;
5828 rtx_insn *last;
5829
5830 /* If we compare constants, we shouldn't use a store-flag operation,
5831 but a constant load. We can get there via the vanilla route that
5832 usually generates a compare-branch sequence, but will in this case
5833 fold the comparison to a constant, and thus elide the branch. */
5834 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5835 return NULL_RTX;
5836
5837 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5838 target_mode);
5839 if (tem)
5840 return tem;
5841
5842 /* If we reached here, we can't do this with a scc insn, however there
5843 are some comparisons that can be done in other ways. Don't do any
5844 of these cases if branches are very cheap. */
5845 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5846 return 0;
5847
5848 /* See what we need to return. We can only return a 1, -1, or the
5849 sign bit. */
5850
5851 if (normalizep == 0)
5852 {
5853 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5854 normalizep = STORE_FLAG_VALUE;
5855
5856 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5857 ;
5858 else
5859 return 0;
5860 }
5861
5862 last = get_last_insn ();
5863
5864 /* If optimizing, use different pseudo registers for each insn, instead
5865 of reusing the same pseudo. This leads to better CSE, but slows
5866 down the compiler, since there are more pseudos. */
5867 subtarget = (!optimize
5868 && (target_mode == mode)) ? target : NULL_RTX;
5869 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5870
5871 /* For floating-point comparisons, try the reverse comparison or try
5872 changing the "orderedness" of the comparison. */
5873 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5874 {
5875 enum rtx_code first_code;
5876 bool and_them;
5877
5878 rcode = reverse_condition_maybe_unordered (code);
5879 if (can_compare_p (rcode, mode, ccp_store_flag)
5880 && (code == ORDERED || code == UNORDERED
5881 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5882 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5883 {
5884 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5885 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5886
5887 /* For the reverse comparison, use either an addition or a XOR. */
5888 if (want_add
5889 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5890 optimize_insn_for_speed_p ()) == 0)
5891 {
5892 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5893 STORE_FLAG_VALUE, target_mode);
5894 if (tem)
5895 return expand_binop (target_mode, add_optab, tem,
5896 gen_int_mode (normalizep, target_mode),
5897 target, 0, OPTAB_WIDEN);
5898 }
5899 else if (!want_add
5900 && rtx_cost (trueval, mode, XOR, 1,
5901 optimize_insn_for_speed_p ()) == 0)
5902 {
5903 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5904 normalizep, target_mode);
5905 if (tem)
5906 return expand_binop (target_mode, xor_optab, tem, trueval,
5907 target, INTVAL (trueval) >= 0,
5908 OPTAB_WIDEN);
5909 }
5910 }
5911
5912 delete_insns_since (last);
5913
5914 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5915 if (code == ORDERED || code == UNORDERED)
5916 return 0;
5917
5918 and_them = split_comparison (code, mode, &first_code, &code);
5919
5920 /* If there are no NaNs, the first comparison should always fall through.
5921 Effectively change the comparison to the other one. */
5922 if (!HONOR_NANS (mode))
5923 {
5924 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5925 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5926 target_mode);
5927 }
5928
5929 if (!HAVE_conditional_move)
5930 return 0;
5931
5932 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5933 conditional move. */
5934 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5935 normalizep, target_mode);
5936 if (tem == 0)
5937 return 0;
5938
5939 if (and_them)
5940 tem = emit_conditional_move (target, code, op0, op1, mode,
5941 tem, const0_rtx, GET_MODE (tem), 0);
5942 else
5943 tem = emit_conditional_move (target, code, op0, op1, mode,
5944 trueval, tem, GET_MODE (tem), 0);
5945
5946 if (tem == 0)
5947 delete_insns_since (last);
5948 return tem;
5949 }
5950
5951 /* The remaining tricks only apply to integer comparisons. */
5952
5953 scalar_int_mode int_mode;
5954 if (is_int_mode (mode, &int_mode))
5955 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
5956 unsignedp, normalizep, trueval);
5957
5958 return 0;
5959 }
5960
5961 /* Like emit_store_flag, but always succeeds. */
5962
5963 rtx
5964 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5965 machine_mode mode, int unsignedp, int normalizep)
5966 {
5967 rtx tem;
5968 rtx_code_label *label;
5969 rtx trueval, falseval;
5970
5971 /* First see if emit_store_flag can do the job. */
5972 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5973 if (tem != 0)
5974 return tem;
5975
5976 if (!target)
5977 target = gen_reg_rtx (word_mode);
5978
5979 /* If this failed, we have to do this with set/compare/jump/set code.
5980 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5981 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5982 if (code == NE
5983 && GET_MODE_CLASS (mode) == MODE_INT
5984 && REG_P (target)
5985 && op0 == target
5986 && op1 == const0_rtx)
5987 {
5988 label = gen_label_rtx ();
5989 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5990 NULL_RTX, NULL, label,
5991 profile_probability::uninitialized ());
5992 emit_move_insn (target, trueval);
5993 emit_label (label);
5994 return target;
5995 }
5996
5997 if (!REG_P (target)
5998 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5999 target = gen_reg_rtx (GET_MODE (target));
6000
6001 /* Jump in the right direction if the target cannot implement CODE
6002 but can jump on its reverse condition. */
6003 falseval = const0_rtx;
6004 if (! can_compare_p (code, mode, ccp_jump)
6005 && (! FLOAT_MODE_P (mode)
6006 || code == ORDERED || code == UNORDERED
6007 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6008 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6009 {
6010 enum rtx_code rcode;
6011 if (FLOAT_MODE_P (mode))
6012 rcode = reverse_condition_maybe_unordered (code);
6013 else
6014 rcode = reverse_condition (code);
6015
6016 /* Canonicalize to UNORDERED for the libcall. */
6017 if (can_compare_p (rcode, mode, ccp_jump)
6018 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6019 {
6020 falseval = trueval;
6021 trueval = const0_rtx;
6022 code = rcode;
6023 }
6024 }
6025
6026 emit_move_insn (target, trueval);
6027 label = gen_label_rtx ();
6028 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6029 label, profile_probability::uninitialized ());
6030
6031 emit_move_insn (target, falseval);
6032 emit_label (label);
6033
6034 return target;
6035 }
6036 \f
6037 /* Perform possibly multi-word comparison and conditional jump to LABEL
6038    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6039 now a thin wrapper around do_compare_rtx_and_jump. */
6040
6041 static void
6042 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6043 rtx_code_label *label)
6044 {
6045 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6046 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6047 NULL, label, profile_probability::uninitialized ());
6048 }