gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2017 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "memmodel.h"
  31 #include "tm_p.h"
  32 #include "expmed.h"
  33 #include "optabs.h"
  34 #include "emit-rtl.h"
  35 #include "diagnostic-core.h"
  36 #include "fold-const.h"
  37 #include "stor-layout.h"
  38 #include "dojump.h"
  39 #include "explow.h"
  40 #include "expr.h"
  41 #include "langhooks.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx, bool);
  53 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  54                                      unsigned HOST_WIDE_INT,
  55                                      rtx, bool);
  56 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    rtx, bool);
  61 static rtx extract_fixed_bit_field (machine_mode, rtx,
  62                                     unsigned HOST_WIDE_INT,
  63                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  64 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  65                                       unsigned HOST_WIDE_INT,
  66                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  67 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  68 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  69                                     unsigned HOST_WIDE_INT, int, bool);
  70 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  71 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73
  74 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  75    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  76    The mask is truncated if necessary to the width of mode MODE.  The
  77    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  78
  79 static inline rtx
  80 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  81 {
  82   return immed_wide_int_const
  83     (wi::shifted_mask (bitpos, bitsize, complement,
  84                        GET_MODE_PRECISION (mode)), mode);
  85 }
  86
  /* Nonzero if X is either zero or an exact power of two, i.e. if
     clearing the lowest set bit of X leaves nothing behind.  X is
     evaluated more than once.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  ((((x) - HOST_WIDE_INT_1U) & (x)) == 0)
  90
  91 struct init_expmed_rtl
  92 {
  93   rtx reg;
  94   rtx plus;
  95   rtx neg;
  96   rtx mult;
  97   rtx sdiv;
  98   rtx udiv;
  99   rtx sdiv_32;
 100   rtx smod_32;
 101   rtx wide_mult;
 102   rtx wide_lshr;
 103   rtx wide_trunc;
 104   rtx shift;
 105   rtx shift_mult;
 106   rtx shift_add;
 107   rtx shift_sub0;
 108   rtx shift_sub1;
 109   rtx zext;
 110   rtx trunc;
 111
 112   rtx pow2[MAX_BITS_PER_WORD];
 113   rtx cint[MAX_BITS_PER_WORD];
 114 };
 115
 116 static void
 117 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 118                       machine_mode from_mode, bool speed)
 119 {
 120   int to_size, from_size;
 121   rtx which;
 122
 123   to_size = GET_MODE_PRECISION (to_mode);
 124   from_size = GET_MODE_PRECISION (from_mode);
 125
 126   /* Most partial integers have a precision less than the "full"
 127      integer it requires for storage.  In case one doesn't, for
 128      comparison purposes here, reduce the bit size by one in that
 129      case.  */
 130   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 131       && pow2p_hwi (to_size))
 132     to_size --;
 133   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 134       && pow2p_hwi (from_size))
 135     from_size --;
 136
 137   /* Assume cost of zero-extend and sign-extend is the same.  */
 138   which = (to_size < from_size ? all->trunc : all->zext);
 139
 140   PUT_MODE (all->reg, from_mode);
 141   set_convert_cost (to_mode, from_mode, speed,
 142                     set_src_cost (which, to_mode, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 198                                                        speed));
 199       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 200                                                         speed));
 201       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 202                                                         speed));
 203     }
 204
 205   if (SCALAR_INT_MODE_P (mode))
 206     {
 207       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 208            mode_from = (machine_mode)(mode_from + 1))
 209         init_expmed_one_conv (all, mode, mode_from, speed);
 210     }
 211   if (GET_MODE_CLASS (mode) == MODE_INT)
 212     {
 213       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 214       if (wider_mode != VOIDmode)
 215         {
 216           PUT_MODE (all->zext, wider_mode);
 217           PUT_MODE (all->wide_mult, wider_mode);
 218           PUT_MODE (all->wide_lshr, wider_mode);
 219           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 220
 221           set_mul_widen_cost (speed, wider_mode,
 222                               set_src_cost (all->wide_mult, wider_mode, speed));
 223           set_mul_highpart_cost (speed, mode,
 224                                  set_src_cost (all->wide_trunc, mode, speed));
 225         }
 226     }
 227 }
 228
 229 void
 230 init_expmed (void)
 231 {
 232   struct init_expmed_rtl all;
 233   machine_mode mode = QImode;
 234   int m, speed;
 235
 236   memset (&all, 0, sizeof all);
 237   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 238     {
 239       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 240       all.cint[m] = GEN_INT (m);
 241     }
 242
 243   /* Avoid using hard regs in ways which may be unsupported.  */
 244   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 245   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 246   all.neg = gen_rtx_NEG (mode, all.reg);
 247   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 248   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 249   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 250   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 251   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 252   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 253   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 254   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 255   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 256   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 257   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 258   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 260   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 261   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 262
 263   for (speed = 0; speed < 2; speed++)
 264     {
 265       crtl->maybe_hot_insn_p = speed;
 266       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 267
 268       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 269            mode = (machine_mode)(mode + 1))
 270         init_expmed_one_mode (&all, mode, speed);
 271
 272       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 273         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 274              mode = (machine_mode)(mode + 1))
 275           init_expmed_one_mode (&all, mode, speed);
 276
 277       if (MIN_MODE_VECTOR_INT != VOIDmode)
 278         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 279              mode = (machine_mode)(mode + 1))
 280           init_expmed_one_mode (&all, mode, speed);
 281     }
 282
 283   if (alg_hash_used_p ())
 284     {
 285       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 286       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 287     }
 288   else
 289     set_alg_hash_used_p (true);
 290   default_rtl_profile ();
 291
 292   ggc_free (all.trunc);
 293   ggc_free (all.shift_sub1);
 294   ggc_free (all.shift_sub0);
 295   ggc_free (all.shift_add);
 296   ggc_free (all.shift_mult);
 297   ggc_free (all.shift);
 298   ggc_free (all.wide_trunc);
 299   ggc_free (all.wide_lshr);
 300   ggc_free (all.wide_mult);
 301   ggc_free (all.zext);
 302   ggc_free (all.smod_32);
 303   ggc_free (all.sdiv_32);
 304   ggc_free (all.udiv);
 305   ggc_free (all.sdiv);
 306   ggc_free (all.mult);
 307   ggc_free (all.neg);
 308   ggc_free (all.plus);
 309   ggc_free (all.reg);
 310 }
 311
 312 /* Return an rtx representing minus the value of X.
 313    MODE is the intended mode of the result,
 314    useful if X is a CONST_INT.  */
 315
 316 rtx
 317 negate_rtx (machine_mode mode, rtx x)
 318 {
 319   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 320
 321   if (result == 0)
 322     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 323
 324   return result;
 325 }
 326
 327 /* Whether reverse storage order is supported on the target.  */
 328 static int reverse_storage_order_supported = -1;
 329
 330 /* Check whether reverse storage order is supported on the target.  */
 331
 332 static void
 333 check_reverse_storage_order_support (void)
 334 {
 335   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 336     {
 337       reverse_storage_order_supported = 0;
 338       sorry ("reverse scalar storage order");
 339     }
 340   else
 341     reverse_storage_order_supported = 1;
 342 }
 343
 344 /* Whether reverse FP storage order is supported on the target.  */
 345 static int reverse_float_storage_order_supported = -1;
 346
 347 /* Check whether reverse FP storage order is supported on the target.  */
 348
 349 static void
 350 check_reverse_float_storage_order_support (void)
 351 {
 352   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 353     {
 354       reverse_float_storage_order_supported = 0;
 355       sorry ("reverse floating-point scalar storage order");
 356     }
 357   else
 358     reverse_float_storage_order_supported = 1;
 359 }
 360
 361 /* Return an rtx representing value of X with reverse storage order.
 362    MODE is the intended mode of the result,
 363    useful if X is a CONST_INT.  */
 364
 365 rtx
 366 flip_storage_order (machine_mode mode, rtx x)
 367 {
 368   machine_mode int_mode;
 369   rtx result;
 370
 371   if (mode == QImode)
 372     return x;
 373
 374   if (COMPLEX_MODE_P (mode))
 375     {
 376       rtx real = read_complex_part (x, false);
 377       rtx imag = read_complex_part (x, true);
 378
 379       real = flip_storage_order (GET_MODE_INNER (mode), real);
 380       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 381
 382       return gen_rtx_CONCAT (mode, real, imag);
 383     }
 384
 385   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 386     check_reverse_storage_order_support ();
 387
 388   if (SCALAR_INT_MODE_P (mode))
 389     int_mode = mode;
 390   else
 391     {
 392       if (FLOAT_MODE_P (mode)
 393           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 394         check_reverse_float_storage_order_support ();
 395
 396       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 397       if (int_mode == BLKmode)
 398         {
 399           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 400           return x;
 401         }
 402       x = gen_lowpart (int_mode, x);
 403     }
 404
 405   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 406   if (result == 0)
 407     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 408
 409   if (int_mode != mode)
 410     result = gen_lowpart (mode, result);
 411
 412   return result;
 413 }
 414
 415 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 416    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 417    If MODE is BLKmode, return a reference to every byte in the bitfield.
 418    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 419
 420 static rtx
 421 narrow_bit_field_mem (rtx mem, machine_mode mode,
 422                       unsigned HOST_WIDE_INT bitsize,
 423                       unsigned HOST_WIDE_INT bitnum,
 424                       unsigned HOST_WIDE_INT *new_bitnum)
 425 {
 426   if (mode == BLKmode)
 427     {
 428       *new_bitnum = bitnum % BITS_PER_UNIT;
 429       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 430       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 431                             / BITS_PER_UNIT);
 432       return adjust_bitfield_address_size (mem, mode, offset, size);
 433     }
 434   else
 435     {
 436       unsigned int unit = GET_MODE_BITSIZE (mode);
 437       *new_bitnum = bitnum % unit;
 438       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 439       return adjust_bitfield_address (mem, mode, offset);
 440     }
 441 }
 442
 443 /* The caller wants to perform insertion or extraction PATTERN on a
 444    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 445    BITREGION_START and BITREGION_END are as for store_bit_field
 446    and FIELDMODE is the natural mode of the field.
 447
 448    Search for a mode that is compatible with the memory access
 449    restrictions and (where applicable) with a register insertion or
 450    extraction.  Return the new memory on success, storing the adjusted
 451    bit position in *NEW_BITNUM.  Return null otherwise.  */
 452
 453 static rtx
 454 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 455                               rtx op0, HOST_WIDE_INT bitsize,
 456                               HOST_WIDE_INT bitnum,
 457                               unsigned HOST_WIDE_INT bitregion_start,
 458                               unsigned HOST_WIDE_INT bitregion_end,
 459                               machine_mode fieldmode,
 460                               unsigned HOST_WIDE_INT *new_bitnum)
 461 {
 462   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 463                                 bitregion_end, MEM_ALIGN (op0),
 464                                 MEM_VOLATILE_P (op0));
 465   machine_mode best_mode;
 466   if (iter.next_mode (&best_mode))
 467     {
 468       /* We can use a memory in BEST_MODE.  See whether this is true for
 469          any wider modes.  All other things being equal, we prefer to
 470          use the widest mode possible because it tends to expose more
 471          CSE opportunities.  */
 472       if (!iter.prefer_smaller_modes ())
 473         {
 474           /* Limit the search to the mode required by the corresponding
 475              register insertion or extraction instruction, if any.  */
 476           machine_mode limit_mode = word_mode;
 477           extraction_insn insn;
 478           if (get_best_reg_extraction_insn (&insn, pattern,
 479                                             GET_MODE_BITSIZE (best_mode),
 480                                             fieldmode))
 481             limit_mode = insn.field_mode;
 482
 483           machine_mode wider_mode;
 484           while (iter.next_mode (&wider_mode)
 485                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 486             best_mode = wider_mode;
 487         }
 488       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 489                                    new_bitnum);
 490     }
 491   return NULL_RTX;
 492 }
 493
 494 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 495    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 496    offset is then BITNUM / BITS_PER_UNIT.  */
 497
 498 static bool
 499 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 500                      unsigned HOST_WIDE_INT bitsize,
 501                      machine_mode struct_mode)
 502 {
 503   if (BYTES_BIG_ENDIAN)
 504     return (bitnum % BITS_PER_UNIT == 0
 505             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 506                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 507   else
 508     return bitnum % BITS_PER_WORD == 0;
 509 }
 510
 511 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 512    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 513    Return false if the access would touch memory outside the range
 514    BITREGION_START to BITREGION_END for conformance to the C++ memory
 515    model.  */
 516
 517 static bool
 518 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 519                             unsigned HOST_WIDE_INT bitnum,
 520                             machine_mode fieldmode,
 521                             unsigned HOST_WIDE_INT bitregion_start,
 522                             unsigned HOST_WIDE_INT bitregion_end)
 523 {
 524   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 525
 526   /* -fstrict-volatile-bitfields must be enabled and we must have a
 527      volatile MEM.  */
 528   if (!MEM_P (op0)
 529       || !MEM_VOLATILE_P (op0)
 530       || flag_strict_volatile_bitfields <= 0)
 531     return false;
 532
 533   /* Non-integral modes likely only happen with packed structures.
 534      Punt.  */
 535   if (!SCALAR_INT_MODE_P (fieldmode))
 536     return false;
 537
 538   /* The bit size must not be larger than the field mode, and
 539      the field mode must not be larger than a word.  */
 540   if (bitsize > modesize || modesize > BITS_PER_WORD)
 541     return false;
 542
 543   /* Check for cases of unaligned fields that must be split.  */
 544   if (bitnum % modesize + bitsize > modesize)
 545     return false;
 546
 547   /* The memory must be sufficiently aligned for a MODESIZE access.
 548      This condition guarantees, that the memory access will not
 549      touch anything after the end of the structure.  */
 550   if (MEM_ALIGN (op0) < modesize)
 551     return false;
 552
 553   /* Check for cases where the C++ memory model applies.  */
 554   if (bitregion_end != 0
 555       && (bitnum - bitnum % modesize < bitregion_start
 556           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 557     return false;
 558
 559   return true;
 560 }
 561
 562 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 563    bit number BITNUM can be treated as a simple value of mode MODE.  */
 564
 565 static bool
 566 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 567                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 568 {
 569   return (MEM_P (op0)
 570           && bitnum % BITS_PER_UNIT == 0
 571           && bitsize == GET_MODE_BITSIZE (mode)
 572           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 573               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 574                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 575 }
 576 \f
 577 /* Try to use instruction INSV to store VALUE into a field of OP0.
 578    BITSIZE and BITNUM are as for store_bit_field.  */
 579
 580 static bool
 581 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 582                             unsigned HOST_WIDE_INT bitsize,
 583                             unsigned HOST_WIDE_INT bitnum,
 584                             rtx value)
 585 {
 586   struct expand_operand ops[4];
 587   rtx value1;
 588   rtx xop0 = op0;
 589   rtx_insn *last = get_last_insn ();
 590   bool copy_back = false;
 591
 592   machine_mode op_mode = insv->field_mode;
 593   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 594   if (bitsize == 0 || bitsize > unit)
 595     return false;
 596
 597   if (MEM_P (xop0))
 598     /* Get a reference to the first byte of the field.  */
 599     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 600                                  &bitnum);
 601   else
 602     {
 603       /* Convert from counting within OP0 to counting in OP_MODE.  */
 604       if (BYTES_BIG_ENDIAN)
 605         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 606
 607       /* If xop0 is a register, we need it in OP_MODE
 608          to make it acceptable to the format of insv.  */
 609       if (GET_CODE (xop0) == SUBREG)
 610         /* We can't just change the mode, because this might clobber op0,
 611            and we will need the original value of op0 if insv fails.  */
 612         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 613       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 614         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 615     }
 616
 617   /* If the destination is a paradoxical subreg such that we need a
 618      truncate to the inner mode, perform the insertion on a temporary and
 619      truncate the result to the original destination.  Note that we can't
 620      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 621      X) 0)) is (reg:N X).  */
 622   if (GET_CODE (xop0) == SUBREG
 623       && REG_P (SUBREG_REG (xop0))
 624       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 625                                          op_mode))
 626     {
 627       rtx tem = gen_reg_rtx (op_mode);
 628       emit_move_insn (tem, xop0);
 629       xop0 = tem;
 630       copy_back = true;
 631     }
 632
 633   /* There are similar overflow check at the start of store_bit_field_1,
 634      but that only check the situation where the field lies completely
 635      outside the register, while there do have situation where the field
 636      lies partialy in the register, we need to adjust bitsize for this
 637      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 638      will broken on those arch support bit insert instruction, like arm, aarch64
 639      etc.  */
 640   if (bitsize + bitnum > unit && bitnum < unit)
 641     {
 642       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 643                "destination object, data truncated into %wu-bit",
 644                bitsize, unit - bitnum);
 645       bitsize = unit - bitnum;
 646     }
 647
 648   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 649      "backwards" from the size of the unit we are inserting into.
 650      Otherwise, we count bits from the most significant on a
 651      BYTES/BITS_BIG_ENDIAN machine.  */
 652
 653   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 654     bitnum = unit - bitsize - bitnum;
 655
 656   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 657   value1 = value;
 658   if (GET_MODE (value) != op_mode)
 659     {
 660       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 661         {
 662           rtx tmp;
 663           /* Optimization: Don't bother really extending VALUE
 664              if it has all the bits we will actually use.  However,
 665              if we must narrow it, be sure we do it correctly.  */
 666
 667           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 668             {
 669               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 670               if (! tmp)
 671                 tmp = simplify_gen_subreg (op_mode,
 672                                            force_reg (GET_MODE (value),
 673                                                       value1),
 674                                            GET_MODE (value), 0);
 675             }
 676           else
 677             {
 678               tmp = gen_lowpart_if_possible (op_mode, value1);
 679               if (! tmp)
 680                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 681                                                        value1));
 682             }
 683           value1 = tmp;
 684         }
 685       else if (CONST_INT_P (value))
 686         value1 = gen_int_mode (INTVAL (value), op_mode);
 687       else
 688         /* Parse phase is supposed to make VALUE's data type
 689            match that of the component reference, which is a type
 690            at least as wide as the field; so VALUE should have
 691            a mode that corresponds to that type.  */
 692         gcc_assert (CONSTANT_P (value));
 693     }
 694
 695   create_fixed_operand (&ops[0], xop0);
 696   create_integer_operand (&ops[1], bitsize);
 697   create_integer_operand (&ops[2], bitnum);
 698   create_input_operand (&ops[3], value1, op_mode);
 699   if (maybe_expand_insn (insv->icode, 4, ops))
 700     {
 701       if (copy_back)
 702         convert_move (op0, xop0, true);
 703       return true;
 704     }
 705   delete_insns_since (last);
 706   return false;
 707 }
 708
 709 /* A subroutine of store_bit_field, with the same arguments.  Return true
 710    if the operation could be implemented.
 711
 712    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 713    no other way of implementing the operation.  If FALLBACK_P is false,
 714    return false instead.  */
 715
 716 static bool
 717 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 718                    unsigned HOST_WIDE_INT bitnum,
 719                    unsigned HOST_WIDE_INT bitregion_start,
 720                    unsigned HOST_WIDE_INT bitregion_end,
 721                    machine_mode fieldmode,
 722                    rtx value, bool reverse, bool fallback_p)
 723 {
 724   rtx op0 = str_rtx;
 725   rtx orig_value;
 726
 727   while (GET_CODE (op0) == SUBREG)
 728     {
 729       /* The following line once was done only if WORDS_BIG_ENDIAN,
 730          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 731          meaningful at a much higher level; when structures are copied
 732          between memory and regs, the higher-numbered regs
 733          always get higher addresses.  */
 734       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 735       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 736       int byte_offset = 0;
 737
 738       /* Paradoxical subregs need special handling on big-endian machines.  */
 739       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 740         {
 741           int difference = inner_mode_size - outer_mode_size;
 742
 743           if (WORDS_BIG_ENDIAN)
 744             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 745           if (BYTES_BIG_ENDIAN)
 746             byte_offset += difference % UNITS_PER_WORD;
 747         }
 748       else
 749         byte_offset = SUBREG_BYTE (op0);
 750
 751       bitnum += byte_offset * BITS_PER_UNIT;
 752       op0 = SUBREG_REG (op0);
 753     }
 754
 755   /* No action is needed if the target is a register and if the field
 756      lies completely outside that register.  This can occur if the source
 757      code contains an out-of-bounds access to a small array.  */
 758   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 759     return true;
 760
 761   /* Use vec_set patterns for inserting parts of vectors whenever
 762      available.  */
 763   if (VECTOR_MODE_P (GET_MODE (op0))
 764       && !MEM_P (op0)
 765       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 766       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 767       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 768       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 769     {
 770       struct expand_operand ops[3];
 771       machine_mode outermode = GET_MODE (op0);
 772       machine_mode innermode = GET_MODE_INNER (outermode);
 773       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 774       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 775
 776       create_fixed_operand (&ops[0], op0);
 777       create_input_operand (&ops[1], value, innermode);
 778       create_integer_operand (&ops[2], pos);
 779       if (maybe_expand_insn (icode, 3, ops))
 780         return true;
 781     }
 782
 783   /* If the target is a register, overwriting the entire object, or storing
 784      a full-word or multi-word field can be done with just a SUBREG.  */
 785   if (!MEM_P (op0)
 786       && bitsize == GET_MODE_BITSIZE (fieldmode)
 787       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 788           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 789     {
 790       /* Use the subreg machinery either to narrow OP0 to the required
 791          words or to cope with mode punning between equal-sized modes.
 792          In the latter case, use subreg on the rhs side, not lhs.  */
 793       rtx sub;
 794
 795       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 796         {
 797           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 798           if (sub)
 799             {
 800               if (reverse)
 801                 sub = flip_storage_order (GET_MODE (op0), sub);
 802               emit_move_insn (op0, sub);
 803               return true;
 804             }
 805         }
 806       else
 807         {
 808           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 809                                      bitnum / BITS_PER_UNIT);
 810           if (sub)
 811             {
 812               if (reverse)
 813                 value = flip_storage_order (fieldmode, value);
 814               emit_move_insn (sub, value);
 815               return true;
 816             }
 817         }
 818     }
 819
 820   /* If the target is memory, storing any naturally aligned field can be
 821      done with a simple store.  For targets that support fast unaligned
 822      memory, any naturally sized, unit aligned field can be done directly.  */
 823   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 824     {
 825       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 826       if (reverse)
 827         value = flip_storage_order (fieldmode, value);
 828       emit_move_insn (op0, value);
 829       return true;
 830     }
 831
 832   /* Make sure we are playing with integral modes.  Pun with subregs
 833      if we aren't.  This must come after the entire register case above,
 834      since that case is valid for any mode.  The following cases are only
 835      valid for integral modes.  */
 836   {
 837     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 838     if (imode != GET_MODE (op0))
 839       {
 840         if (MEM_P (op0))
 841           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 842         else
 843           {
 844             gcc_assert (imode != BLKmode);
 845             op0 = gen_lowpart (imode, op0);
 846           }
 847       }
 848   }
 849
 850   /* Storing an lsb-aligned field in a register
 851      can be done with a movstrict instruction.  */
 852
 853   if (!MEM_P (op0)
 854       && !reverse
 855       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 856       && bitsize == GET_MODE_BITSIZE (fieldmode)
 857       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 858     {
 859       struct expand_operand ops[2];
 860       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 861       rtx arg0 = op0;
 862       unsigned HOST_WIDE_INT subreg_off;
 863
 864       if (GET_CODE (arg0) == SUBREG)
 865         {
 866           /* Else we've got some float mode source being extracted into
 867              a different float mode destination -- this combination of
 868              subregs results in Severe Tire Damage.  */
 869           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 870                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 871                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 872           arg0 = SUBREG_REG (arg0);
 873         }
 874
 875       subreg_off = bitnum / BITS_PER_UNIT;
 876       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 877         {
 878           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 879
 880           create_fixed_operand (&ops[0], arg0);
 881           /* Shrink the source operand to FIELDMODE.  */
 882           create_convert_operand_to (&ops[1], value, fieldmode, false);
 883           if (maybe_expand_insn (icode, 2, ops))
 884             return true;
 885         }
 886     }
 887
 888   /* Handle fields bigger than a word.  */
 889
 890   if (bitsize > BITS_PER_WORD)
 891     {
 892       /* Here we transfer the words of the field
 893          in the order least significant first.
 894          This is because the most significant word is the one which may
 895          be less than full.
 896          However, only do that if the value is not BLKmode.  */
 897
 898       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 899       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 900       unsigned int i;
 901       rtx_insn *last;
 902
 903       /* This is the mode we must force value to, so that there will be enough
 904          subwords to extract.  Note that fieldmode will often (always?) be
 905          VOIDmode, because that is what store_field uses to indicate that this
 906          is a bit field, but passing VOIDmode to operand_subword_force
 907          is not allowed.  */
 908       fieldmode = GET_MODE (value);
 909       if (fieldmode == VOIDmode)
 910         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 911
 912       last = get_last_insn ();
 913       for (i = 0; i < nwords; i++)
 914         {
 915           /* If I is 0, use the low-order word in both field and target;
 916              if I is 1, use the next to lowest word; and so on.  */
 917           unsigned int wordnum = (backwards
 918                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 919                                   - i - 1
 920                                   : i);
 921           unsigned int bit_offset = (backwards ^ reverse
 922                                      ? MAX ((int) bitsize - ((int) i + 1)
 923                                             * BITS_PER_WORD,
 924                                             0)
 925                                      : (int) i * BITS_PER_WORD);
 926           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 927           unsigned HOST_WIDE_INT new_bitsize =
 928             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 929
 930           /* If the remaining chunk doesn't have full wordsize we have
 931              to make sure that for big-endian machines the higher order
 932              bits are used.  */
 933           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 934             value_word = simplify_expand_binop (word_mode, lshr_optab,
 935                                                 value_word,
 936                                                 GEN_INT (BITS_PER_WORD
 937                                                          - new_bitsize),
 938                                                 NULL_RTX, true,
 939                                                 OPTAB_LIB_WIDEN);
 940
 941           if (!store_bit_field_1 (op0, new_bitsize,
 942                                   bitnum + bit_offset,
 943                                   bitregion_start, bitregion_end,
 944                                   word_mode,
 945                                   value_word, reverse, fallback_p))
 946             {
 947               delete_insns_since (last);
 948               return false;
 949             }
 950         }
 951       return true;
 952     }
 953
 954   /* If VALUE has a floating-point or complex mode, access it as an
 955      integer of the corresponding size.  This can occur on a machine
 956      with 64 bit registers that uses SFmode for float.  It can also
 957      occur for unaligned float or complex fields.  */
 958   orig_value = value;
 959   if (GET_MODE (value) != VOIDmode
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 961       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 962     {
 963       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 964       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 965     }
 966
 967   /* If OP0 is a multi-word register, narrow it to the affected word.
 968      If the region spans two words, defer to store_split_bit_field.
 969      Don't do this if op0 is a single hard register wider than word
 970      such as a float or vector register.  */
 971   if (!MEM_P (op0)
 972       && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD
 973       && (!REG_P (op0)
 974           || !HARD_REGISTER_P (op0)
 975           || HARD_REGNO_NREGS (REGNO (op0), GET_MODE (op0)) != 1))
 976     {
 977       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 978         {
 979           if (!fallback_p)
 980             return false;
 981
 982           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 983                                  bitregion_end, value, reverse);
 984           return true;
 985         }
 986       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 987                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 988       gcc_assert (op0);
 989       bitnum %= BITS_PER_WORD;
 990     }
 991
 992   /* From here on we can assume that the field to be stored in fits
 993      within a word.  If the destination is a register, it too fits
 994      in a word.  */
 995
 996   extraction_insn insv;
 997   if (!MEM_P (op0)
 998       && !reverse
 999       && get_best_reg_extraction_insn (&insv, EP_insv,
1000                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1001                                        fieldmode)
1002       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1003     return true;
1004
1005   /* If OP0 is a memory, try copying it to a register and seeing if a
1006      cheap register alternative is available.  */
1007   if (MEM_P (op0) && !reverse)
1008     {
1009       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1010                                         fieldmode)
1011           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1012         return true;
1013
1014       rtx_insn *last = get_last_insn ();
1015
1016       /* Try loading part of OP0 into a register, inserting the bitfield
1017          into that, and then copying the result back to OP0.  */
1018       unsigned HOST_WIDE_INT bitpos;
1019       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1020                                                bitregion_start, bitregion_end,
1021                                                fieldmode, &bitpos);
1022       if (xop0)
1023         {
1024           rtx tempreg = copy_to_reg (xop0);
1025           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1026                                  bitregion_start, bitregion_end,
1027                                  fieldmode, orig_value, reverse, false))
1028             {
1029               emit_move_insn (xop0, tempreg);
1030               return true;
1031             }
1032           delete_insns_since (last);
1033         }
1034     }
1035
1036   if (!fallback_p)
1037     return false;
1038
1039   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1040                          bitregion_end, value, reverse);
1041   return true;
1042 }
1043
1044 /* Generate code to store value from rtx VALUE
1045    into a bit-field within structure STR_RTX
1046    containing BITSIZE bits starting at bit BITNUM.
1047
1048    BITREGION_START is bitpos of the first bitfield in this region.
1049    BITREGION_END is the bitpos of the ending bitfield in this region.
1050    These two fields are 0, if the C++ memory model does not apply,
1051    or we are not interested in keeping track of bitfield regions.
1052
1053    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1054
1055    If REVERSE is true, the store is to be done in reverse order.

        Three cases are distinguished.  If strict_volatile_bitfield_p accepts
        the access, a field exactly as wide as FIELDMODE is written with a
        single move, and any other field is inserted into a register copy of
        the memory selected by narrow_bit_field_mem, which is then written
        back.  Otherwise, if STR_RTX is a MEM and BITREGION_START is nonzero,
        the address is first rebased to the start of the bit region.  The
        store itself is then done by store_bit_field_1 with its fallback
        enabled, so a failure there is treated as unreachable.  */
1056
1057 void
1058 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1059                  unsigned HOST_WIDE_INT bitnum,
1060                  unsigned HOST_WIDE_INT bitregion_start,
1061                  unsigned HOST_WIDE_INT bitregion_end,
1062                  machine_mode fieldmode,
1063                  rtx value, bool reverse)
1064 {
1065   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1066   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1067                                   bitregion_start, bitregion_end))
1068     {
1069       /* Storing of a full word can be done with a simple store.
1070          We know here that the field can be accessed with one single
1071          instruction.  For targets that support unaligned memory,
1072          an unaligned access may be necessary.  */
1073       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1074         {
1075           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1076                                              bitnum / BITS_PER_UNIT);
1077           if (reverse)
1078             value = flip_storage_order (fieldmode, value);
               /* The address above was advanced by whole bytes only, so the
                  field is required to start on a byte boundary.  */
1079           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1080           emit_move_insn (str_rtx, value);
1081         }
1082       else
1083         {
1084           rtx temp;
1085
               /* Narrow the reference using FIELDMODE.  BITNUM is updated
                  through the last argument; the assertion below checks that
                  the field then fits within FIELDMODE.  */
1086           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1087                                           &bitnum);
1088           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
               /* Read the narrowed memory into a register, insert the field
                  there with no bit-region limits (0, 0), and write the
                  register back with a single move.  */
1089           temp = copy_to_reg (str_rtx);
1090           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1091                                   fieldmode, value, reverse, true))
1092             gcc_unreachable ();
1093
1094           emit_move_insn (str_rtx, temp);
1095         }
1096
1097       return;
1098     }
1099
1100   /* Under the C++0x memory model, we must not touch bits outside the
1101      bit region.  Adjust the address to start at the beginning of the
1102      bit region.  */
1103   if (MEM_P (str_rtx) && bitregion_start > 0)
1104     {
1105       machine_mode bestmode;
1106       HOST_WIDE_INT offset, size;
1107
1108       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1109
1110       offset = bitregion_start / BITS_PER_UNIT;
           /* From here on BITNUM and BITREGION_END are counted from the
              start of the region.  SIZE is the number of bytes from there
              to the end of the field, rounded up.  */
1111       bitnum -= bitregion_start;
1112       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1113       bitregion_end -= bitregion_start;
1114       bitregion_start = 0;
1115       bestmode = get_best_mode (bitsize, bitnum,
1116                                 bitregion_start, bitregion_end,
1117                                 MEM_ALIGN (str_rtx), VOIDmode,
1118                                 MEM_VOLATILE_P (str_rtx));
1119       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1120     }
1121
       /* FALLBACK_P is true here, so store_bit_field_1 is expected to
          succeed; a false return is treated as a compiler bug.  */
1122   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1123                           bitregion_start, bitregion_end,
1124                           fieldmode, value, reverse, true))
1125     gcc_unreachable ();
1126 }
1127 \f
1128 /* Store VALUE into the BITSIZE-bit field of OP0 starting at bit BITNUM,
1129    using shifts and boolean operations.  BITREGION_START and BITREGION_END
1130    are handed on to get_best_mode and store_split_bit_field.  If REVERSE
1131    is true, the store is to be done in reverse order.
1132
1133    Not handled: a structure known to be only halfword (or byte) aligned
1134    with a field split across two aligned halfwords (or bytes) inside it.
1135    Such cases are not supposed to be able to occur.  */
1136
1137 static void
1138 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1139                        unsigned HOST_WIDE_INT bitnum,
1140                        unsigned HOST_WIDE_INT bitregion_start,
1141                        unsigned HOST_WIDE_INT bitregion_end,
1142                        rtx value, bool reverse)
1143 {
1144   /* Anything but memory is stored into directly, in its own mode.  */
1145   if (!MEM_P (op0))
1146     {
1147       store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1148       return;
1149     }
1150   /* Cap the access at word_mode; a MEM whose mode has zero bitsize gets
1151      the same cap.  */
1152   machine_mode limit = GET_MODE (op0);
1153   if (GET_MODE_BITSIZE (limit) == 0
1154       || GET_MODE_BITSIZE (word_mode) < GET_MODE_BITSIZE (limit))
1155     limit = word_mode;
1156
1157   machine_mode best
1158     = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1159                      MEM_ALIGN (op0), limit, MEM_VOLATILE_P (op0));
1160   if (best != VOIDmode)
1161     {
1162       op0 = narrow_bit_field_mem (op0, best, bitsize, bitnum, &bitnum);
1163       store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1164       return;
1165     }
1166
1167   /* Otherwise the field should span word boundaries; store it in pieces.  */
1168   store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1169                          bitregion_end, value, reverse);
1170 }
1170
1171 /* Helper function for store_fixed_bit_field, stores
1172    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  BITSIZE is
        the field width and BITNUM its starting bit.  VALUE is moved into
        position, the field's bits in a register copy of OP0 are cleared
        with an AND, VALUE is ORed in, and the result is moved back to OP0.
        If REVERSE is true, the store is to be done in reverse order.  */
1173
1174 static void
1175 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1176                          unsigned HOST_WIDE_INT bitnum,
1177                          rtx value, bool reverse)
1178 {
1179   machine_mode mode;
1180   rtx temp;
       /* Set for a constant VALUE whose field bits are all zero (the IOR
          below is then skipped) or all one (the AND below is then skipped).  */
1181   int all_zero = 0;
1182   int all_one = 0;
1183
1184   mode = GET_MODE (op0);
1185   gcc_assert (SCALAR_INT_MODE_P (mode));
1186
1187   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1188      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1189
1190   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1191     /* BITNUM is the distance between our msb
1192        and that of the containing datum.
1193        Convert it to the distance from the lsb.  */
1194     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1195
1196   /* Now BITNUM is always the distance between our lsb
1197      and that of OP0.  */
1198
1199   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1200      we must first convert its mode to MODE.  */
1201
1202   if (CONST_INT_P (value))
1203     {
1204       unsigned HOST_WIDE_INT v = UINTVAL (value);
1205
           /* Keep only the low BITSIZE bits of the constant.  The guard
              avoids a shift by the full width of HOST_WIDE_INT.  */
1206       if (bitsize < HOST_BITS_PER_WIDE_INT)
1207         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1208
1209       if (v == 0)
1210         all_zero = 1;
1211       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1212                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1213                || (bitsize == HOST_BITS_PER_WIDE_INT
1214                    && v == HOST_WIDE_INT_M1U))
1215         all_one = 1;
1216
1217       value = lshift_value (mode, v, bitnum);
1218     }
1219   else
1220     {
           /* VALUE is masked to BITSIZE bits unless its mode is exactly
              BITSIZE bits wide or the field ends at the top bit of MODE
              (presumably because the shift below then discards the excess
              bits).  This is decided before VALUE is converted to MODE.  */
1221       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1222                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1223
1224       if (GET_MODE (value) != mode)
1225         value = convert_to_mode (mode, value, 1);
1226
1227       if (must_and)
1228         value = expand_binop (mode, and_optab, value,
1229                               mask_rtx (mode, 0, bitsize, 0),
1230                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1231       if (bitnum > 0)
1232         value = expand_shift (LSHIFT_EXPR, mode, value,
1233                               bitnum, NULL_RTX, 1);
1234     }
1235
       /* For a reverse-order store the positioned VALUE, and the mask used
          below, are both passed through flip_storage_order.  */
1236   if (reverse)
1237     value = flip_storage_order (mode, value);
1238
1239   /* Now clear the chosen bits in OP0,
1240      except that if VALUE is -1 we need not bother.  */
1241   /* We keep the intermediates in registers to allow CSE to combine
1242      consecutive bitfield assignments.  */
1243
1244   temp = force_reg (mode, op0);
1245
1246   if (! all_one)
1247     {
           /* NOTE(review): the final argument 1 presumably requests the
              complemented mask (zeros over the field); mask_rtx is defined
              elsewhere, confirm there.  */
1248       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1249       if (reverse)
1250         mask = flip_storage_order (mode, mask);
1251       temp = expand_binop (mode, and_optab, temp, mask,
1252                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1253       temp = force_reg (mode, temp);
1254     }
1255
1256   /* Now logical-or VALUE into OP0, unless it is zero.  */
1257
1258   if (! all_zero)
1259     {
1260       temp = expand_binop (mode, ior_optab, temp, value,
1261                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1262       temp = force_reg (mode, temp);
1263     }
1264
       /* Write the result back unless TEMP is OP0 itself; the move targets
          a copy_rtx copy of OP0.  */
1265   if (op0 != temp)
1266     {
1267       op0 = copy_rtx (op0);
1268       emit_move_insn (op0, temp);
1269     }
1270 }
1271 \f
1272 /* Store a bit field that is split across multiple accessible memory objects.
1273
1274    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1275    BITSIZE is the field width; BITPOS the position of its first bit
1276    (within the word).
        BITREGION_START and BITREGION_END are handed on to
        store_fixed_bit_field; a nonzero BITREGION_END also shrinks the
        access unit near the end of the region (except for a REG or a
        SUBREG of a REG).
1277    VALUE is the value to store.
1278
1279    If REVERSE is true, the store is to be done in reverse order.
1280
        The field is stored piece by piece through store_fixed_bit_field,
        each piece lying within a single access unit.

1281    This does not yet handle fields wider than BITS_PER_WORD.  */
1282
1283 static void
1284 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1285                        unsigned HOST_WIDE_INT bitpos,
1286                        unsigned HOST_WIDE_INT bitregion_start,
1287                        unsigned HOST_WIDE_INT bitregion_end,
1288                        rtx value, bool reverse)
1289 {
1290   unsigned int unit, total_bits, bitsdone = 0;
1291
1292   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1293      much at a time.  */
1294   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1295     unit = BITS_PER_WORD;
1296   else
1297     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1298
1299   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1300      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1301      again, and we will mutually recurse forever.  */
1302   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1303     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1304
1305   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1306      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1307      that VALUE might be a floating-point constant.  */
1308   if (CONSTANT_P (value) && !CONST_INT_P (value))
1309     {
1310       rtx word = gen_lowpart_common (word_mode, value);
1311
1312       if (word && (value != word))
1313         value = word;
1314       else
1315         value = gen_lowpart_common (word_mode,
1316                                     force_reg (GET_MODE (value) != VOIDmode
1317                                                ? GET_MODE (value)
1318                                                : word_mode, value));
1319     }
1320
       /* Width of VALUE's mode.  VALUE is not changed inside the loop, so
          this is computed once and used by the non-constant extractions.  */
1321   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1322
1323   while (bitsdone < bitsize)
1324     {
1325       unsigned HOST_WIDE_INT thissize;
1326       unsigned HOST_WIDE_INT thispos;
1327       unsigned HOST_WIDE_INT offset;
1328       rtx part, word;
1329
1330       offset = (bitpos + bitsdone) / unit;
1331       thispos = (bitpos + bitsdone) % unit;
1332
1333       /* When region of bytes we can touch is restricted, decrease
1334          UNIT close to the end of the region as needed.  If op0 is a REG
1335          or SUBREG of REG, don't do this, as there can't be data races
1336          on a register and we can expand shorter code in some cases.  */
1337       if (bitregion_end
1338           && unit > BITS_PER_UNIT
1339           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1340           && !REG_P (op0)
1341           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1342         {
1343           unit = unit / 2;
1344           continue;
1345         }
1346
1347       /* THISSIZE must not overrun a word boundary.  Otherwise,
1348          store_fixed_bit_field will call us again, and we will mutually
1349          recurse forever.  */
1350       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1351       thissize = MIN (thissize, unit - thispos);
1352
           /* Mask selecting the low THISSIZE bits of a CONST_INT piece.  It
              is built in the unsigned type, with a full-width piece handled
              separately, so that neither the shift nor the subtraction can
              overflow (the same scheme as the constant handling in
              store_fixed_bit_field_1).  */
           const unsigned HOST_WIDE_INT part_mask
             = (thissize < HOST_BITS_PER_WIDE_INT
                ? (HOST_WIDE_INT_1U << thissize) - 1
                : HOST_WIDE_INT_M1U);

1353       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1354         {
1355           /* Fetch successively less significant portions.  */
1356           if (CONST_INT_P (value))
1357             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1358                              >> (bitsize - bitsdone - thissize))
1359                             & part_mask);
1360           /* Likewise, but the source is little-endian.  */
1361           else if (reverse)
1362             part = extract_fixed_bit_field (word_mode, value, thissize,
1363                                             bitsize - bitsdone - thissize,
1364                                             NULL_RTX, 1, false);
1365           else
1366             {
1368               /* The args are chosen so that the last part includes the
1369                  lsb.  Give extract_bit_field the value it needs (with
1370                  endianness compensation) to fetch the piece we want.  */
1371               part = extract_fixed_bit_field (word_mode, value, thissize,
1372                                               total_bits - bitsize + bitsdone,
1373                                               NULL_RTX, 1, false);
1374             }
1375         }
1376       else
1377         {
1378           /* Fetch successively more significant portions.  */
1379           if (CONST_INT_P (value))
1380             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1381                              >> bitsdone)
1382                             & part_mask);
1383           /* Likewise, but the source is big-endian.  */
1384           else if (reverse)
1385             part = extract_fixed_bit_field (word_mode, value, thissize,
1386                                             total_bits - bitsdone - thissize,
1387                                             NULL_RTX, 1, false);
1388           else
1389             part = extract_fixed_bit_field (word_mode, value, thissize,
1390                                             bitsdone, NULL_RTX, 1, false);
1391         }
1392
1393       /* If OP0 is a register, then handle OFFSET here.  */
1394       if (SUBREG_P (op0) || REG_P (op0))
1395         {
1396           machine_mode op0_mode = GET_MODE (op0);
               /* A register narrower than a word has no subwords: piece 0 is
                  OP0 itself and any later piece lies outside it.  */
1397           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1398             word = offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1401                                           GET_MODE (op0));
               /* Reduce OFFSET to the units that remain inside WORD.  */
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else
1405         word = op0;
1406
1407       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1408          it is just an out-of-bounds access.  Ignore it.  */
1409       if (word != const0_rtx)
1410         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1411                                bitregion_start, bitregion_end, part,
1412                                reverse);
1413       bitsdone += thissize;
1414     }
1415 }
1416 \f
1417 /* A subroutine of extract_bit_field_1 that converts return value X
1418    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1419    to extract_bit_field.  X is returned unchanged when it already has
1420    one of those two modes; otherwise the result has mode TMODE.  */
1421
1422 static rtx
1423 convert_extracted_bit_field (rtx x, machine_mode mode,
1424                              machine_mode tmode, bool unsignedp)
1425 {
1426   machine_mode x_mode = GET_MODE (x);
1427
1428   if (x_mode == tmode || x_mode == mode)
1429     return x;
1430
1431   /* A scalar integer TMODE can be reached by a direct conversion.  */
1432   if (SCALAR_INT_MODE_P (tmode))
1433     return convert_to_mode (tmode, x, unsignedp);
1434
1435   /* Otherwise first convert to the integer mode of TMODE's size, put the
1436      result in a register, and then access it in TMODE as a lowpart.  */
1437   machine_mode int_mode
1438     = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1439   rtx int_value = convert_to_mode (int_mode, x, unsignedp);
1440   int_value = force_reg (int_mode, int_value);
1441   return gen_lowpart (tmode, int_value);
1442 }
1443
1444 /* Try to use an ext(z)v pattern to extract a field from OP0.
1445    Return the extracted value on success, otherwise return null.
1446    EXTV describes the pattern to use; EXT_MODE, the mode of the
1447    extraction, is taken from EXTV->field_mode.  The other arguments
        are as for extract_bit_field.

        Null is also returned when BITSIZE is zero or larger than the width
        of EXT_MODE, and when OP0 is a SUBREG whose mode is not EXT_MODE.  */
1448
1449 static rtx
1450 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1451                               unsigned HOST_WIDE_INT bitsize,
1452                               unsigned HOST_WIDE_INT bitnum,
1453                               int unsignedp, rtx target,
1454                               machine_mode mode, machine_mode tmode)
1455 {
1456   struct expand_operand ops[4];
       /* SPEC_TARGET is the result location preferred by the caller (or the
          pseudo allocated below).  SPEC_TARGET_SUBREG, when set, is that
          same location viewed in a wider EXT_MODE.  */
1457   rtx spec_target = target;
1458   rtx spec_target_subreg = 0;
1459   machine_mode ext_mode = extv->field_mode;
1460   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1461
       /* The pattern is not used for an empty field or for one wider than
          EXT_MODE.  */
1462   if (bitsize == 0 || unit < bitsize)
1463     return NULL_RTX;
1464
1465   if (MEM_P (op0))
1466     /* Get a reference to the first byte of the field.  */
1467     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1468                                 &bitnum);
1469   else
1470     {
1471       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1472       if (BYTES_BIG_ENDIAN)
1473         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1474
1475       /* If op0 is a register, we need it in EXT_MODE to make it
1476          acceptable to the format of ext(z)v.  */
1477       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1478         return NULL_RTX;
1479       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1480         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1481     }
1482
1483   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1484      "backwards" from the size of the unit we are extracting from.
1485      Otherwise, we count bits from the most significant on a
1486      BYTES/BITS_BIG_ENDIAN machine.  */
1487
1488   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1489     bitnum = unit - bitsize - bitnum;
1490
       /* With no target supplied, allocate a TMODE pseudo and treat it as
          the preferred result location.  */
1491   if (target == 0)
1492     target = spec_target = gen_reg_rtx (tmode);
1493
1494   if (GET_MODE (target) != ext_mode)
1495     {
1496       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1497          between the mode of the extraction (word_mode) and the target
1498          mode.  Instead, create a temporary and use convert_move to set
1499          the target.  */
1500       if (REG_P (target)
1501           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1502         {
1503           target = gen_lowpart (ext_mode, target);
               /* Remember the EXT_MODE view of SPEC_TARGET when EXT_MODE has
                  the larger precision; if the pattern writes to it,
                  SPEC_TARGET is returned below without a conversion.  */
1504           if (GET_MODE_PRECISION (ext_mode)
1505               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1506             spec_target_subreg = target;
1507         }
1508       else
1509         target = gen_reg_rtx (ext_mode);
1510     }
1511
       /* Operands handed to the pattern: output, source, field width in
          bits, and the bit position as adjusted above.  */
1512   create_output_operand (&ops[0], target, ext_mode);
1513   create_fixed_operand (&ops[1], op0);
1514   create_integer_operand (&ops[2], bitsize);
1515   create_integer_operand (&ops[3], bitnum);
1516   if (maybe_expand_insn (extv->icode, 4, ops))
1517     {
           /* Read the output operand back from OPS[0]; the comparisons below
              allow for it differing from the TARGET passed in.  */
1518       target = ops[0].value;
1519       if (target == spec_target)
1520         return target;
1521       if (target == spec_target_subreg)
1522         return spec_target;
1523       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1524     }
1525   return NULL_RTX;
1526 }
1527
1528 /* A subroutine of extract_bit_field, with the same arguments.
1529    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1530    if we can find no other means of implementing the operation.
1531    if FALLBACK_P is false, return NULL instead.  */
1532
1533 static rtx
1534 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1535                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1536                      machine_mode mode, machine_mode tmode,
1537                      bool reverse, bool fallback_p, rtx *alt_rtl)
1538 {
1539   rtx op0 = str_rtx;
1540   machine_mode int_mode;
1541   machine_mode mode1;
1542
1543   if (tmode == VOIDmode)
1544     tmode = mode;
1545
1546   while (GET_CODE (op0) == SUBREG)
1547     {
1548       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1549       op0 = SUBREG_REG (op0);
1550     }
1551
1552   /* If we have an out-of-bounds access to a register, just return an
1553      uninitialized register of the required mode.  This can occur if the
1554      source code contains an out-of-bounds access to a small array.  */
1555   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1556     return gen_reg_rtx (tmode);
1557
1558   if (REG_P (op0)
1559       && mode == GET_MODE (op0)
1560       && bitnum == 0
1561       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1562     {
1563       if (reverse)
1564         op0 = flip_storage_order (mode, op0);
1565       /* We're trying to extract a full register from itself.  */
1566       return op0;
1567     }
1568
1569   /* First try to check for vector from vector extractions.  */
1570   if (VECTOR_MODE_P (GET_MODE (op0))
1571       && !MEM_P (op0)
1572       && VECTOR_MODE_P (tmode)
1573       && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1574     {
1575       machine_mode new_mode = GET_MODE (op0);
1576       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1577         {
1578           new_mode = mode_for_vector (GET_MODE_INNER (tmode),
1579                                       GET_MODE_BITSIZE (GET_MODE (op0))
1580                                       / GET_MODE_UNIT_BITSIZE (tmode));
1581           if (!VECTOR_MODE_P (new_mode)
1582               || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1583               || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1584               || !targetm.vector_mode_supported_p (new_mode))
1585             new_mode = VOIDmode;
1586         }
1587       if (new_mode != VOIDmode
1588           && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1589               != CODE_FOR_nothing)
1590           && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1591               == bitnum / GET_MODE_BITSIZE (tmode)))
1592         {
1593           struct expand_operand ops[3];
1594           machine_mode outermode = new_mode;
1595           machine_mode innermode = tmode;
1596           enum insn_code icode
1597             = convert_optab_handler (vec_extract_optab, outermode, innermode);
1598           unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1599
1600           if (new_mode != GET_MODE (op0))
1601             op0 = gen_lowpart (new_mode, op0);
1602           create_output_operand (&ops[0], target, innermode);
1603           ops[0].target = 1;
1604           create_input_operand (&ops[1], op0, outermode);
1605           create_integer_operand (&ops[2], pos);
1606           if (maybe_expand_insn (icode, 3, ops))
1607             {
1608               if (alt_rtl && ops[0].target)
1609                 *alt_rtl = target;
1610               target = ops[0].value;
1611               if (GET_MODE (target) != mode)
1612                 return gen_lowpart (tmode, target);
1613               return target;
1614             }
1615         }
1616     }
1617
1618   /* See if we can get a better vector mode before extracting.  */
1619   if (VECTOR_MODE_P (GET_MODE (op0))
1620       && !MEM_P (op0)
1621       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1622     {
1623       machine_mode new_mode;
1624
1625       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1626         new_mode = MIN_MODE_VECTOR_FLOAT;
1627       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1628         new_mode = MIN_MODE_VECTOR_FRACT;
1629       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1630         new_mode = MIN_MODE_VECTOR_UFRACT;
1631       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1632         new_mode = MIN_MODE_VECTOR_ACCUM;
1633       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1634         new_mode = MIN_MODE_VECTOR_UACCUM;
1635       else
1636         new_mode = MIN_MODE_VECTOR_INT;
1637
1638       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1639         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1640             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1641             && targetm.vector_mode_supported_p (new_mode))
1642           break;
1643       if (new_mode != VOIDmode)
1644         op0 = gen_lowpart (new_mode, op0);
1645     }
1646
1647   /* Use vec_extract patterns for extracting parts of vectors whenever
1648      available.  */
1649   if (VECTOR_MODE_P (GET_MODE (op0))
1650       && !MEM_P (op0)
1651       && (convert_optab_handler (vec_extract_optab, GET_MODE (op0),
1652                                  GET_MODE_INNER (GET_MODE (op0)))
1653           != CODE_FOR_nothing)
1654       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1655           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1656     {
1657       struct expand_operand ops[3];
1658       machine_mode outermode = GET_MODE (op0);
1659       machine_mode innermode = GET_MODE_INNER (outermode);
1660       enum insn_code icode
1661         = convert_optab_handler (vec_extract_optab, outermode, innermode);
1662       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1663
1664       create_output_operand (&ops[0], target, innermode);
1665       ops[0].target = 1;
1666       create_input_operand (&ops[1], op0, outermode);
1667       create_integer_operand (&ops[2], pos);
1668       if (maybe_expand_insn (icode, 3, ops))
1669         {
1670           if (alt_rtl && ops[0].target)
1671             *alt_rtl = target;
1672           target = ops[0].value;
1673           if (GET_MODE (target) != mode)
1674             return gen_lowpart (tmode, target);
1675           return target;
1676         }
1677     }
1678
1679   /* Make sure we are playing with integral modes.  Pun with subregs
1680      if we aren't.  */
1681   {
1682     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1683     if (imode != GET_MODE (op0))
1684       {
1685         if (MEM_P (op0))
1686           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1687         else if (imode != BLKmode)
1688           {
1689             op0 = gen_lowpart (imode, op0);
1690
1691             /* If we got a SUBREG, force it into a register since we
1692                aren't going to be able to do another SUBREG on it.  */
1693             if (GET_CODE (op0) == SUBREG)
1694               op0 = force_reg (imode, op0);
1695           }
1696         else
1697           {
1698             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1699             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1700             emit_move_insn (mem, op0);
1701             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1702           }
1703       }
1704   }
1705
1706   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1707      If that's wrong, the solution is to test for it and set TARGET to 0
1708      if needed.  */
1709
1710   /* Get the mode of the field to use for atomic access or subreg
1711      conversion.  */
1712   mode1 = mode;
1713   if (SCALAR_INT_MODE_P (tmode))
1714     {
1715       machine_mode try_mode = mode_for_size (bitsize,
1716                                                   GET_MODE_CLASS (tmode), 0);
1717       if (try_mode != BLKmode)
1718         mode1 = try_mode;
1719     }
1720   gcc_assert (mode1 != BLKmode);
1721
1722   /* Extraction of a full MODE1 value can be done with a subreg as long
1723      as the least significant bit of the value is the least significant
1724      bit of either OP0 or a word of OP0.  */
1725   if (!MEM_P (op0)
1726       && !reverse
1727       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1728       && bitsize == GET_MODE_BITSIZE (mode1)
1729       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1730     {
1731       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1732                                      bitnum / BITS_PER_UNIT);
1733       if (sub)
1734         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1735     }
1736
1737   /* Extraction of a full MODE1 value can be done with a load as long as
1738      the field is on a byte boundary and is sufficiently aligned.  */
1739   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1740     {
1741       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1742       if (reverse)
1743         op0 = flip_storage_order (mode1, op0);
1744       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1745     }
1746
1747   /* Handle fields bigger than a word.  */
1748
1749   if (bitsize > BITS_PER_WORD)
1750     {
1751       /* Here we transfer the words of the field
1752          in the order least significant first.
1753          This is because the most significant word is the one which may
1754          be less than full.  */
1755
1756       const bool backwards = WORDS_BIG_ENDIAN;
1757       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1758       unsigned int i;
1759       rtx_insn *last;
1760
1761       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1762         target = gen_reg_rtx (mode);
1763
1764       /* In case we're about to clobber a base register or something
1765          (see gcc.c-torture/execute/20040625-1.c).   */
1766       if (reg_mentioned_p (target, str_rtx))
1767         target = gen_reg_rtx (mode);
1768
1769       /* Indicate for flow that the entire target reg is being set.  */
1770       emit_clobber (target);
1771
1772       last = get_last_insn ();
1773       for (i = 0; i < nwords; i++)
1774         {
1775           /* If I is 0, use the low-order word in both field and target;
1776              if I is 1, use the next to lowest word; and so on.  */
1777           /* Word number in TARGET to use.  */
1778           unsigned int wordnum
1779             = (backwards
1780                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1781                : i);
1782           /* Offset from start of field in OP0.  */
1783           unsigned int bit_offset = (backwards ^ reverse
1784                                      ? MAX ((int) bitsize - ((int) i + 1)
1785                                             * BITS_PER_WORD,
1786                                             0)
1787                                      : (int) i * BITS_PER_WORD);
1788           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1789           rtx result_part
1790             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1791                                              bitsize - i * BITS_PER_WORD),
1792                                    bitnum + bit_offset, 1, target_part,
1793                                    mode, word_mode, reverse, fallback_p, NULL);
1794
1795           gcc_assert (target_part);
1796           if (!result_part)
1797             {
1798               delete_insns_since (last);
1799               return NULL;
1800             }
1801
1802           if (result_part != target_part)
1803             emit_move_insn (target_part, result_part);
1804         }
1805
1806       if (unsignedp)
1807         {
1808           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1809              need to be zero'd out.  */
1810           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1811             {
1812               unsigned int i, total_words;
1813
1814               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1815               for (i = nwords; i < total_words; i++)
1816                 emit_move_insn
1817                   (operand_subword (target,
1818                                     backwards ? total_words - i - 1 : i,
1819                                     1, VOIDmode),
1820                    const0_rtx);
1821             }
1822           return target;
1823         }
1824
1825       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1826       target = expand_shift (LSHIFT_EXPR, mode, target,
1827                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1828       return expand_shift (RSHIFT_EXPR, mode, target,
1829                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1830     }
1831
1832   /* If OP0 is a multi-word register, narrow it to the affected word.
1833      If the region spans two words, defer to extract_split_bit_field.  */
1834   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1835     {
1836       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1837         {
1838           if (!fallback_p)
1839             return NULL_RTX;
1840           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1841                                             reverse);
1842           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1843         }
1844       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1845                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1846       bitnum %= BITS_PER_WORD;
1847     }
1848
1849   /* From here on we know the desired field is smaller than a word.
1850      If OP0 is a register, it too fits within a word.  */
1851   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1852   extraction_insn extv;
1853   if (!MEM_P (op0)
1854       && !reverse
1855       /* ??? We could limit the structure size to the part of OP0 that
1856          contains the field, with appropriate checks for endianness
1857          and TRULY_NOOP_TRUNCATION.  */
1858       && get_best_reg_extraction_insn (&extv, pattern,
1859                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1860                                        tmode))
1861     {
1862       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1863                                                  unsignedp, target, mode,
1864                                                  tmode);
1865       if (result)
1866         return result;
1867     }
1868
1869   /* If OP0 is a memory, try copying it to a register and seeing if a
1870      cheap register alternative is available.  */
1871   if (MEM_P (op0) & !reverse)
1872     {
1873       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1874                                         tmode))
1875         {
1876           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1877                                                      bitnum, unsignedp,
1878                                                      target, mode,
1879                                                      tmode);
1880           if (result)
1881             return result;
1882         }
1883
1884       rtx_insn *last = get_last_insn ();
1885
1886       /* Try loading part of OP0 into a register and extracting the
1887          bitfield from that.  */
1888       unsigned HOST_WIDE_INT bitpos;
1889       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1890                                                0, 0, tmode, &bitpos);
1891       if (xop0)
1892         {
1893           xop0 = copy_to_reg (xop0);
1894           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1895                                             unsignedp, target,
1896                                             mode, tmode, reverse, false, NULL);
1897           if (result)
1898             return result;
1899           delete_insns_since (last);
1900         }
1901     }
1902
1903   if (!fallback_p)
1904     return NULL;
1905
1906   /* Find a correspondingly-sized integer field, so we can apply
1907      shifts and masks to it.  */
1908   int_mode = int_mode_for_mode (tmode);
1909   if (int_mode == BLKmode)
1910     int_mode = int_mode_for_mode (mode);
1911   /* Should probably push op0 out to memory and then do a load.  */
1912   gcc_assert (int_mode != BLKmode);
1913
1914   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1915                                     unsignedp, reverse);
1916
1917   /* Complex values must be reversed piecewise, so we need to undo the global
1918      reversal, convert to the complex mode and reverse again.  */
1919   if (reverse && COMPLEX_MODE_P (tmode))
1920     {
1921       target = flip_storage_order (int_mode, target);
1922       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1923       target = flip_storage_order (tmode, target);
1924     }
1925   else
1926     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1927
1928   return target;
1929 }
1930
1931 /* Generate code to extract a byte-field from STR_RTX
1932    containing BITSIZE bits, starting at BITNUM,
1933    and put it in TARGET if possible (if TARGET is nonzero).
1934    Regardless of TARGET, we return the rtx for where the value is placed.
1935
1936    STR_RTX is the structure containing the byte (a REG or MEM).
1937    UNSIGNEDP is nonzero if this is an unsigned bit field.
1938    MODE is the natural mode of the field value once extracted.
1939    TMODE is the mode the caller would like the value to have;
1940    but the value may be returned with type MODE instead.
1941
1942    If REVERSE is true, the extraction is to be done in reverse order.
1943
1944    If a TARGET is specified and we can store in it at no extra cost,
1945    we do so, and return TARGET.
1946    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1947    if they are equally easy.  */
1948
1949 rtx
1950 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1951                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1952                    machine_mode mode, machine_mode tmode, bool reverse,
1953                    rtx *alt_rtl)
1954 {
1955   machine_mode mode1;
1956
1957   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1958   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1959     mode1 = GET_MODE (str_rtx);
1960   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1961     mode1 = GET_MODE (target);
1962   else
1963     mode1 = tmode;
1964
1965   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1966     {
1967       /* Extraction of a full MODE1 value can be done with a simple load.
1968          We know here that the field can be accessed with one single
1969          instruction.  For targets that support unaligned memory,
1970          an unaligned access may be necessary.  */
1971       if (bitsize == GET_MODE_BITSIZE (mode1))
1972         {
1973           rtx result = adjust_bitfield_address (str_rtx, mode1,
1974                                                 bitnum / BITS_PER_UNIT);
1975           if (reverse)
1976             result = flip_storage_order (mode1, result);
1977           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1978           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1979         }
1980
1981       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1982                                       &bitnum);
1983       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1984       str_rtx = copy_to_reg (str_rtx);
1985     }
1986
1987   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1988                               target, mode, tmode, reverse, true, alt_rtl);
1989 }
1990 \f
1991 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1992    from bit BITNUM of OP0.
1993
1994    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1995    If REVERSE is true, the extraction is to be done in reverse order.
1996
1997    If TARGET is nonzero, attempts to store the value there
1998    and return TARGET, but this is not guaranteed.
1999    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2000
2001 static rtx
2002 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2003                          unsigned HOST_WIDE_INT bitsize,
2004                          unsigned HOST_WIDE_INT bitnum, rtx target,
2005                          int unsignedp, bool reverse)
2006 {
2007   if (MEM_P (op0))
2008     {
2009       machine_mode mode
2010         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
2011                          MEM_VOLATILE_P (op0));
2012
2013       if (mode == VOIDmode)
2014         /* The only way this should occur is if the field spans word
2015            boundaries.  */
2016         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
2017                                         reverse);
2018
2019       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2020     }
2021
2022   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
2023                                     target, unsignedp, reverse);
2024 }
2025
2026 /* Helper function for extract_fixed_bit_field, extracts
2027    the bit field always using the MODE of OP0.  */
2028
2029 static rtx
2030 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
2031                            unsigned HOST_WIDE_INT bitsize,
2032                            unsigned HOST_WIDE_INT bitnum, rtx target,
2033                            int unsignedp, bool reverse)
2034 {
2035   machine_mode mode = GET_MODE (op0);
2036   gcc_assert (SCALAR_INT_MODE_P (mode));
2037
2038   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2039      for invalid input, such as extract equivalent of f5 from
2040      gcc.dg/pr48335-2.c.  */
2041
2042   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2043     /* BITNUM is the distance between our msb and that of OP0.
2044        Convert it to the distance from the lsb.  */
2045     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2046
2047   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2048      We have reduced the big-endian case to the little-endian case.  */
2049   if (reverse)
2050     op0 = flip_storage_order (mode, op0);
2051
2052   if (unsignedp)
2053     {
2054       if (bitnum)
2055         {
2056           /* If the field does not already start at the lsb,
2057              shift it so it does.  */
2058           /* Maybe propagate the target for the shift.  */
2059           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2060           if (tmode != mode)
2061             subtarget = 0;
2062           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2063         }
2064       /* Convert the value to the desired mode.  */
2065       if (mode != tmode)
2066         op0 = convert_to_mode (tmode, op0, 1);
2067
2068       /* Unless the msb of the field used to be the msb when we shifted,
2069          mask out the upper bits.  */
2070
2071       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2072         return expand_binop (GET_MODE (op0), and_optab, op0,
2073                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2074                              target, 1, OPTAB_LIB_WIDEN);
2075       return op0;
2076     }
2077
2078   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2079      then arithmetic-shift its lsb to the lsb of the word.  */
2080   op0 = force_reg (mode, op0);
2081
2082   /* Find the narrowest integer mode that contains the field.  */
2083
2084   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2085        mode = GET_MODE_WIDER_MODE (mode))
2086     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2087       {
2088         op0 = convert_to_mode (mode, op0, 0);
2089         break;
2090       }
2091
2092   if (mode != tmode)
2093     target = 0;
2094
2095   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2096     {
2097       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2098       /* Maybe propagate the target for the shift.  */
2099       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2100       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2101     }
2102
2103   return expand_shift (RSHIFT_EXPR, mode, op0,
2104                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2105 }
2106
2107 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2108    VALUE << BITPOS.  */
2109
2110 static rtx
2111 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2112               int bitpos)
2113 {
2114   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2115 }
2116 \f
2117 /* Extract a bit field that is split across two words
2118    and return an RTX for the result.
2119
2120    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2121    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2122    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2123
2124    If REVERSE is true, the extraction is to be done in reverse order.  */
2125
2126 static rtx
2127 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2128                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2129                          bool reverse)
2130 {
2131   unsigned int unit;
2132   unsigned int bitsdone = 0;
2133   rtx result = NULL_RTX;
2134   int first = 1;
2135
2136   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2137      much at a time.  */
2138   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2139     unit = BITS_PER_WORD;
2140   else
2141     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2142
2143   while (bitsdone < bitsize)
2144     {
2145       unsigned HOST_WIDE_INT thissize;
2146       rtx part, word;
2147       unsigned HOST_WIDE_INT thispos;
2148       unsigned HOST_WIDE_INT offset;
2149
2150       offset = (bitpos + bitsdone) / unit;
2151       thispos = (bitpos + bitsdone) % unit;
2152
2153       /* THISSIZE must not overrun a word boundary.  Otherwise,
2154          extract_fixed_bit_field will call us again, and we will mutually
2155          recurse forever.  */
2156       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2157       thissize = MIN (thissize, unit - thispos);
2158
2159       /* If OP0 is a register, then handle OFFSET here.  */
2160       if (SUBREG_P (op0) || REG_P (op0))
2161         {
2162           word = operand_subword_force (op0, offset, GET_MODE (op0));
2163           offset = 0;
2164         }
2165       else
2166         word = op0;
2167
2168       /* Extract the parts in bit-counting order,
2169          whose meaning is determined by BYTES_PER_UNIT.
2170          OFFSET is in UNITs, and UNIT is in bits.  */
2171       part = extract_fixed_bit_field (word_mode, word, thissize,
2172                                       offset * unit + thispos, 0, 1, reverse);
2173       bitsdone += thissize;
2174
2175       /* Shift this part into place for the result.  */
2176       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2177         {
2178           if (bitsize != bitsdone)
2179             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2180                                  bitsize - bitsdone, 0, 1);
2181         }
2182       else
2183         {
2184           if (bitsdone != thissize)
2185             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2186                                  bitsdone - thissize, 0, 1);
2187         }
2188
2189       if (first)
2190         result = part;
2191       else
2192         /* Combine the parts with bitwise or.  This works
2193            because we extracted each part as an unsigned bit field.  */
2194         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2195                                OPTAB_LIB_WIDEN);
2196
2197       first = 0;
2198     }
2199
2200   /* Unsigned bit field: we are done.  */
2201   if (unsignedp)
2202     return result;
2203   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2204   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2205                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2206   return expand_shift (RSHIFT_EXPR, word_mode, result,
2207                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2208 }
2209 \f
2210 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2211    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2212    MODE, fill the upper bits with zeros.  Fail if the layout of either
2213    mode is unknown (as for CC modes) or if the extraction would involve
2214    unprofitable mode punning.  Return the value on success, otherwise
2215    return null.
2216
2217    This is different from gen_lowpart* in these respects:
2218
2219      - the returned value must always be considered an rvalue
2220
2221      - when MODE is wider than SRC_MODE, the extraction involves
2222        a zero extension
2223
2224      - when MODE is smaller than SRC_MODE, the extraction involves
2225        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2226
2227    In other words, this routine performs a computation, whereas the
2228    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2229    operations.  */
2230
2231 rtx
2232 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2233 {
2234   machine_mode int_mode, src_int_mode;
2235
2236   if (mode == src_mode)
2237     return src;
2238
2239   if (CONSTANT_P (src))
2240     {
2241       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2242          fails, it will happily create (subreg (symbol_ref)) or similar
2243          invalid SUBREGs.  */
2244       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2245       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2246       if (ret)
2247         return ret;
2248
2249       if (GET_MODE (src) == VOIDmode
2250           || !validate_subreg (mode, src_mode, src, byte))
2251         return NULL_RTX;
2252
2253       src = force_reg (GET_MODE (src), src);
2254       return gen_rtx_SUBREG (mode, src, byte);
2255     }
2256
2257   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2258     return NULL_RTX;
2259
2260   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2261       && MODES_TIEABLE_P (mode, src_mode))
2262     {
2263       rtx x = gen_lowpart_common (mode, src);
2264       if (x)
2265         return x;
2266     }
2267
2268   src_int_mode = int_mode_for_mode (src_mode);
2269   int_mode = int_mode_for_mode (mode);
2270   if (src_int_mode == BLKmode || int_mode == BLKmode)
2271     return NULL_RTX;
2272
2273   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2274     return NULL_RTX;
2275   if (!MODES_TIEABLE_P (int_mode, mode))
2276     return NULL_RTX;
2277
2278   src = gen_lowpart (src_int_mode, src);
2279   src = convert_modes (int_mode, src_int_mode, src, true);
2280   src = gen_lowpart (mode, src);
2281   return src;
2282 }
2283 \f
2284 /* Add INC into TARGET.  */
2285
2286 void
2287 expand_inc (rtx target, rtx inc)
2288 {
2289   rtx value = expand_binop (GET_MODE (target), add_optab,
2290                             target, inc,
2291                             target, 0, OPTAB_LIB_WIDEN);
2292   if (value != target)
2293     emit_move_insn (target, value);
2294 }
2295
2296 /* Subtract DEC from TARGET.  */
2297
2298 void
2299 expand_dec (rtx target, rtx dec)
2300 {
2301   rtx value = expand_binop (GET_MODE (target), sub_optab,
2302                             target, dec,
2303                             target, 0, OPTAB_LIB_WIDEN);
2304   if (value != target)
2305     emit_move_insn (target, value);
2306 }
2307 \f
2308 /* Output a shift instruction for expression code CODE,
2309    with SHIFTED being the rtx for the value to shift,
2310    and AMOUNT the rtx for the amount to shift by.
2311    Store the result in the rtx TARGET, if that is convenient.
2312    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2313    Return the rtx for where the value is.
2314    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2315    in which case 0 is returned.  */
2316
2317 static rtx
2318 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2319                 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2320 {
2321   rtx op1, temp = 0;
2322   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2323   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2324   optab lshift_optab = ashl_optab;
2325   optab rshift_arith_optab = ashr_optab;
2326   optab rshift_uns_optab = lshr_optab;
2327   optab lrotate_optab = rotl_optab;
2328   optab rrotate_optab = rotr_optab;
2329   machine_mode op1_mode;
2330   machine_mode scalar_mode = mode;
2331   int attempt;
2332   bool speed = optimize_insn_for_speed_p ();
2333
2334   if (VECTOR_MODE_P (mode))
2335     scalar_mode = GET_MODE_INNER (mode);
2336   op1 = amount;
2337   op1_mode = GET_MODE (op1);
2338
2339   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2340      shift amount is a vector, use the vector/vector shift patterns.  */
2341   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2342     {
2343       lshift_optab = vashl_optab;
2344       rshift_arith_optab = vashr_optab;
2345       rshift_uns_optab = vlshr_optab;
2346       lrotate_optab = vrotl_optab;
2347       rrotate_optab = vrotr_optab;
2348     }
2349
2350   /* Previously detected shift-counts computed by NEGATE_EXPR
2351      and shifted in the other direction; but that does not work
2352      on all machines.  */
2353
2354   if (SHIFT_COUNT_TRUNCATED)
2355     {
2356       if (CONST_INT_P (op1)
2357           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2358               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2359         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2360                        % GET_MODE_BITSIZE (scalar_mode));
2361       else if (GET_CODE (op1) == SUBREG
2362                && subreg_lowpart_p (op1)
2363                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2364                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2365         op1 = SUBREG_REG (op1);
2366     }
2367
2368   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2369      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2370      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2371      amount instead.  */
2372   if (rotate
2373       && CONST_INT_P (op1)
2374       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2375                    GET_MODE_BITSIZE (scalar_mode) - 1))
2376     {
2377       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2378       left = !left;
2379       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2380     }
2381
2382   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2383      Note that this is not the case for bigger values.  For instance a rotation
2384      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2385      0x04030201 (bswapsi).  */
2386   if (rotate
2387       && CONST_INT_P (op1)
2388       && INTVAL (op1) == BITS_PER_UNIT
2389       && GET_MODE_SIZE (scalar_mode) == 2
2390       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2391     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2392                                   unsignedp);
2393
2394   if (op1 == const0_rtx)
2395     return shifted;
2396
2397   /* Check whether its cheaper to implement a left shift by a constant
2398      bit count by a sequence of additions.  */
2399   if (code == LSHIFT_EXPR
2400       && CONST_INT_P (op1)
2401       && INTVAL (op1) > 0
2402       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2403       && INTVAL (op1) < MAX_BITS_PER_WORD
2404       && (shift_cost (speed, mode, INTVAL (op1))
2405           > INTVAL (op1) * add_cost (speed, mode))
2406       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2407     {
2408       int i;
2409       for (i = 0; i < INTVAL (op1); i++)
2410         {
2411           temp = force_reg (mode, shifted);
2412           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2413                                   unsignedp, OPTAB_LIB_WIDEN);
2414         }
2415       return shifted;
2416     }
2417
2418   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2419     {
2420       enum optab_methods methods;
2421
2422       if (attempt == 0)
2423         methods = OPTAB_DIRECT;
2424       else if (attempt == 1)
2425         methods = OPTAB_WIDEN;
2426       else
2427         methods = OPTAB_LIB_WIDEN;
2428
2429       if (rotate)
2430         {
2431           /* Widening does not work for rotation.  */
2432           if (methods == OPTAB_WIDEN)
2433             continue;
2434           else if (methods == OPTAB_LIB_WIDEN)
2435             {
2436               /* If we have been unable to open-code this by a rotation,
2437                  do it as the IOR of two shifts.  I.e., to rotate A
2438                  by N bits, compute
2439                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2440                  where C is the bitsize of A.
2441
2442                  It is theoretically possible that the target machine might
2443                  not be able to perform either shift and hence we would
2444                  be making two libcalls rather than just the one for the
2445                  shift (similarly if IOR could not be done).  We will allow
2446                  this extremely unlikely lossage to avoid complicating the
2447                  code below.  */
2448
2449               rtx subtarget = target == shifted ? 0 : target;
2450               rtx new_amount, other_amount;
2451               rtx temp1;
2452
2453               new_amount = op1;
2454               if (op1 == const0_rtx)
2455                 return shifted;
2456               else if (CONST_INT_P (op1))
2457                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2458                                         - INTVAL (op1));
2459               else
2460                 {
2461                   other_amount
2462                     = simplify_gen_unary (NEG, GET_MODE (op1),
2463                                           op1, GET_MODE (op1));
2464                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2465                   other_amount
2466                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2467                                            gen_int_mode (mask, GET_MODE (op1)));
2468                 }
2469
2470               shifted = force_reg (mode, shifted);
2471
2472               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2473                                      mode, shifted, new_amount, 0, 1);
2474               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2475                                       mode, shifted, other_amount,
2476                                       subtarget, 1);
2477               return expand_binop (mode, ior_optab, temp, temp1, target,
2478                                    unsignedp, methods);
2479             }
2480
2481           temp = expand_binop (mode,
2482                                left ? lrotate_optab : rrotate_optab,
2483                                shifted, op1, target, unsignedp, methods);
2484         }
2485       else if (unsignedp)
2486         temp = expand_binop (mode,
2487                              left ? lshift_optab : rshift_uns_optab,
2488                              shifted, op1, target, unsignedp, methods);
2489
2490       /* Do arithmetic shifts.
2491          Also, if we are going to widen the operand, we can just as well
2492          use an arithmetic right-shift instead of a logical one.  */
2493       if (temp == 0 && ! rotate
2494           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2495         {
2496           enum optab_methods methods1 = methods;
2497
2498           /* If trying to widen a log shift to an arithmetic shift,
2499              don't accept an arithmetic shift of the same size.  */
2500           if (unsignedp)
2501             methods1 = OPTAB_MUST_WIDEN;
2502
2503           /* Arithmetic shift */
2504
2505           temp = expand_binop (mode,
2506                                left ? lshift_optab : rshift_arith_optab,
2507                                shifted, op1, target, unsignedp, methods1);
2508         }
2509
2510       /* We used to try extzv here for logical right shifts, but that was
2511          only useful for one machine, the VAX, and caused poor code
2512          generation there for lshrdi3, so the code was deleted and a
2513          define_expand for lshrsi3 was added to vax.md.  */
2514     }
2515
2516   gcc_assert (temp != NULL_RTX || may_fail);
2517   return temp;
2518 }
2519
2520 /* Output a shift instruction for expression code CODE,
2521    with SHIFTED being the rtx for the value to shift,
2522    and AMOUNT the amount to shift by.
2523    Store the result in the rtx TARGET, if that is convenient.
2524    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2525    Return the rtx for where the value is.  */
2526
2527 rtx
2528 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2529               int amount, rtx target, int unsignedp)
2530 {
2531   return expand_shift_1 (code, mode,
2532                          shifted, GEN_INT (amount), target, unsignedp);
2533 }
2534
2535 /* Likewise, but return 0 if that cannot be done.  */
2536
2537 static rtx
2538 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2539                     int amount, rtx target, int unsignedp)
2540 {
2541   return expand_shift_1 (code, mode,
2542                          shifted, GEN_INT (amount), target, unsignedp, true);
2543 }
2544
2545 /* Output a shift instruction for expression code CODE,
2546    with SHIFTED being the rtx for the value to shift,
2547    and AMOUNT the tree for the amount to shift by.
2548    Store the result in the rtx TARGET, if that is convenient.
2549    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2550    Return the rtx for where the value is.  */
2551
2552 rtx
2553 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2554                        tree amount, rtx target, int unsignedp)
2555 {
2556   return expand_shift_1 (code, mode,
2557                          shifted, expand_normal (amount), target, unsignedp);
2558 }
2559
2560 \f
/* Forward declarations of file-local helpers.  synth_mult and
   expand_mult_const are defined further down in this file.  */
2561 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2562                         const struct mult_cost *, machine_mode mode);
2563 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2564                               const struct algorithm *, enum mult_variant);
2565 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2566 static rtx extract_high_half (machine_mode, rtx);
2567 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2568 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2569                                        int, int);
2570 /* Compute the best algorithm for multiplying by T and store it in
2571    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2572    If no such algorithm is found, ALG_OUT->cost is left at COST_LIMIT + 1
2573    (cost and latency alike) and all other fields of *ALG_OUT are
2574    undefined.  MODE is the machine mode of the multiplication.  */
2575
2576 static void
2577 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2578             const struct mult_cost *cost_limit, machine_mode mode)
2579 {
2580   int m;
2581   struct algorithm *alg_in, *best_alg;
2582   struct mult_cost best_cost;
2583   struct mult_cost new_limit;
2584   int op_cost, op_latency;
2585   unsigned HOST_WIDE_INT orig_t = t;
2586   unsigned HOST_WIDE_INT q;
2587   int maxm, hash_index;
2588   bool cache_hit = false;
2589   enum alg_code cache_alg = alg_zero;
2590   bool speed = optimize_insn_for_speed_p ();
2591   machine_mode imode;
2592   struct alg_hash_entry *entry_ptr;
2593
2594   /* Indicate that no algorithm is yet found.  If no algorithm
2595      is found, this value will be returned and indicate failure.  */
2596   alg_out->cost.cost = cost_limit->cost + 1;
2597   alg_out->cost.latency = cost_limit->latency + 1;
2598
       /* Nothing can be cheaper than a limit that is already exhausted.  */
2599   if (cost_limit->cost < 0
2600       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2601     return;
2602
2603   /* Be prepared for vector modes.  */
2604   imode = GET_MODE_INNER (mode);
2605
       /* Shift counts considered below must stay strictly under MAXM.  */
2606   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2607
2608   /* Restrict the bits of "t" to the multiplication's mode.  */
2609   t &= GET_MODE_MASK (imode);
2610
2611   /* t == 1 can be done in zero cost.  */
2612   if (t == 1)
2613     {
2614       alg_out->ops = 1;
2615       alg_out->cost.cost = 0;
2616       alg_out->cost.latency = 0;
2617       alg_out->op[0] = alg_m;
2618       return;
2619     }
2620
2621   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2622      fail now.  */
2623   if (t == 0)
2624     {
2625       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2626         return;
2627       else
2628         {
2629           alg_out->ops = 1;
2630           alg_out->cost.cost = zero_cost (speed);
2631           alg_out->cost.latency = zero_cost (speed);
2632           alg_out->op[0] = alg_zero;
2633           return;
2634         }
2635     }
2636
2637   /* We'll be needing a couple extra algorithm structures now.  */
2638
2639   alg_in = XALLOCA (struct algorithm);
2640   best_alg = XALLOCA (struct algorithm);
2641   best_cost = *cost_limit;
2642
2643   /* Compute the hash index.  */
2644   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2645
2646   /* See if we already know what to do for T.  */
2647   entry_ptr = alg_hash_entry_ptr (hash_index);
2648   if (entry_ptr->t == t
2649       && entry_ptr->mode == mode
2650       && entry_ptr->speed == speed
2651       && entry_ptr->alg != alg_unknown)
2652     {
2653       cache_alg = entry_ptr->alg;
2654
2655       if (cache_alg == alg_impossible)
2656         {
2657           /* The cache tells us that it's impossible to synthesize
2658              multiplication by T within entry_ptr->cost.  */
2659           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2660             /* COST_LIMIT is at least as restrictive as the one
2661                recorded in the hash table, in which case we have no
2662                hope of synthesizing a multiplication.  Just
2663                return.  */
2664             return;
2665
2666           /* If we get here, COST_LIMIT is less restrictive than the
2667              one recorded in the hash table, so we may be able to
2668              synthesize a multiplication.  Proceed as if we didn't
2669              have the cache entry.  */
2670         }
2671       else
2672         {
2673           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2674             /* The cached algorithm shows that this multiplication
2675                requires more cost than COST_LIMIT.  Just return.  This
2676                way, we don't clobber this cache entry with
2677                alg_impossible but retain useful information.  */
2678             return;
2679
2680           cache_hit = true;
2681
               /* Re-evaluate only the strategy named by the cache entry.
                  With CACHE_HIT set, each section below jumps to `done'
                  once it has run.  */
2682           switch (cache_alg)
2683             {
2684             case alg_shift:
2685               goto do_alg_shift;
2686
2687             case alg_add_t_m2:
2688             case alg_sub_t_m2:
2689               goto do_alg_addsub_t_m2;
2690
2691             case alg_add_factor:
2692             case alg_sub_factor:
2693               goto do_alg_addsub_factor;
2694
2695             case alg_add_t2_m:
2696               goto do_alg_add_t2_m;
2697
2698             case alg_sub_t2_m:
2699               goto do_alg_sub_t2_m;
2700
2701             default:
2702               gcc_unreachable ();
2703             }
2704         }
2705     }
2706
2707   /* If we have a group of zero bits at the low-order part of T, try
2708      multiplying by the remaining bits and then doing a shift.  */
2709
2710   if ((t & 1) == 0)
2711     {
2712     do_alg_shift:
2713       m = ctz_or_zero (t); /* m = number of low zero bits */
2714       if (m < maxm)
2715         {
2716           q = t >> m;
2717           /* The function expand_shift will choose between a shift and
2718              a sequence of additions, so the observed cost is given as
2719              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2720           op_cost = m * add_cost (speed, mode);
2721           if (shift_cost (speed, mode, m) < op_cost)
2722             op_cost = shift_cost (speed, mode, m);
2723           new_limit.cost = best_cost.cost - op_cost;
2724           new_limit.latency = best_cost.latency - op_cost;
2725           synth_mult (alg_in, q, &new_limit, mode);
2726
2727           alg_in->cost.cost += op_cost;
2728           alg_in->cost.latency += op_cost;
2729           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2730             {
                   /* Keep the cheaper sequence in BEST_ALG and append this
                      level's step to it.  */
2731               best_cost = alg_in->cost;
2732               std::swap (alg_in, best_alg);
2733               best_alg->log[best_alg->ops] = m;
2734               best_alg->op[best_alg->ops] = alg_shift;
2735             }
2736
2737           /* See if treating ORIG_T as a signed number yields a better
2738              sequence.  Try this sequence only for a negative ORIG_T
2739              as it would be useless for a non-negative ORIG_T.  */
2740           if ((HOST_WIDE_INT) orig_t < 0)
2741             {
2742               /* Shift ORIG_T as follows because a right shift of a
2743                  negative-valued signed type is implementation
2744                  defined.  */
2745               q = ~(~orig_t >> m);
2746               /* The function expand_shift will choose between a shift
2747                  and a sequence of additions, so the observed cost is
2748                  given as MIN (m * add_cost(speed, mode),
2749                  shift_cost(speed, mode, m)).  */
2750               op_cost = m * add_cost (speed, mode);
2751               if (shift_cost (speed, mode, m) < op_cost)
2752                 op_cost = shift_cost (speed, mode, m);
2753               new_limit.cost = best_cost.cost - op_cost;
2754               new_limit.latency = best_cost.latency - op_cost;
2755               synth_mult (alg_in, q, &new_limit, mode);
2756
2757               alg_in->cost.cost += op_cost;
2758               alg_in->cost.latency += op_cost;
2759               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760                 {
2761                   best_cost = alg_in->cost;
2762                   std::swap (alg_in, best_alg);
2763                   best_alg->log[best_alg->ops] = m;
2764                   best_alg->op[best_alg->ops] = alg_shift;
2765                 }
2766             }
2767         }
2768       if (cache_hit)
2769         goto done;
2770     }
2771
2772   /* If we have an odd number, add or subtract one.  */
2773   if ((t & 1) != 0)
2774     {
2775       unsigned HOST_WIDE_INT w;
2776
2777     do_alg_addsub_t_m2:
           /* Find the lowest zero bit of T; W ends up as that single bit.  */
2778       for (w = 1; (w & t) != 0; w <<= 1)
2779         ;
2780       /* If T was -1, then W will be zero after the loop.  This is another
2781          case where T ends with ...111.  Handling this with (T + 1) and
2782          subtract 1 produces slightly better code and results in algorithm
2783          selection much faster than treating it like the ...0111 case
2784          below.  */
2785       if (w == 0
2786           || (w > 2
2787               /* Reject the case where t is 3.
2788                  Thus we prefer addition in that case.  */
2789               && t != 3))
2790         {
2791           /* T ends with ...111.  Multiply by (T + 1) and subtract 1,
                  i.e. use T = (T + 1) - 1.  */
2792
2793           op_cost = add_cost (speed, mode);
2794           new_limit.cost = best_cost.cost - op_cost;
2795           new_limit.latency = best_cost.latency - op_cost;
2796           synth_mult (alg_in, t + 1, &new_limit, mode);
2797
2798           alg_in->cost.cost += op_cost;
2799           alg_in->cost.latency += op_cost;
2800           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2801             {
2802               best_cost = alg_in->cost;
2803               std::swap (alg_in, best_alg);
2804               best_alg->log[best_alg->ops] = 0;
2805               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2806             }
2807         }
2808       else
2809         {
2810           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1,
                  i.e. use T = (T - 1) + 1.  */
2811
2812           op_cost = add_cost (speed, mode);
2813           new_limit.cost = best_cost.cost - op_cost;
2814           new_limit.latency = best_cost.latency - op_cost;
2815           synth_mult (alg_in, t - 1, &new_limit, mode);
2816
2817           alg_in->cost.cost += op_cost;
2818           alg_in->cost.latency += op_cost;
2819           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2820             {
2821               best_cost = alg_in->cost;
2822               std::swap (alg_in, best_alg);
2823               best_alg->log[best_alg->ops] = 0;
2824               best_alg->op[best_alg->ops] = alg_add_t_m2;
2825             }
2826         }
2827
2828       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2829          quickly with a - a * n for some appropriate constant n.  */
2830       m = exact_log2 (-orig_t + 1);
2831       if (m >= 0 && m < maxm)
2832         {
2833           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2834           /* If the target has a cheap shift-and-subtract insn use
2835              that in preference to a shift insn followed by a sub insn.
2836              Assume that the shift-and-sub is "atomic" with a latency
2837              equal to its cost, otherwise assume that on superscalar
2838              hardware the shift may be executed concurrently with the
2839              earlier steps in the algorithm.  */
2840           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2841             {
2842               op_cost = shiftsub1_cost (speed, mode, m);
2843               op_latency = op_cost;
2844             }
2845           else
2846             op_latency = add_cost (speed, mode);
2847
2848           new_limit.cost = best_cost.cost - op_cost;
2849           new_limit.latency = best_cost.latency - op_latency;
2850           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2851                       &new_limit, mode);
2852
2853           alg_in->cost.cost += op_cost;
2854           alg_in->cost.latency += op_latency;
2855           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2856             {
2857               best_cost = alg_in->cost;
2858               std::swap (alg_in, best_alg);
2859               best_alg->log[best_alg->ops] = m;
2860               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2861             }
2862         }
2863
2864       if (cache_hit)
2865         goto done;
2866     }
2867
2868   /* Look for factors of t of the form
2869      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2870      If we find such a factor, we can multiply by t using an algorithm that
2871      multiplies by q, shift the result by m and add/subtract it to itself.
2872
2873      We search for large factors first and loop down, even if large factors
2874      are less probable than small; if we find a large factor we will find a
2875      good sequence quickly, and therefore be able to prune (by decreasing
2876      COST_LIMIT) the search.  */
2877
2878  do_alg_addsub_factor:
2879   for (m = floor_log2 (t - 1); m >= 2; m--)
2880     {
2881       unsigned HOST_WIDE_INT d;
2882
           /* On a cache hit, only the factor form recorded in the cache
              (2**m + 1 here, 2**m - 1 further down) is tried.  */
2883       d = (HOST_WIDE_INT_1U << m) + 1;
2884       if (t % d == 0 && t > d && m < maxm
2885           && (!cache_hit || cache_alg == alg_add_factor))
2886         {
2887           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2888           if (shiftadd_cost (speed, mode, m) <= op_cost)
2889             op_cost = shiftadd_cost (speed, mode, m);
2890
2891           op_latency = op_cost;
2892
2893
2894           new_limit.cost = best_cost.cost - op_cost;
2895           new_limit.latency = best_cost.latency - op_latency;
2896           synth_mult (alg_in, t / d, &new_limit, mode);
2897
2898           alg_in->cost.cost += op_cost;
2899           alg_in->cost.latency += op_latency;
2900           if (alg_in->cost.latency < op_cost)
2901             alg_in->cost.latency = op_cost;
2902           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2903             {
2904               best_cost = alg_in->cost;
2905               std::swap (alg_in, best_alg);
2906               best_alg->log[best_alg->ops] = m;
2907               best_alg->op[best_alg->ops] = alg_add_factor;
2908             }
2909           /* Other factors will have been taken care of in the recursion.  */
2910           break;
2911         }
2912
2913       d = (HOST_WIDE_INT_1U << m) - 1;
2914       if (t % d == 0 && t > d && m < maxm
2915           && (!cache_hit || cache_alg == alg_sub_factor))
2916         {
2917           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2918           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2919             op_cost = shiftsub0_cost (speed, mode, m);
2920
2921           op_latency = op_cost;
2922
2923           new_limit.cost = best_cost.cost - op_cost;
2924           new_limit.latency = best_cost.latency - op_latency;
2925           synth_mult (alg_in, t / d, &new_limit, mode);
2926
2927           alg_in->cost.cost += op_cost;
2928           alg_in->cost.latency += op_latency;
2929           if (alg_in->cost.latency < op_cost)
2930             alg_in->cost.latency = op_cost;
2931           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2932             {
2933               best_cost = alg_in->cost;
2934               std::swap (alg_in, best_alg);
2935               best_alg->log[best_alg->ops] = m;
2936               best_alg->op[best_alg->ops] = alg_sub_factor;
2937             }
               /* As above, stop at the first factor found.  */
2938           break;
2939         }
2940     }
2941   if (cache_hit)
2942     goto done;
2943
2944   /* Try shift-and-add (load effective address) instructions,
2945      i.e. do a*3, a*5, a*9.  */
2946   if ((t & 1) != 0)
2947     {
2948     do_alg_add_t2_m:
           /* T - 1 is Q with M trailing zero bits: recurse on
              (T - 1) >> M and finish with one shift-and-add.  */
2949       q = t - 1;
2950       m = ctz_hwi (q);
2951       if (q && m < maxm)
2952         {
2953           op_cost = shiftadd_cost (speed, mode, m);
2954           new_limit.cost = best_cost.cost - op_cost;
2955           new_limit.latency = best_cost.latency - op_cost;
2956           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2957
2958           alg_in->cost.cost += op_cost;
2959           alg_in->cost.latency += op_cost;
2960           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2961             {
2962               best_cost = alg_in->cost;
2963               std::swap (alg_in, best_alg);
2964               best_alg->log[best_alg->ops] = m;
2965               best_alg->op[best_alg->ops] = alg_add_t2_m;
2966             }
2967         }
2968       if (cache_hit)
2969         goto done;
2970
2971     do_alg_sub_t2_m:
           /* Likewise for T + 1, finishing with one shift-and-subtract.
              Q is zero when T + 1 wraps around, hence the Q test.  */
2972       q = t + 1;
2973       m = ctz_hwi (q);
2974       if (q && m < maxm)
2975         {
2976           op_cost = shiftsub0_cost (speed, mode, m);
2977           new_limit.cost = best_cost.cost - op_cost;
2978           new_limit.latency = best_cost.latency - op_cost;
2979           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2980
2981           alg_in->cost.cost += op_cost;
2982           alg_in->cost.latency += op_cost;
2983           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2984             {
2985               best_cost = alg_in->cost;
2986               std::swap (alg_in, best_alg);
2987               best_alg->log[best_alg->ops] = m;
2988               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2989             }
2990         }
2991       if (cache_hit)
2992         goto done;
2993     }
2994
2995  done:
2996   /* If best_cost has not decreased, we have not found any algorithm.  */
2997   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2998     {
2999       /* We failed to find an algorithm.  Record alg_impossible for
3000          this case (that is, <T, MODE, COST_LIMIT>) so that next time
3001          we are asked to find an algorithm for T within the same or
3002          lower COST_LIMIT, we can immediately return to the
3003          caller.  */
3004       entry_ptr->t = t;
3005       entry_ptr->mode = mode;
3006       entry_ptr->speed = speed;
3007       entry_ptr->alg = alg_impossible;
3008       entry_ptr->cost = *cost_limit;
3009       return;
3010     }
3011
3012   /* Cache the result.  */
3013   if (!cache_hit)
3014     {
3015       entry_ptr->t = t;
3016       entry_ptr->mode = mode;
3017       entry_ptr->speed = speed;
           /* Only the step chosen at this level is recorded; the steps for
              the smaller multiplier come from the recursive calls.  */
3018       entry_ptr->alg = best_alg->op[best_alg->ops];
3019       entry_ptr->cost.cost = best_cost.cost;
3020       entry_ptr->cost.latency = best_cost.latency;
3021     }
3022
3023   /* If we are getting a too long sequence for `struct algorithm'
3024      to record, make this search fail.  */
3025   if (best_alg->ops == MAX_BITS_PER_WORD)
3026     return;
3027
3028   /* Copy the algorithm from temporary space to the space at alg_out.
3029      We avoid using structure assignment because the majority of
3030      best_alg is normally undefined, and this is a critical function.  */
3031   alg_out->ops = best_alg->ops + 1;
3032   alg_out->cost = best_cost;
3033   memcpy (alg_out->op, best_alg->op,
3034           alg_out->ops * sizeof *alg_out->op);
3035   memcpy (alg_out->log, best_alg->log,
3036           alg_out->ops * sizeof *alg_out->log);
3037 }
3038 \f
3039 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3040    Try three variations:
3041
3042        - a shift/add sequence based on VAL itself
3043        - a shift/add sequence based on -VAL, followed by a negation
3044        - a shift/add sequence based on VAL - 1, followed by an addition.
3045
3046    Return true if the cheapest of these cost less than MULT_COST,
3047    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3048
3049 bool
3050 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3051                      struct algorithm *alg, enum mult_variant *variant,
3052                      int mult_cost)
3053 {
3054   struct algorithm alg2;
3055   struct mult_cost limit;
3056   int op_cost;
3057   bool speed = optimize_insn_for_speed_p ();
3058
3059   /* Fail quickly for impossible bounds.  */
3060   if (mult_cost < 0)
3061     return false;
3062
3063   /* Ensure that mult_cost provides a reasonable upper bound.
3064      Any constant multiplication can be performed with less
3065      than 2 * bits additions.  */
3066   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3067   if (mult_cost > op_cost)
3068     mult_cost = op_cost;
3069
3070   *variant = basic_variant;
3071   limit.cost = mult_cost;
3072   limit.latency = mult_cost;
3073   synth_mult (alg, val, &limit, mode);
3074
3075   /* This works only if the inverted value actually fits in an
3076      `unsigned int' */
3077   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3078     {
3079       op_cost = neg_cost (speed, mode);
3080       if (MULT_COST_LESS (&alg->cost, mult_cost))
3081         {
3082           limit.cost = alg->cost.cost - op_cost;
3083           limit.latency = alg->cost.latency - op_cost;
3084         }
3085       else
3086         {
3087           limit.cost = mult_cost - op_cost;
3088           limit.latency = mult_cost - op_cost;
3089         }
3090
3091       synth_mult (&alg2, -val, &limit, mode);
3092       alg2.cost.cost += op_cost;
3093       alg2.cost.latency += op_cost;
3094       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3095         *alg = alg2, *variant = negate_variant;
3096     }
3097
3098   /* This proves very useful for division-by-constant.  */
3099   op_cost = add_cost (speed, mode);
3100   if (MULT_COST_LESS (&alg->cost, mult_cost))
3101     {
3102       limit.cost = alg->cost.cost - op_cost;
3103       limit.latency = alg->cost.latency - op_cost;
3104     }
3105   else
3106     {
3107       limit.cost = mult_cost - op_cost;
3108       limit.latency = mult_cost - op_cost;
3109     }
3110
3111   synth_mult (&alg2, val - 1, &limit, mode);
3112   alg2.cost.cost += op_cost;
3113   alg2.cost.latency += op_cost;
3114   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3115     *alg = alg2, *variant = add_variant;
3116
3117   return MULT_COST_LESS (&alg->cost, mult_cost);
3118 }
3119
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT.

   ALG->op[0] must be alg_zero or alg_m; the remaining ALG->ops - 1 steps
   are applied in order, each using the shift count in ALG->log[].
   While the sequence is emitted, VAL_SO_FAR tracks the constant by which
   OP0 has effectively been multiplied; at the end it is checked against
   VAL (restricted to the bits of MODE's inner mode).

   Return the rtx holding the product.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
                   rtx target, const struct algorithm *alg,
                   enum mult_variant variant)
{
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the destination of
         intermediate results; when optimizing, pass no suggestion.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      /* TARGET is only suggested for the last step, and only when no
         add_variant fixup follows and we are not optimizing.  */
      rtx add_target
        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && !optimize)
          ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
        {
        case alg_shift:
          /* accum = accum << log.  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          /* REG_EQUAL note will be attached to the following insn.  */
          emit_move_insn (accum, tem);
          val_so_far <<= log;
          break;

        case alg_add_t_m2:
          /* accum = accum + (op0 << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += HOST_WIDE_INT_1U << log;
          break;

        case alg_sub_t_m2:
          /* accum = accum - (op0 << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far -= HOST_WIDE_INT_1U << log;
          break;

        case alg_add_t2_m:
          /* accum = (accum << log) + op0.  */
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          /* accum = (accum << log) - op0.  */
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
                                 add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;

        case alg_add_factor:
          /* accum = accum + (accum << log).  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
                                 add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;

        case alg_sub_factor:
          /* accum = (accum << log) - accum.  */
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
                                 (add_target
                                  ? add_target : (optimize ? 0 : tem)));
          val_so_far = (val_so_far << log) - val_so_far;
          break;

        default:
          gcc_unreachable ();
        }

      if (SCALAR_INT_MODE_P (mode))
        {
          /* Write a REG_EQUAL note on the last insn so that we can cse
             multiplication sequences.  Note that if ACCUM is a SUBREG,
             we've set the inner register and must properly indicate that.  */
          tem = op0, nmode = mode;
          accum_inner = accum;
          if (GET_CODE (accum) == SUBREG)
            {
              accum_inner = SUBREG_REG (accum);
              nmode = GET_MODE (accum_inner);
              tem = gen_lowpart (nmode, op0);
            }

          insn = get_last_insn ();
          set_dst_reg_note (insn, REG_EQUAL,
                            gen_rtx_MULT (nmode, tem,
                                          gen_int_mode (val_so_far, nmode)),
                            accum_inner);
        }
    }

  /* Apply the final fixup requested by VARIANT, keeping VAL_SO_FAR in
     step with the emitted code.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3267
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is passed on to the shift and binop expanders; when it is
   zero, flag_trapv is set and MODE is a scalar integer mode, the
   trapping optabs (negv_optab / smulv_optab) are used and shift/add
   synthesis is skipped.

   We check specially for a constant integer as OP1.  If OP0 is a
   constant (CONSTANT_P), the two operands are swapped first.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial multipliers: 0, 1 and -1.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
        {
          int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
          /* Perfect power of 2 (other than 1, which is handled above).  */
          if (shift > 0)
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* A placeholder register operand, used only to cost a MULT rtx.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
                           target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3424
3425 /* Return a cost estimate for multiplying a register by the given
3426    COEFFicient in the given MODE and SPEED.  */
3427
3428 int
3429 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3430 {
3431   int max_cost;
3432   struct algorithm algorithm;
3433   enum mult_variant variant;
3434
3435   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3436   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3437                            mode, speed);
3438   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3439     return algorithm.cost.cost;
3440   else
3441     return max_cost;
3442 }
3443
3444 /* Perform a widening multiplication and return an rtx for the result.
3445    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3446    TARGET is a suggestion for where to store the result (an rtx).
3447    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3448    or smul_widen_optab.
3449
3450    We check specially for a constant integer as OP1, comparing the
3451    cost of a widening multiply against the cost of a sequence of shifts
3452    and adds.  */
3453
3454 rtx
3455 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3456                       int unsignedp, optab this_optab)
3457 {
3458   bool speed = optimize_insn_for_speed_p ();
3459   rtx cop1;
3460
3461   if (CONST_INT_P (op1)
3462       && GET_MODE (op0) != VOIDmode
3463       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3464                                 this_optab == umul_widen_optab))
3465       && CONST_INT_P (cop1)
3466       && (INTVAL (cop1) >= 0
3467           || HWI_COMPUTABLE_MODE_P (mode)))
3468     {
3469       HOST_WIDE_INT coeff = INTVAL (cop1);
3470       int max_cost;
3471       enum mult_variant variant;
3472       struct algorithm algorithm;
3473
3474       if (coeff == 0)
3475         return CONST0_RTX (mode);
3476
3477       /* Special case powers of two.  */
3478       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3479         {
3480           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3481           return expand_shift (LSHIFT_EXPR, mode, op0,
3482                                floor_log2 (coeff), target, unsignedp);
3483         }
3484
3485       /* Exclude cost of op0 from max_cost to match the cost
3486          calculation of the synth_mult.  */
3487       max_cost = mul_widen_cost (speed, mode);
3488       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3489                                max_cost))
3490         {
3491           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3492           return expand_mult_const (mode, op0, coeff, target,
3493                                     &algorithm, variant);
3494         }
3495     }
3496   return expand_binop (mode, this_optab, op0, op1, target,
3497                        unsignedp, OPTAB_LIB_WIDEN);
3498 }
3499 \f
3500 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3501    replace division by D, and put the least significant N bits of the result
3502    in *MULTIPLIER_PTR and return the most significant bit.
3503
3504    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3505    needed precision is in PRECISION (should be <= N).
3506
3507    PRECISION should be as small as possible so this function can choose
3508    multiplier more freely.
3509
3510    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3511    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3512
3513    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3514    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3515
3516 unsigned HOST_WIDE_INT
3517 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3518                    unsigned HOST_WIDE_INT *multiplier_ptr,
3519                    int *post_shift_ptr, int *lgup_ptr)
3520 {
3521   int lgup, post_shift;
3522   int pow, pow2;
3523
3524   /* lgup = ceil(log2(divisor)); */
3525   lgup = ceil_log2 (d);
3526
3527   gcc_assert (lgup <= n);
3528
3529   pow = n + lgup;
3530   pow2 = n + lgup - precision;
3531
3532   /* mlow = 2^(N + lgup)/d */
3533   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3534   wide_int mlow = wi::udiv_trunc (val, d);
3535
3536   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3537   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3538   wide_int mhigh = wi::udiv_trunc (val, d);
3539
3540   /* If precision == N, then mlow, mhigh exceed 2^N
3541      (but they do not exceed 2^(N+1)).  */
3542
3543   /* Reduce to lowest terms.  */
3544   for (post_shift = lgup; post_shift > 0; post_shift--)
3545     {
3546       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3547                                                        HOST_BITS_PER_WIDE_INT);
3548       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3549                                                        HOST_BITS_PER_WIDE_INT);
3550       if (ml_lo >= mh_lo)
3551         break;
3552
3553       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3554       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3555     }
3556
3557   *post_shift_ptr = post_shift;
3558   *lgup_ptr = lgup;
3559   if (n < HOST_BITS_PER_WIDE_INT)
3560     {
3561       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3562       *multiplier_ptr = mhigh.to_uhwi () & mask;
3563       return mhigh.to_uhwi () >= mask;
3564     }
3565   else
3566     {
3567       *multiplier_ptr = mhigh.to_uhwi ();
3568       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3569     }
3570 }
3571
3572 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3573    congruent to 1 (mod 2**N).  */
3574
3575 static unsigned HOST_WIDE_INT
3576 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3577 {
3578   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3579
3580   /* The algorithm notes that the choice y = x satisfies
3581      x*y == 1 mod 2^3, since x is assumed odd.
3582      Each iteration doubles the number of bits of significance in y.  */
3583
3584   unsigned HOST_WIDE_INT mask;
3585   unsigned HOST_WIDE_INT y = x;
3586   int nbit = 3;
3587
3588   mask = (n == HOST_BITS_PER_WIDE_INT
3589           ? HOST_WIDE_INT_M1U
3590           : (HOST_WIDE_INT_1U << n) - 1);
3591
3592   while (nbit < n)
3593     {
3594       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3595       nbit *= 2;
3596     }
3597   return y;
3598 }
3599
3600 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3601    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3602    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3603    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3604    become signed.
3605
3606    The result is put in TARGET if that is convenient.
3607
3608    MODE is the mode of operation.  */
3609
3610 rtx
3611 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3612                              rtx op1, rtx target, int unsignedp)
3613 {
3614   rtx tem;
3615   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3616
3617   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3618                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3619   tem = expand_and (mode, tem, op1, NULL_RTX);
3620   adj_operand
3621     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3622                      adj_operand);
3623
3624   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3625                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3626   tem = expand_and (mode, tem, op0, NULL_RTX);
3627   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3628                           target);
3629
3630   return target;
3631 }
3632
3633 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3634
3635 static rtx
3636 extract_high_half (machine_mode mode, rtx op)
3637 {
3638   machine_mode wider_mode;
3639
3640   if (mode == word_mode)
3641     return gen_highpart (mode, op);
3642
3643   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3644
3645   wider_mode = GET_MODE_WIDER_MODE (mode);
3646   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3647                      GET_MODE_BITSIZE (mode), 0, 1);
3648   return convert_modes (mode, wider_mode, op, 0);
3649 }
3650
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand (its INTVAL is read).

   MODE is the mode of operation and result, OP0 the non-constant
   operand, TARGET a suggestion for where to put the result and
   UNSIGNEDP nonzero for an unsigned multiply.  Each strategy below is
   only attempted when its estimated cost is below MAX_COST; they are
   tried in order and the first one that expands wins.

   Return the rtx holding the high part, or 0 if no strategy applied.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* The constant truncated to MODE, for the multiplies whose operands
     are in MODE.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost
     estimate covers the multiply plus two shifts and four additions.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only commit the captured insns if the multiply expanded.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* Nothing was both available and cheap enough.  */
  return 0;
}
3757
3758 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3759    putting the high half of the result in TARGET if that is convenient,
3760    and return where the result is.  If the operation can not be performed,
3761    0 is returned.
3762
3763    MODE is the mode of operation and result.
3764
3765    UNSIGNEDP nonzero means unsigned multiply.
3766
3767    MAX_COST is the total allowed cost for the expanded RTL.  */
3768
3769 static rtx
3770 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3771                       rtx target, int unsignedp, int max_cost)
3772 {
3773   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3774   unsigned HOST_WIDE_INT cnst1;
3775   int extra_cost;
3776   bool sign_adjust = false;
3777   enum mult_variant variant;
3778   struct algorithm alg;
3779   rtx tem;
3780   bool speed = optimize_insn_for_speed_p ();
3781
3782   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3783   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3784   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3785
3786   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3787
3788   /* We can't optimize modes wider than BITS_PER_WORD.
3789      ??? We might be able to perform double-word arithmetic if
3790      mode == word_mode, however all the cost calculations in
3791      synth_mult etc. assume single-word operations.  */
3792   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3793     return expmed_mult_highpart_optab (mode, op0, op1, target,
3794                                        unsignedp, max_cost);
3795
3796   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3797
3798   /* Check whether we try to multiply by a negative constant.  */
3799   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3800     {
3801       sign_adjust = true;
3802       extra_cost += add_cost (speed, mode);
3803     }
3804
3805   /* See whether shift/add multiplication is cheap enough.  */
3806   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3807                            max_cost - extra_cost))
3808     {
3809       /* See whether the specialized multiplication optabs are
3810          cheaper than the shift/add version.  */
3811       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3812                                         alg.cost.cost + extra_cost);
3813       if (tem)
3814         return tem;
3815
3816       tem = convert_to_mode (wider_mode, op0, unsignedp);
3817       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3818       tem = extract_high_half (mode, tem);
3819
3820       /* Adjust result for signedness.  */
3821       if (sign_adjust)
3822         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3823
3824       return tem;
3825     }
3826   return expmed_mult_highpart_optab (mode, op0, op1, target,
3827                                      unsignedp, max_cost);
3828 }
3829
3830
3831 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3832
3833 static rtx
3834 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3835 {
3836   rtx result, temp, shift;
3837   rtx_code_label *label;
3838   int logd;
3839   int prec = GET_MODE_PRECISION (mode);
3840
3841   logd = floor_log2 (d);
3842   result = gen_reg_rtx (mode);
3843
3844   /* Avoid conditional branches when they're expensive.  */
3845   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3846       && optimize_insn_for_speed_p ())
3847     {
3848       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3849                                       mode, 0, -1);
3850       if (signmask)
3851         {
3852           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3853           signmask = force_reg (mode, signmask);
3854           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3855
3856           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3857              which instruction sequence to use.  If logical right shifts
3858              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3859              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3860
3861           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3862           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3863               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3864                   > COSTS_N_INSNS (2)))
3865             {
3866               temp = expand_binop (mode, xor_optab, op0, signmask,
3867                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3868               temp = expand_binop (mode, sub_optab, temp, signmask,
3869                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3870               temp = expand_binop (mode, and_optab, temp,
3871                                    gen_int_mode (masklow, mode),
3872                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3873               temp = expand_binop (mode, xor_optab, temp, signmask,
3874                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3875               temp = expand_binop (mode, sub_optab, temp, signmask,
3876                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3877             }
3878           else
3879             {
3880               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3881                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3882               signmask = force_reg (mode, signmask);
3883
3884               temp = expand_binop (mode, add_optab, op0, signmask,
3885                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3886               temp = expand_binop (mode, and_optab, temp,
3887                                    gen_int_mode (masklow, mode),
3888                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3889               temp = expand_binop (mode, sub_optab, temp, signmask,
3890                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3891             }
3892           return temp;
3893         }
3894     }
3895
3896   /* Mask contains the mode's signbit and the significant bits of the
3897      modulus.  By including the signbit in the operation, many targets
3898      can avoid an explicit compare operation in the following comparison
3899      against zero.  */
3900   wide_int mask = wi::mask (logd, false, prec);
3901   mask = wi::set_bit (mask, prec - 1);
3902
3903   temp = expand_binop (mode, and_optab, op0,
3904                        immed_wide_int_const (mask, mode),
3905                        result, 1, OPTAB_LIB_WIDEN);
3906   if (temp != result)
3907     emit_move_insn (result, temp);
3908
3909   label = gen_label_rtx ();
3910   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3911
3912   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3913                        0, OPTAB_LIB_WIDEN);
3914
3915   mask = wi::mask (logd, true, prec);
3916   temp = expand_binop (mode, ior_optab, temp,
3917                        immed_wide_int_const (mask, mode),
3918                        result, 1, OPTAB_LIB_WIDEN);
3919   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3920                        0, OPTAB_LIB_WIDEN);
3921   if (temp != result)
3922     emit_move_insn (result, temp);
3923   emit_label (label);
3924   return result;
3925 }
3926
3927 /* Expand signed division of OP0 by a power of two D in mode MODE.
3928    This routine is only called for positive values of D.  */
3929
3930 static rtx
3931 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3932 {
3933   rtx temp;
3934   rtx_code_label *label;
3935   int logd;
3936
3937   logd = floor_log2 (d);
3938
3939   if (d == 2
3940       && BRANCH_COST (optimize_insn_for_speed_p (),
3941                       false) >= 1)
3942     {
3943       temp = gen_reg_rtx (mode);
3944       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3945       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3946                            0, OPTAB_LIB_WIDEN);
3947       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3948     }
3949
3950   if (HAVE_conditional_move
3951       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3952     {
3953       rtx temp2;
3954
3955       start_sequence ();
3956       temp2 = copy_to_mode_reg (mode, op0);
3957       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3958                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3959       temp = force_reg (mode, temp);
3960
3961       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3962       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3963                                      mode, temp, temp2, mode, 0);
3964       if (temp2)
3965         {
3966           rtx_insn *seq = get_insns ();
3967           end_sequence ();
3968           emit_insn (seq);
3969           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3970         }
3971       end_sequence ();
3972     }
3973
3974   if (BRANCH_COST (optimize_insn_for_speed_p (),
3975                    false) >= 2)
3976     {
3977       int ushift = GET_MODE_BITSIZE (mode) - logd;
3978
3979       temp = gen_reg_rtx (mode);
3980       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3981       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3982           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3983              > COSTS_N_INSNS (1))
3984         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3985                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3986       else
3987         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3988                              ushift, NULL_RTX, 1);
3989       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3990                            0, OPTAB_LIB_WIDEN);
3991       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3992     }
3993
3994   label = gen_label_rtx ();
3995   temp = copy_to_mode_reg (mode, op0);
3996   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3997   expand_inc (temp, gen_int_mode (d - 1, mode));
3998   emit_label (label);
3999   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4000 }
4001 \f
4002 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4003    if that is convenient, and returning where the result is.
4004    You may request either the quotient or the remainder as the result;
4005    specify REM_FLAG nonzero to get the remainder.
4006
4007    CODE is the expression code for which kind of division this is;
4008    it controls how rounding is done.  MODE is the machine mode to use.
4009    UNSIGNEDP nonzero means do unsigned division.  */
4010
4011 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4012    and then correct it by or'ing in missing high bits
4013    if result of ANDI is nonzero.
4014    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4015    This could optimize to a bfexts instruction.
4016    But C doesn't use these operations, so their optimizations are
4017    left for later.  */
4018 /* ??? For modulo, we don't actually need the highpart of the first product,
4019    the low part will do nicely.  And for small divisors, the second multiply
4020    can also be a low-part only multiply or even be completely left out.
4021    E.g. to calculate the remainder of a division by 3 with a 32 bit
4022    multiply, multiply with 0x55555556 and extract the upper two bits;
4023    the result is exact for inputs up to 0x1fffffff.
4024    The input range can be reduced by using cross-sum rules.
4025    For odd divisors >= 3, the following table gives right shift counts
4026    so that if a number is shifted by an integer multiple of the given
4027    amount, the remainder stays the same:
4028    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4029    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4030    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4031    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4032    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4033
4034    Cross-sum rules for even numbers can be derived by leaving as many bits
4035    to the right alone as the divisor has zeros to the right.
4036    E.g. if x is an unsigned 32 bit number:
4037    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4038    */
4039
4040 rtx
4041 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4042                rtx op0, rtx op1, rtx target, int unsignedp)
4043 {
4044   machine_mode compute_mode;
4045   rtx tquotient;
4046   rtx quotient = 0, remainder = 0;
4047   rtx_insn *last;
4048   int size;
4049   rtx_insn *insn;
4050   optab optab1, optab2;
4051   int op1_is_constant, op1_is_pow2 = 0;
4052   int max_cost, extra_cost;
4053   static HOST_WIDE_INT last_div_const = 0;
4054   bool speed = optimize_insn_for_speed_p ();
4055
4056   op1_is_constant = CONST_INT_P (op1);
4057   if (op1_is_constant)
4058     {
4059       wide_int ext_op1 = rtx_mode_t (op1, mode);
4060       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4061                      || (! unsignedp
4062                          && wi::popcount (wi::neg (ext_op1)) == 1));
4063     }
4064
4065   /*
4066      This is the structure of expand_divmod:
4067
4068      First comes code to fix up the operands so we can perform the operations
4069      correctly and efficiently.
4070
4071      Second comes a switch statement with code specific for each rounding mode.
4072      For some special operands this code emits all RTL for the desired
4073      operation, for other cases, it generates only a quotient and stores it in
4074      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4075      to indicate that it has not done anything.
4076
4077      Last comes code that finishes the operation.  If QUOTIENT is set and
4078      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4079      QUOTIENT is not set, it is computed using trunc rounding.
4080
4081      We try to generate special code for division and remainder when OP1 is a
4082      constant.  If |OP1| = 2**n we can use shifts and some other fast
4083      operations.  For other values of OP1, we compute a carefully selected
4084      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4085      by m.
4086
4087      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4088      half of the product.  Different strategies for generating the product are
4089      implemented in expmed_mult_highpart.
4090
4091      If what we actually want is the remainder, we generate that by another
4092      by-constant multiplication and a subtraction.  */
4093
4094   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4095      code below will malfunction if we are, so check here and handle
4096      the special case if so.  */
4097   if (op1 == const1_rtx)
4098     return rem_flag ? const0_rtx : op0;
4099
4100     /* When dividing by -1, we could get an overflow.
4101      negv_optab can handle overflows.  */
4102   if (! unsignedp && op1 == constm1_rtx)
4103     {
4104       if (rem_flag)
4105         return const0_rtx;
4106       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4107                           ? negv_optab : neg_optab, op0, target, 0);
4108     }
4109
4110   if (target
4111       /* Don't use the function value register as a target
4112          since we have to read it as well as write it,
4113          and function-inlining gets confused by this.  */
4114       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4115           /* Don't clobber an operand while doing a multi-step calculation.  */
4116           || ((rem_flag || op1_is_constant)
4117               && (reg_mentioned_p (target, op0)
4118                   || (MEM_P (op0) && MEM_P (target))))
4119           || reg_mentioned_p (target, op1)
4120           || (MEM_P (op1) && MEM_P (target))))
4121     target = 0;
4122
4123   /* Get the mode in which to perform this computation.  Normally it will
4124      be MODE, but sometimes we can't do the desired operation in MODE.
4125      If so, pick a wider mode in which we can do the operation.  Convert
4126      to that mode at the start to avoid repeated conversions.
4127
4128      First see what operations we need.  These depend on the expression
4129      we are evaluating.  (We assume that divxx3 insns exist under the
4130      same conditions that modxx3 insns and that these insns don't normally
4131      fail.  If these assumptions are not correct, we may generate less
4132      efficient code in some cases.)
4133
4134      Then see if we find a mode in which we can open-code that operation
4135      (either a division, modulus, or shift).  Finally, check for the smallest
4136      mode for which we can do the operation with a library call.  */
4137
4138   /* We might want to refine this now that we have division-by-constant
4139      optimization.  Since expmed_mult_highpart tries so many variants, it is
4140      not straightforward to generalize this.  Maybe we should make an array
4141      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4142
4143   optab1 = (op1_is_pow2
4144             ? (unsignedp ? lshr_optab : ashr_optab)
4145             : (unsignedp ? udiv_optab : sdiv_optab));
4146   optab2 = (op1_is_pow2 ? optab1
4147             : (unsignedp ? udivmod_optab : sdivmod_optab));
4148
4149   for (compute_mode = mode; compute_mode != VOIDmode;
4150        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4151     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4152         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4153       break;
4154
4155   if (compute_mode == VOIDmode)
4156     for (compute_mode = mode; compute_mode != VOIDmode;
4157          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4158       if (optab_libfunc (optab1, compute_mode)
4159           || optab_libfunc (optab2, compute_mode))
4160         break;
4161
4162   /* If we still couldn't find a mode, use MODE, but expand_binop will
4163      probably die.  */
4164   if (compute_mode == VOIDmode)
4165     compute_mode = mode;
4166
4167   if (target && GET_MODE (target) == compute_mode)
4168     tquotient = target;
4169   else
4170     tquotient = gen_reg_rtx (compute_mode);
4171
4172   size = GET_MODE_BITSIZE (compute_mode);
4173 #if 0
4174   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4175      (mode), and thereby get better code when OP1 is a constant.  Do that
4176      later.  It will require going over all usages of SIZE below.  */
4177   size = GET_MODE_BITSIZE (mode);
4178 #endif
4179
4180   /* Only deduct something for a REM if the last divide done was
4181      for a different constant.   Then set the constant of the last
4182      divide.  */
4183   max_cost = (unsignedp
4184               ? udiv_cost (speed, compute_mode)
4185               : sdiv_cost (speed, compute_mode));
4186   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4187                      && INTVAL (op1) == last_div_const))
4188     max_cost -= (mul_cost (speed, compute_mode)
4189                  + add_cost (speed, compute_mode));
4190
4191   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4192
4193   /* Now convert to the best mode to use.  */
4194   if (compute_mode != mode)
4195     {
4196       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4197       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4198
4199       /* convert_modes may have placed op1 into a register, so we
4200          must recompute the following.  */
4201       op1_is_constant = CONST_INT_P (op1);
4202       if (op1_is_constant)
4203         {
4204           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4205           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4206                          || (! unsignedp
4207                              && wi::popcount (wi::neg (ext_op1)) == 1));
4208         }
4209       else
4210         op1_is_pow2 = 0;
4211     }
4212
4213   /* If one of the operands is a volatile MEM, copy it into a register.  */
4214
4215   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4216     op0 = force_reg (compute_mode, op0);
4217   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4218     op1 = force_reg (compute_mode, op1);
4219
4220   /* If we need the remainder or if OP1 is constant, we need to
4221      put OP0 in a register in case it has any queued subexpressions.  */
4222   if (rem_flag || op1_is_constant)
4223     op0 = force_reg (compute_mode, op0);
4224
4225   last = get_last_insn ();
4226
4227   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4228   if (unsignedp)
4229     {
4230       if (code == FLOOR_DIV_EXPR)
4231         code = TRUNC_DIV_EXPR;
4232       if (code == FLOOR_MOD_EXPR)
4233         code = TRUNC_MOD_EXPR;
4234       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4235         code = TRUNC_DIV_EXPR;
4236     }
4237
4238   if (op1 != const0_rtx)
4239     switch (code)
4240       {
4241       case TRUNC_MOD_EXPR:
4242       case TRUNC_DIV_EXPR:
4243         if (op1_is_constant)
4244           {
4245             if (unsignedp)
4246               {
4247                 unsigned HOST_WIDE_INT mh, ml;
4248                 int pre_shift, post_shift;
4249                 int dummy;
4250                 wide_int wd = rtx_mode_t (op1, compute_mode);
4251                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4252
4253                 if (wi::popcount (wd) == 1)
4254                   {
4255                     pre_shift = floor_log2 (d);
4256                     if (rem_flag)
4257                       {
4258                         unsigned HOST_WIDE_INT mask
4259                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4260                         remainder
4261                           = expand_binop (compute_mode, and_optab, op0,
4262                                           gen_int_mode (mask, compute_mode),
4263                                           remainder, 1,
4264                                           OPTAB_LIB_WIDEN);
4265                         if (remainder)
4266                           return gen_lowpart (mode, remainder);
4267                       }
4268                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4269                                              pre_shift, tquotient, 1);
4270                   }
4271                 else if (size <= HOST_BITS_PER_WIDE_INT)
4272                   {
4273                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4274                       {
4275                         /* Most significant bit of divisor is set; emit an scc
4276                            insn.  */
4277                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4278                                                           compute_mode, 1, 1);
4279                       }
4280                     else
4281                       {
4282                         /* Find a suitable multiplier and right shift count
4283                            instead of multiplying with D.  */
4284
4285                         mh = choose_multiplier (d, size, size,
4286                                                 &ml, &post_shift, &dummy);
4287
4288                         /* If the suggested multiplier is more than SIZE bits,
4289                            we can do better for even divisors, using an
4290                            initial right shift.  */
4291                         if (mh != 0 && (d & 1) == 0)
4292                           {
4293                             pre_shift = ctz_or_zero (d);
4294                             mh = choose_multiplier (d >> pre_shift, size,
4295                                                     size - pre_shift,
4296                                                     &ml, &post_shift, &dummy);
4297                             gcc_assert (!mh);
4298                           }
4299                         else
4300                           pre_shift = 0;
4301
4302                         if (mh != 0)
4303                           {
4304                             rtx t1, t2, t3, t4;
4305
4306                             if (post_shift - 1 >= BITS_PER_WORD)
4307                               goto fail1;
4308
4309                             extra_cost
4310                               = (shift_cost (speed, compute_mode, post_shift - 1)
4311                                  + shift_cost (speed, compute_mode, 1)
4312                                  + 2 * add_cost (speed, compute_mode));
4313                             t1 = expmed_mult_highpart
4314                               (compute_mode, op0,
4315                                gen_int_mode (ml, compute_mode),
4316                                NULL_RTX, 1, max_cost - extra_cost);
4317                             if (t1 == 0)
4318                               goto fail1;
4319                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4320                                                                op0, t1),
4321                                                 NULL_RTX);
4322                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4323                                                t2, 1, NULL_RTX, 1);
4324                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4325                                                               t1, t3),
4326                                                 NULL_RTX);
4327                             quotient = expand_shift
4328                               (RSHIFT_EXPR, compute_mode, t4,
4329                                post_shift - 1, tquotient, 1);
4330                           }
4331                         else
4332                           {
4333                             rtx t1, t2;
4334
4335                             if (pre_shift >= BITS_PER_WORD
4336                                 || post_shift >= BITS_PER_WORD)
4337                               goto fail1;
4338
4339                             t1 = expand_shift
4340                               (RSHIFT_EXPR, compute_mode, op0,
4341                                pre_shift, NULL_RTX, 1);
4342                             extra_cost
4343                               = (shift_cost (speed, compute_mode, pre_shift)
4344                                  + shift_cost (speed, compute_mode, post_shift));
4345                             t2 = expmed_mult_highpart
4346                               (compute_mode, t1,
4347                                gen_int_mode (ml, compute_mode),
4348                                NULL_RTX, 1, max_cost - extra_cost);
4349                             if (t2 == 0)
4350                               goto fail1;
4351                             quotient = expand_shift
4352                               (RSHIFT_EXPR, compute_mode, t2,
4353                                post_shift, tquotient, 1);
4354                           }
4355                       }
4356                   }
4357                 else            /* Too wide mode to use tricky code */
4358                   break;
4359
4360                 insn = get_last_insn ();
4361                 if (insn != last)
4362                   set_dst_reg_note (insn, REG_EQUAL,
4363                                     gen_rtx_UDIV (compute_mode, op0, op1),
4364                                     quotient);
4365               }
4366             else                /* TRUNC_DIV, signed */
4367               {
4368                 unsigned HOST_WIDE_INT ml;
4369                 int lgup, post_shift;
4370                 rtx mlr;
4371                 HOST_WIDE_INT d = INTVAL (op1);
4372                 unsigned HOST_WIDE_INT abs_d;
4373
4374                 /* Since d might be INT_MIN, we have to cast to
4375                    unsigned HOST_WIDE_INT before negating to avoid
4376                    undefined signed overflow.  */
4377                 abs_d = (d >= 0
4378                          ? (unsigned HOST_WIDE_INT) d
4379                          : - (unsigned HOST_WIDE_INT) d);
4380
4381                 /* n rem d = n rem -d */
4382                 if (rem_flag && d < 0)
4383                   {
4384                     d = abs_d;
4385                     op1 = gen_int_mode (abs_d, compute_mode);
4386                   }
4387
4388                 if (d == 1)
4389                   quotient = op0;
4390                 else if (d == -1)
4391                   quotient = expand_unop (compute_mode, neg_optab, op0,
4392                                           tquotient, 0);
4393                 else if (size <= HOST_BITS_PER_WIDE_INT
4394                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4395                   {
4396                     /* This case is not handled correctly below.  */
4397                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4398                                                 compute_mode, 1, 1);
4399                     if (quotient == 0)
4400                       goto fail1;
4401                   }
4402                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4403                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4404                          && (rem_flag
4405                              ? smod_pow2_cheap (speed, compute_mode)
4406                              : sdiv_pow2_cheap (speed, compute_mode))
4407                          /* We assume that cheap metric is true if the
4408                             optab has an expander for this mode.  */
4409                          && ((optab_handler ((rem_flag ? smod_optab
4410                                               : sdiv_optab),
4411                                              compute_mode)
4412                               != CODE_FOR_nothing)
4413                              || (optab_handler (sdivmod_optab,
4414                                                 compute_mode)
4415                                  != CODE_FOR_nothing)))
4416                   ;
4417                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4418                          && (size <= HOST_BITS_PER_WIDE_INT
4419                              || abs_d != (unsigned HOST_WIDE_INT) d))
4420                   {
4421                     if (rem_flag)
4422                       {
4423                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4424                         if (remainder)
4425                           return gen_lowpart (mode, remainder);
4426                       }
4427
4428                     if (sdiv_pow2_cheap (speed, compute_mode)
4429                         && ((optab_handler (sdiv_optab, compute_mode)
4430                              != CODE_FOR_nothing)
4431                             || (optab_handler (sdivmod_optab, compute_mode)
4432                                 != CODE_FOR_nothing)))
4433                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4434                                                 compute_mode, op0,
4435                                                 gen_int_mode (abs_d,
4436                                                               compute_mode),
4437                                                 NULL_RTX, 0);
4438                     else
4439                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4440
4441                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4442                        negate the quotient.  */
4443                     if (d < 0)
4444                       {
4445                         insn = get_last_insn ();
4446                         if (insn != last
4447                             && abs_d < (HOST_WIDE_INT_1U
4448                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4449                           set_dst_reg_note (insn, REG_EQUAL,
4450                                             gen_rtx_DIV (compute_mode, op0,
4451                                                          gen_int_mode
4452                                                            (abs_d,
4453                                                             compute_mode)),
4454                                             quotient);
4455
4456                         quotient = expand_unop (compute_mode, neg_optab,
4457                                                 quotient, quotient, 0);
4458                       }
4459                   }
4460                 else if (size <= HOST_BITS_PER_WIDE_INT)
4461                   {
4462                     choose_multiplier (abs_d, size, size - 1,
4463                                        &ml, &post_shift, &lgup);
4464                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4465                       {
4466                         rtx t1, t2, t3;
4467
4468                         if (post_shift >= BITS_PER_WORD
4469                             || size - 1 >= BITS_PER_WORD)
4470                           goto fail1;
4471
4472                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4473                                       + shift_cost (speed, compute_mode, size - 1)
4474                                       + add_cost (speed, compute_mode));
4475                         t1 = expmed_mult_highpart
4476                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4477                            NULL_RTX, 0, max_cost - extra_cost);
4478                         if (t1 == 0)
4479                           goto fail1;
4480                         t2 = expand_shift
4481                           (RSHIFT_EXPR, compute_mode, t1,
4482                            post_shift, NULL_RTX, 0);
4483                         t3 = expand_shift
4484                           (RSHIFT_EXPR, compute_mode, op0,
4485                            size - 1, NULL_RTX, 0);
4486                         if (d < 0)
4487                           quotient
4488                             = force_operand (gen_rtx_MINUS (compute_mode,
4489                                                             t3, t2),
4490                                              tquotient);
4491                         else
4492                           quotient
4493                             = force_operand (gen_rtx_MINUS (compute_mode,
4494                                                             t2, t3),
4495                                              tquotient);
4496                       }
4497                     else
4498                       {
4499                         rtx t1, t2, t3, t4;
4500
4501                         if (post_shift >= BITS_PER_WORD
4502                             || size - 1 >= BITS_PER_WORD)
4503                           goto fail1;
4504
4505                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4506                         mlr = gen_int_mode (ml, compute_mode);
4507                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4508                                       + shift_cost (speed, compute_mode, size - 1)
4509                                       + 2 * add_cost (speed, compute_mode));
4510                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4511                                                    NULL_RTX, 0,
4512                                                    max_cost - extra_cost);
4513                         if (t1 == 0)
4514                           goto fail1;
4515                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4516                                                           t1, op0),
4517                                             NULL_RTX);
4518                         t3 = expand_shift
4519                           (RSHIFT_EXPR, compute_mode, t2,
4520                            post_shift, NULL_RTX, 0);
4521                         t4 = expand_shift
4522                           (RSHIFT_EXPR, compute_mode, op0,
4523                            size - 1, NULL_RTX, 0);
4524                         if (d < 0)
4525                           quotient
4526                             = force_operand (gen_rtx_MINUS (compute_mode,
4527                                                             t4, t3),
4528                                              tquotient);
4529                         else
4530                           quotient
4531                             = force_operand (gen_rtx_MINUS (compute_mode,
4532                                                             t3, t4),
4533                                              tquotient);
4534                       }
4535                   }
4536                 else            /* Too wide mode to use tricky code */
4537                   break;
4538
4539                 insn = get_last_insn ();
4540                 if (insn != last)
4541                   set_dst_reg_note (insn, REG_EQUAL,
4542                                     gen_rtx_DIV (compute_mode, op0, op1),
4543                                     quotient);
4544               }
4545             break;
4546           }
4547       fail1:
4548         delete_insns_since (last);
4549         break;
4550
4551       case FLOOR_DIV_EXPR:
4552       case FLOOR_MOD_EXPR:
4553       /* We will come here only for signed operations.  */
4554         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4555           {
4556             unsigned HOST_WIDE_INT mh, ml;
4557             int pre_shift, lgup, post_shift;
4558             HOST_WIDE_INT d = INTVAL (op1);
4559
4560             if (d > 0)
4561               {
4562                 /* We could just as easily deal with negative constants here,
4563                    but it does not seem worth the trouble for GCC 2.6.  */
4564                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4565                   {
4566                     pre_shift = floor_log2 (d);
4567                     if (rem_flag)
4568                       {
4569                         unsigned HOST_WIDE_INT mask
4570                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4571                         remainder = expand_binop
4572                           (compute_mode, and_optab, op0,
4573                            gen_int_mode (mask, compute_mode),
4574                            remainder, 0, OPTAB_LIB_WIDEN);
4575                         if (remainder)
4576                           return gen_lowpart (mode, remainder);
4577                       }
4578                     quotient = expand_shift
4579                       (RSHIFT_EXPR, compute_mode, op0,
4580                        pre_shift, tquotient, 0);
4581                   }
4582                 else
4583                   {
4584                     rtx t1, t2, t3, t4;
4585
4586                     mh = choose_multiplier (d, size, size - 1,
4587                                             &ml, &post_shift, &lgup);
4588                     gcc_assert (!mh);
4589
4590                     if (post_shift < BITS_PER_WORD
4591                         && size - 1 < BITS_PER_WORD)
4592                       {
4593                         t1 = expand_shift
4594                           (RSHIFT_EXPR, compute_mode, op0,
4595                            size - 1, NULL_RTX, 0);
4596                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4597                                            NULL_RTX, 0, OPTAB_WIDEN);
4598                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4599                                       + shift_cost (speed, compute_mode, size - 1)
4600                                       + 2 * add_cost (speed, compute_mode));
4601                         t3 = expmed_mult_highpart
4602                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4603                            NULL_RTX, 1, max_cost - extra_cost);
4604                         if (t3 != 0)
4605                           {
4606                             t4 = expand_shift
4607                               (RSHIFT_EXPR, compute_mode, t3,
4608                                post_shift, NULL_RTX, 1);
4609                             quotient = expand_binop (compute_mode, xor_optab,
4610                                                      t4, t1, tquotient, 0,
4611                                                      OPTAB_WIDEN);
4612                           }
4613                       }
4614                   }
4615               }
4616             else
4617               {
4618                 rtx nsign, t1, t2, t3, t4;
4619                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4620                                                   op0, constm1_rtx), NULL_RTX);
4621                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4622                                    0, OPTAB_WIDEN);
4623                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4624                                       size - 1, NULL_RTX, 0);
4625                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4626                                     NULL_RTX);
4627                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4628                                     NULL_RTX, 0);
4629                 if (t4)
4630                   {
4631                     rtx t5;
4632                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4633                                       NULL_RTX, 0);
4634                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4635                                                             t4, t5),
4636                                               tquotient);
4637                   }
4638               }
4639           }
4640
4641         if (quotient != 0)
4642           break;
4643         delete_insns_since (last);
4644
4645         /* Try using an instruction that produces both the quotient and
4646            remainder, using truncation.  We can easily compensate the quotient
4647            or remainder to get floor rounding, once we have the remainder.
4648            Notice that we compute also the final remainder value here,
4649            and return the result right away.  */
4650         if (target == 0 || GET_MODE (target) != compute_mode)
4651           target = gen_reg_rtx (compute_mode);
4652
4653         if (rem_flag)
4654           {
4655             remainder
4656               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4657             quotient = gen_reg_rtx (compute_mode);
4658           }
4659         else
4660           {
4661             quotient
4662               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4663             remainder = gen_reg_rtx (compute_mode);
4664           }
4665
4666         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4667                                  quotient, remainder, 0))
4668           {
4669             /* This could be computed with a branch-less sequence.
4670                Save that for later.  */
4671             rtx tem;
4672             rtx_code_label *label = gen_label_rtx ();
4673             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4674             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4675                                 NULL_RTX, 0, OPTAB_WIDEN);
4676             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4677             expand_dec (quotient, const1_rtx);
4678             expand_inc (remainder, op1);
4679             emit_label (label);
4680             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4681           }
4682
4683         /* No luck with division elimination or divmod.  Have to do it
4684            by conditionally adjusting op0 *and* the result.  */
4685         {
4686           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4687           rtx adjusted_op0;
4688           rtx tem;
4689
4690           quotient = gen_reg_rtx (compute_mode);
4691           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4692           label1 = gen_label_rtx ();
4693           label2 = gen_label_rtx ();
4694           label3 = gen_label_rtx ();
4695           label4 = gen_label_rtx ();
4696           label5 = gen_label_rtx ();
4697           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4698           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4699           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4700                               quotient, 0, OPTAB_LIB_WIDEN);
4701           if (tem != quotient)
4702             emit_move_insn (quotient, tem);
4703           emit_jump_insn (targetm.gen_jump (label5));
4704           emit_barrier ();
4705           emit_label (label1);
4706           expand_inc (adjusted_op0, const1_rtx);
4707           emit_jump_insn (targetm.gen_jump (label4));
4708           emit_barrier ();
4709           emit_label (label2);
4710           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4711           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4712                               quotient, 0, OPTAB_LIB_WIDEN);
4713           if (tem != quotient)
4714             emit_move_insn (quotient, tem);
4715           emit_jump_insn (targetm.gen_jump (label5));
4716           emit_barrier ();
4717           emit_label (label3);
4718           expand_dec (adjusted_op0, const1_rtx);
4719           emit_label (label4);
4720           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4721                               quotient, 0, OPTAB_LIB_WIDEN);
4722           if (tem != quotient)
4723             emit_move_insn (quotient, tem);
4724           expand_dec (quotient, const1_rtx);
4725           emit_label (label5);
4726         }
4727         break;
4728
4729       case CEIL_DIV_EXPR:
4730       case CEIL_MOD_EXPR:
4731         if (unsignedp)
4732           {
4733             if (op1_is_constant
4734                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4735                 && (size <= HOST_BITS_PER_WIDE_INT
4736                     || INTVAL (op1) >= 0))
4737               {
4738                 rtx t1, t2, t3;
4739                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4740                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4741                                    floor_log2 (d), tquotient, 1);
4742                 t2 = expand_binop (compute_mode, and_optab, op0,
4743                                    gen_int_mode (d - 1, compute_mode),
4744                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4745                 t3 = gen_reg_rtx (compute_mode);
4746                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4747                                       compute_mode, 1, 1);
4748                 if (t3 == 0)
4749                   {
4750                     rtx_code_label *lab;
4751                     lab = gen_label_rtx ();
4752                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4753                     expand_inc (t1, const1_rtx);
4754                     emit_label (lab);
4755                     quotient = t1;
4756                   }
4757                 else
4758                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4759                                                           t1, t3),
4760                                             tquotient);
4761                 break;
4762               }
4763
4764             /* Try using an instruction that produces both the quotient and
4765                remainder, using truncation.  We can easily compensate the
4766                quotient or remainder to get ceiling rounding, once we have the
4767                remainder.  Notice that we compute also the final remainder
4768                value here, and return the result right away.  */
4769             if (target == 0 || GET_MODE (target) != compute_mode)
4770               target = gen_reg_rtx (compute_mode);
4771
4772             if (rem_flag)
4773               {
4774                 remainder = (REG_P (target)
4775                              ? target : gen_reg_rtx (compute_mode));
4776                 quotient = gen_reg_rtx (compute_mode);
4777               }
4778             else
4779               {
4780                 quotient = (REG_P (target)
4781                             ? target : gen_reg_rtx (compute_mode));
4782                 remainder = gen_reg_rtx (compute_mode);
4783               }
4784
4785             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4786                                      remainder, 1))
4787               {
4788                 /* This could be computed with a branch-less sequence.
4789                    Save that for later.  */
4790                 rtx_code_label *label = gen_label_rtx ();
4791                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4792                                  compute_mode, label);
4793                 expand_inc (quotient, const1_rtx);
4794                 expand_dec (remainder, op1);
4795                 emit_label (label);
4796                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4797               }
4798
4799             /* No luck with division elimination or divmod.  Have to do it
4800                by conditionally adjusting op0 *and* the result.  */
4801             {
4802               rtx_code_label *label1, *label2;
4803               rtx adjusted_op0, tem;
4804
4805               quotient = gen_reg_rtx (compute_mode);
4806               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4807               label1 = gen_label_rtx ();
4808               label2 = gen_label_rtx ();
4809               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4810                                compute_mode, label1);
4811               emit_move_insn  (quotient, const0_rtx);
4812               emit_jump_insn (targetm.gen_jump (label2));
4813               emit_barrier ();
4814               emit_label (label1);
4815               expand_dec (adjusted_op0, const1_rtx);
4816               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4817                                   quotient, 1, OPTAB_LIB_WIDEN);
4818               if (tem != quotient)
4819                 emit_move_insn (quotient, tem);
4820               expand_inc (quotient, const1_rtx);
4821               emit_label (label2);
4822             }
4823           }
4824         else /* signed */
4825           {
4826             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4827                 && INTVAL (op1) >= 0)
4828               {
4829                 /* This is extremely similar to the code for the unsigned case
4830                    above.  For 2.7 we should merge these variants, but for
4831                    2.6.1 I don't want to touch the code for unsigned since that
4832                    get used in C.  The signed case will only be used by other
4833                    languages (Ada).  */
4834
4835                 rtx t1, t2, t3;
4836                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4837                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4838                                    floor_log2 (d), tquotient, 0);
4839                 t2 = expand_binop (compute_mode, and_optab, op0,
4840                                    gen_int_mode (d - 1, compute_mode),
4841                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4842                 t3 = gen_reg_rtx (compute_mode);
4843                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4844                                       compute_mode, 1, 1);
4845                 if (t3 == 0)
4846                   {
4847                     rtx_code_label *lab;
4848                     lab = gen_label_rtx ();
4849                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4850                     expand_inc (t1, const1_rtx);
4851                     emit_label (lab);
4852                     quotient = t1;
4853                   }
4854                 else
4855                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4856                                                           t1, t3),
4857                                             tquotient);
4858                 break;
4859               }
4860
4861             /* Try using an instruction that produces both the quotient and
4862                remainder, using truncation.  We can easily compensate the
4863                quotient or remainder to get ceiling rounding, once we have the
4864                remainder.  Notice that we compute also the final remainder
4865                value here, and return the result right away.  */
4866             if (target == 0 || GET_MODE (target) != compute_mode)
4867               target = gen_reg_rtx (compute_mode);
4868             if (rem_flag)
4869               {
4870                 remainder= (REG_P (target)
4871                             ? target : gen_reg_rtx (compute_mode));
4872                 quotient = gen_reg_rtx (compute_mode);
4873               }
4874             else
4875               {
4876                 quotient = (REG_P (target)
4877                             ? target : gen_reg_rtx (compute_mode));
4878                 remainder = gen_reg_rtx (compute_mode);
4879               }
4880
4881             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4882                                      remainder, 0))
4883               {
4884                 /* This could be computed with a branch-less sequence.
4885                    Save that for later.  */
4886                 rtx tem;
4887                 rtx_code_label *label = gen_label_rtx ();
4888                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4889                                  compute_mode, label);
4890                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4891                                     NULL_RTX, 0, OPTAB_WIDEN);
4892                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4893                 expand_inc (quotient, const1_rtx);
4894                 expand_dec (remainder, op1);
4895                 emit_label (label);
4896                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4897               }
4898
4899             /* No luck with division elimination or divmod.  Have to do it
4900                by conditionally adjusting op0 *and* the result.  */
4901             {
4902               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4903               rtx adjusted_op0;
4904               rtx tem;
4905
4906               quotient = gen_reg_rtx (compute_mode);
4907               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4908               label1 = gen_label_rtx ();
4909               label2 = gen_label_rtx ();
4910               label3 = gen_label_rtx ();
4911               label4 = gen_label_rtx ();
4912               label5 = gen_label_rtx ();
4913               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4914               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4915                                compute_mode, label1);
4916               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4917                                   quotient, 0, OPTAB_LIB_WIDEN);
4918               if (tem != quotient)
4919                 emit_move_insn (quotient, tem);
4920               emit_jump_insn (targetm.gen_jump (label5));
4921               emit_barrier ();
4922               emit_label (label1);
4923               expand_dec (adjusted_op0, const1_rtx);
4924               emit_jump_insn (targetm.gen_jump (label4));
4925               emit_barrier ();
4926               emit_label (label2);
4927               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4928                                compute_mode, label3);
4929               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4930                                   quotient, 0, OPTAB_LIB_WIDEN);
4931               if (tem != quotient)
4932                 emit_move_insn (quotient, tem);
4933               emit_jump_insn (targetm.gen_jump (label5));
4934               emit_barrier ();
4935               emit_label (label3);
4936               expand_inc (adjusted_op0, const1_rtx);
4937               emit_label (label4);
4938               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4939                                   quotient, 0, OPTAB_LIB_WIDEN);
4940               if (tem != quotient)
4941                 emit_move_insn (quotient, tem);
4942               expand_inc (quotient, const1_rtx);
4943               emit_label (label5);
4944             }
4945           }
4946         break;
4947
4948       case EXACT_DIV_EXPR:
4949         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4950           {
4951             HOST_WIDE_INT d = INTVAL (op1);
4952             unsigned HOST_WIDE_INT ml;
4953             int pre_shift;
4954             rtx t1;
4955
4956             pre_shift = ctz_or_zero (d);
4957             ml = invert_mod2n (d >> pre_shift, size);
4958             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4959                                pre_shift, NULL_RTX, unsignedp);
4960             quotient = expand_mult (compute_mode, t1,
4961                                     gen_int_mode (ml, compute_mode),
4962                                     NULL_RTX, 1);
4963
4964             insn = get_last_insn ();
4965             set_dst_reg_note (insn, REG_EQUAL,
4966                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4967                                               compute_mode, op0, op1),
4968                               quotient);
4969           }
4970         break;
4971
4972       case ROUND_DIV_EXPR:
4973       case ROUND_MOD_EXPR:
4974         if (unsignedp)
4975           {
4976             rtx tem;
4977             rtx_code_label *label;
4978             label = gen_label_rtx ();
4979             quotient = gen_reg_rtx (compute_mode);
4980             remainder = gen_reg_rtx (compute_mode);
4981             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4982               {
4983                 rtx tem;
4984                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4985                                          quotient, 1, OPTAB_LIB_WIDEN);
4986                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4987                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4988                                           remainder, 1, OPTAB_LIB_WIDEN);
4989               }
4990             tem = plus_constant (compute_mode, op1, -1);
4991             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4992             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4993             expand_inc (quotient, const1_rtx);
4994             expand_dec (remainder, op1);
4995             emit_label (label);
4996           }
4997         else
4998           {
4999             rtx abs_rem, abs_op1, tem, mask;
5000             rtx_code_label *label;
5001             label = gen_label_rtx ();
5002             quotient = gen_reg_rtx (compute_mode);
5003             remainder = gen_reg_rtx (compute_mode);
5004             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5005               {
5006                 rtx tem;
5007                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
5008                                          quotient, 0, OPTAB_LIB_WIDEN);
5009                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
5010                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
5011                                           remainder, 0, OPTAB_LIB_WIDEN);
5012               }
5013             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5014             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5015             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5016                                 1, NULL_RTX, 1);
5017             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5018             tem = expand_binop (compute_mode, xor_optab, op0, op1,
5019                                 NULL_RTX, 0, OPTAB_WIDEN);
5020             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5021                                  size - 1, NULL_RTX, 0);
5022             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5023                                 NULL_RTX, 0, OPTAB_WIDEN);
5024             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5025                                 NULL_RTX, 0, OPTAB_WIDEN);
5026             expand_inc (quotient, tem);
5027             tem = expand_binop (compute_mode, xor_optab, mask, op1,
5028                                 NULL_RTX, 0, OPTAB_WIDEN);
5029             tem = expand_binop (compute_mode, sub_optab, tem, mask,
5030                                 NULL_RTX, 0, OPTAB_WIDEN);
5031             expand_dec (remainder, tem);
5032             emit_label (label);
5033           }
5034         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5035
5036       default:
5037         gcc_unreachable ();
5038       }
5039
5040   if (quotient == 0)
5041     {
5042       if (target && GET_MODE (target) != compute_mode)
5043         target = 0;
5044
5045       if (rem_flag)
5046         {
5047           /* Try to produce the remainder without producing the quotient.
5048              If we seem to have a divmod pattern that does not require widening,
5049              don't try widening here.  We should really have a WIDEN argument
5050              to expand_twoval_binop, since what we'd really like to do here is
5051              1) try a mod insn in compute_mode
5052              2) try a divmod insn in compute_mode
5053              3) try a div insn in compute_mode and multiply-subtract to get
5054                 remainder
5055              4) try the same things with widening allowed.  */
5056           remainder
5057             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5058                                  op0, op1, target,
5059                                  unsignedp,
5060                                  ((optab_handler (optab2, compute_mode)
5061                                    != CODE_FOR_nothing)
5062                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5063           if (remainder == 0)
5064             {
5065               /* No luck there.  Can we do remainder and divide at once
5066                  without a library call?  */
5067               remainder = gen_reg_rtx (compute_mode);
5068               if (! expand_twoval_binop ((unsignedp
5069                                           ? udivmod_optab
5070                                           : sdivmod_optab),
5071                                          op0, op1,
5072                                          NULL_RTX, remainder, unsignedp))
5073                 remainder = 0;
5074             }
5075
5076           if (remainder)
5077             return gen_lowpart (mode, remainder);
5078         }
5079
5080       /* Produce the quotient.  Try a quotient insn, but not a library call.
5081          If we have a divmod in this mode, use it in preference to widening
5082          the div (for this test we assume it will not fail). Note that optab2
5083          is set to the one of the two optabs that the call below will use.  */
5084       quotient
5085         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5086                              op0, op1, rem_flag ? NULL_RTX : target,
5087                              unsignedp,
5088                              ((optab_handler (optab2, compute_mode)
5089                                != CODE_FOR_nothing)
5090                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5091
5092       if (quotient == 0)
5093         {
5094           /* No luck there.  Try a quotient-and-remainder insn,
5095              keeping the quotient alone.  */
5096           quotient = gen_reg_rtx (compute_mode);
5097           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5098                                      op0, op1,
5099                                      quotient, NULL_RTX, unsignedp))
5100             {
5101               quotient = 0;
5102               if (! rem_flag)
5103                 /* Still no luck.  If we are not computing the remainder,
5104                    use a library call for the quotient.  */
5105                 quotient = sign_expand_binop (compute_mode,
5106                                               udiv_optab, sdiv_optab,
5107                                               op0, op1, target,
5108                                               unsignedp, OPTAB_LIB_WIDEN);
5109             }
5110         }
5111     }
5112
5113   if (rem_flag)
5114     {
5115       if (target && GET_MODE (target) != compute_mode)
5116         target = 0;
5117
5118       if (quotient == 0)
5119         {
5120           /* No divide instruction either.  Use library for remainder.  */
5121           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5122                                          op0, op1, target,
5123                                          unsignedp, OPTAB_LIB_WIDEN);
5124           /* No remainder function.  Try a quotient-and-remainder
5125              function, keeping the remainder.  */
5126           if (!remainder)
5127             {
5128               remainder = gen_reg_rtx (compute_mode);
5129               if (!expand_twoval_binop_libfunc
5130                   (unsignedp ? udivmod_optab : sdivmod_optab,
5131                    op0, op1,
5132                    NULL_RTX, remainder,
5133                    unsignedp ? UMOD : MOD))
5134                 remainder = NULL_RTX;
5135             }
5136         }
5137       else
5138         {
5139           /* We divided.  Now finish doing X - Y * (X / Y).  */
5140           remainder = expand_mult (compute_mode, quotient, op1,
5141                                    NULL_RTX, unsignedp);
5142           remainder = expand_binop (compute_mode, sub_optab, op0,
5143                                     remainder, target, unsignedp,
5144                                     OPTAB_LIB_WIDEN);
5145         }
5146     }
5147
5148   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5149 }
5150 \f
5151 /* Return a tree node with data type TYPE, describing the value of X.
5152    Usually this is a VAR_DECL, if there is no obvious better choice.
5153    X may be an expression, however we only support those expressions
5154    generated by loop.c.  */
5155
5156 tree
5157 make_tree (tree type, rtx x)
5158 {
5159   tree t;
5160
5161   switch (GET_CODE (x))
5162     {
5163     case CONST_INT:
5164     case CONST_WIDE_INT:
5165       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5166       return t;
5167
5168     case CONST_DOUBLE:
5169       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5170       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5171         t = wide_int_to_tree (type,
5172                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5173                                                     HOST_BITS_PER_WIDE_INT * 2));
5174       else
5175         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5176
5177       return t;
5178
5179     case CONST_VECTOR:
5180       {
5181         int units = CONST_VECTOR_NUNITS (x);
5182         tree itype = TREE_TYPE (type);
5183         tree *elts;
5184         int i;
5185
5186         /* Build a tree with vector elements.  */
5187         elts = XALLOCAVEC (tree, units);
5188         for (i = units - 1; i >= 0; --i)
5189           {
5190             rtx elt = CONST_VECTOR_ELT (x, i);
5191             elts[i] = make_tree (itype, elt);
5192           }
5193
5194         return build_vector (type, elts);
5195       }
5196
5197     case PLUS:
5198       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5199                           make_tree (type, XEXP (x, 1)));
5200
5201     case MINUS:
5202       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5203                           make_tree (type, XEXP (x, 1)));
5204
5205     case NEG:
5206       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5207
5208     case MULT:
5209       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5210                           make_tree (type, XEXP (x, 1)));
5211
5212     case ASHIFT:
5213       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5214                           make_tree (type, XEXP (x, 1)));
5215
5216     case LSHIFTRT:
5217       t = unsigned_type_for (type);
5218       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5219                                          make_tree (t, XEXP (x, 0)),
5220                                          make_tree (type, XEXP (x, 1))));
5221
5222     case ASHIFTRT:
5223       t = signed_type_for (type);
5224       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5225                                          make_tree (t, XEXP (x, 0)),
5226                                          make_tree (type, XEXP (x, 1))));
5227
5228     case DIV:
5229       if (TREE_CODE (type) != REAL_TYPE)
5230         t = signed_type_for (type);
5231       else
5232         t = type;
5233
5234       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5235                                          make_tree (t, XEXP (x, 0)),
5236                                          make_tree (t, XEXP (x, 1))));
5237     case UDIV:
5238       t = unsigned_type_for (type);
5239       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5240                                          make_tree (t, XEXP (x, 0)),
5241                                          make_tree (t, XEXP (x, 1))));
5242
5243     case SIGN_EXTEND:
5244     case ZERO_EXTEND:
5245       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5246                                           GET_CODE (x) == ZERO_EXTEND);
5247       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5248
5249     case CONST:
5250       return make_tree (type, XEXP (x, 0));
5251
5252     case SYMBOL_REF:
5253       t = SYMBOL_REF_DECL (x);
5254       if (t)
5255         return fold_convert (type, build_fold_addr_expr (t));
5256       /* fall through.  */
5257
5258     default:
5259       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5260
5261       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5262          address mode to pointer mode.  */
5263       if (POINTER_TYPE_P (type))
5264         x = convert_memory_address_addr_space
5265               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5266
5267       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5268          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5269       t->decl_with_rtl.rtl = x;
5270
5271       return t;
5272     }
5273 }
5274 \f
5275 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5276    and returning TARGET.
5277
5278    If TARGET is 0, a pseudo-register or constant is returned.  */
5279
5280 rtx
5281 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5282 {
5283   rtx tem = 0;
5284
5285   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5286     tem = simplify_binary_operation (AND, mode, op0, op1);
5287   if (tem == 0)
5288     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5289
5290   if (target == 0)
5291     target = tem;
5292   else if (tem != target)
5293     emit_move_insn (target, tem);
5294   return target;
5295 }
5296
5297 /* Helper function for emit_store_flag.  */
5298 rtx
5299 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5300              machine_mode mode, machine_mode compare_mode,
5301              int unsignedp, rtx x, rtx y, int normalizep,
5302              machine_mode target_mode)
5303 {
5304   struct expand_operand ops[4];
5305   rtx op0, comparison, subtarget;
5306   rtx_insn *last;
5307   machine_mode result_mode = targetm.cstore_mode (icode);
5308
5309   last = get_last_insn ();
5310   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5311   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5312   if (!x || !y)
5313     {
5314       delete_insns_since (last);
5315       return NULL_RTX;
5316     }
5317
5318   if (target_mode == VOIDmode)
5319     target_mode = result_mode;
5320   if (!target)
5321     target = gen_reg_rtx (target_mode);
5322
5323   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5324
5325   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5326   create_fixed_operand (&ops[1], comparison);
5327   create_fixed_operand (&ops[2], x);
5328   create_fixed_operand (&ops[3], y);
5329   if (!maybe_expand_insn (icode, 4, ops))
5330     {
5331       delete_insns_since (last);
5332       return NULL_RTX;
5333     }
5334   subtarget = ops[0].value;
5335
5336   /* If we are converting to a wider mode, first convert to
5337      TARGET_MODE, then normalize.  This produces better combining
5338      opportunities on machines that have a SIGN_EXTRACT when we are
5339      testing a single bit.  This mostly benefits the 68k.
5340
5341      If STORE_FLAG_VALUE does not have the sign bit set when
5342      interpreted in MODE, we can do this conversion as unsigned, which
5343      is usually more efficient.  */
5344   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5345     {
5346       convert_move (target, subtarget,
5347                     val_signbit_known_clear_p (result_mode,
5348                                                STORE_FLAG_VALUE));
5349       op0 = target;
5350       result_mode = target_mode;
5351     }
5352   else
5353     op0 = subtarget;
5354
5355   /* If we want to keep subexpressions around, don't reuse our last
5356      target.  */
5357   if (optimize)
5358     subtarget = 0;
5359
5360   /* Now normalize to the proper value in MODE.  Sometimes we don't
5361      have to do anything.  */
5362   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5363     ;
5364   /* STORE_FLAG_VALUE might be the most negative number, so write
5365      the comparison this way to avoid a compiler-time warning.  */
5366   else if (- normalizep == STORE_FLAG_VALUE)
5367     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5368
5369   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5370      it hard to use a value of just the sign bit due to ANSI integer
5371      constant typing rules.  */
5372   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5373     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5374                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5375                         normalizep == 1);
5376   else
5377     {
5378       gcc_assert (STORE_FLAG_VALUE & 1);
5379
5380       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5381       if (normalizep == -1)
5382         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5383     }
5384
5385   /* If we were converting to a smaller mode, do the conversion now.  */
5386   if (target_mode != result_mode)
5387     {
5388       convert_move (target, op0, 0);
5389       return target;
5390     }
5391   else
5392     return op0;
5393 }
5394
5395
5396 /* A subroutine of emit_store_flag only including "tricks" that do not
5397    need a recursive call.  These are kept separate to avoid infinite
5398    loops.  */
5399
5400 static rtx
5401 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5402                    machine_mode mode, int unsignedp, int normalizep,
5403                    machine_mode target_mode)
5404 {
5405   rtx subtarget;
5406   enum insn_code icode;
5407   machine_mode compare_mode;
5408   enum mode_class mclass;
5409   enum rtx_code scode;
5410
5411   if (unsignedp)
5412     code = unsigned_condition (code);
5413   scode = swap_condition (code);
5414
5415   /* If one operand is constant, make it the second one.  Only do this
5416      if the other operand is not constant as well.  */
5417
5418   if (swap_commutative_operands_p (op0, op1))
5419     {
5420       std::swap (op0, op1);
5421       code = swap_condition (code);
5422     }
5423
5424   if (mode == VOIDmode)
5425     mode = GET_MODE (op0);
5426
5427   /* For some comparisons with 1 and -1, we can convert this to
5428      comparisons with zero.  This will often produce more opportunities for
5429      store-flag insns.  */
5430
5431   switch (code)
5432     {
5433     case LT:
5434       if (op1 == const1_rtx)
5435         op1 = const0_rtx, code = LE;
5436       break;
5437     case LE:
5438       if (op1 == constm1_rtx)
5439         op1 = const0_rtx, code = LT;
5440       break;
5441     case GE:
5442       if (op1 == const1_rtx)
5443         op1 = const0_rtx, code = GT;
5444       break;
5445     case GT:
5446       if (op1 == constm1_rtx)
5447         op1 = const0_rtx, code = GE;
5448       break;
5449     case GEU:
5450       if (op1 == const1_rtx)
5451         op1 = const0_rtx, code = NE;
5452       break;
5453     case LTU:
5454       if (op1 == const1_rtx)
5455         op1 = const0_rtx, code = EQ;
5456       break;
5457     default:
5458       break;
5459     }
5460
5461   /* If we are comparing a double-word integer with zero or -1, we can
5462      convert the comparison into one involving a single word.  */
5463   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5464       && GET_MODE_CLASS (mode) == MODE_INT
5465       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5466     {
5467       rtx tem;
5468       if ((code == EQ || code == NE)
5469           && (op1 == const0_rtx || op1 == constm1_rtx))
5470         {
5471           rtx op00, op01;
5472
5473           /* Do a logical OR or AND of the two words and compare the
5474              result.  */
5475           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5476           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5477           tem = expand_binop (word_mode,
5478                               op1 == const0_rtx ? ior_optab : and_optab,
5479                               op00, op01, NULL_RTX, unsignedp,
5480                               OPTAB_DIRECT);
5481
5482           if (tem != 0)
5483             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5484                                    unsignedp, normalizep);
5485         }
5486       else if ((code == LT || code == GE) && op1 == const0_rtx)
5487         {
5488           rtx op0h;
5489
5490           /* If testing the sign bit, can just test on high word.  */
5491           op0h = simplify_gen_subreg (word_mode, op0, mode,
5492                                       subreg_highpart_offset (word_mode,
5493                                                               mode));
5494           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5495                                  unsignedp, normalizep);
5496         }
5497       else
5498         tem = NULL_RTX;
5499
5500       if (tem)
5501         {
5502           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5503             return tem;
5504           if (!target)
5505             target = gen_reg_rtx (target_mode);
5506
5507           convert_move (target, tem,
5508                         !val_signbit_known_set_p (word_mode,
5509                                                   (normalizep ? normalizep
5510                                                    : STORE_FLAG_VALUE)));
5511           return target;
5512         }
5513     }
5514
5515   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5516      complement of A (for GE) and shifting the sign bit to the low bit.  */
5517   if (op1 == const0_rtx && (code == LT || code == GE)
5518       && GET_MODE_CLASS (mode) == MODE_INT
5519       && (normalizep || STORE_FLAG_VALUE == 1
5520           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5521     {
5522       subtarget = target;
5523
5524       if (!target)
5525         target_mode = mode;
5526
5527       /* If the result is to be wider than OP0, it is best to convert it
5528          first.  If it is to be narrower, it is *incorrect* to convert it
5529          first.  */
5530       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5531         {
5532           op0 = convert_modes (target_mode, mode, op0, 0);
5533           mode = target_mode;
5534         }
5535
5536       if (target_mode != mode)
5537         subtarget = 0;
5538
5539       if (code == GE)
5540         op0 = expand_unop (mode, one_cmpl_optab, op0,
5541                            ((STORE_FLAG_VALUE == 1 || normalizep)
5542                             ? 0 : subtarget), 0);
5543
5544       if (STORE_FLAG_VALUE == 1 || normalizep)
5545         /* If we are supposed to produce a 0/1 value, we want to do
5546            a logical shift from the sign bit to the low-order bit; for
5547            a -1/0 value, we do an arithmetic shift.  */
5548         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5549                             GET_MODE_BITSIZE (mode) - 1,
5550                             subtarget, normalizep != -1);
5551
5552       if (mode != target_mode)
5553         op0 = convert_modes (target_mode, mode, op0, 0);
5554
5555       return op0;
5556     }
5557
5558   mclass = GET_MODE_CLASS (mode);
5559   for (compare_mode = mode; compare_mode != VOIDmode;
5560        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5561     {
5562      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5563      icode = optab_handler (cstore_optab, optab_mode);
5564      if (icode != CODE_FOR_nothing)
5565         {
5566           do_pending_stack_adjust ();
5567           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5568                                  unsignedp, op0, op1, normalizep, target_mode);
5569           if (tem)
5570             return tem;
5571
5572           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5573             {
5574               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5575                                  unsignedp, op1, op0, normalizep, target_mode);
5576               if (tem)
5577                 return tem;
5578             }
5579           break;
5580         }
5581     }
5582
5583   return 0;
5584 }
5585
5586 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5587    and storing in TARGET.  Normally return TARGET.
5588    Return 0 if that cannot be done.
5589
5590    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5591    it is VOIDmode, they cannot both be CONST_INT.
5592
5593    UNSIGNEDP is for the case where we have to widen the operands
5594    to perform the operation.  It says to use zero-extension.
5595
5596    NORMALIZEP is 1 if we should convert the result to be either zero
5597    or one.  NORMALIZEP is -1 if we should convert the result to be
5598    either zero or -1.  If NORMALIZEP is zero, the result will be left
5599    "raw" out of the scc insn.

        TARGET may be 0; the target mode is then taken to be VOIDmode.

        The strategies are tried in this order: emit_store_flag_1 on the
        comparison as given; for floating-point modes, the reversed
        comparison, or a split of the comparison whose first half is
        computed as a flag and whose second half selects between values
        with a conditional move; for integer modes, XOR (or subtract)
        followed by a comparison against zero, the reversed comparison,
        and finally arithmetic that leaves the answer in the sign bit.
        Failed attempts are generally rolled back with
        delete_insns_since.  */
5600
5601 rtx
5602 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5603                  machine_mode mode, int unsignedp, int normalizep)
5604 {
5605   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5606   enum rtx_code rcode;
5607   rtx subtarget;
5608   rtx tem, trueval;
5609   rtx_insn *last;
5610
5611   /* If we compare constants, we shouldn't use a store-flag operation,
5612      but a constant load.  We can get there via the vanilla route that
5613      usually generates a compare-branch sequence, but will in this case
5614      fold the comparison to a constant, and thus elide the branch.  */
5615   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5616     return NULL_RTX;
5617
5618   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5619                            target_mode);
5620   if (tem)
5621     return tem;
5622
5623   /* If we reached here, we can't do this with a scc insn, however there
5624      are some comparisons that can be done in other ways.  Don't do any
5625      of these cases if branches are very cheap.  */
5626   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5627     return 0;
5628
5629   /* See what we need to return.  We can only return a 1, -1, or the
5630      sign bit.  */
5631
5632   if (normalizep == 0)
5633     {
5634       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5635         normalizep = STORE_FLAG_VALUE;
5636
5637       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5638         ;
5639       else
5640         return 0;
5641     }
5642
       /* Everything emitted after LAST can be deleted again if an attempt
          below does not work out.  */
5643   last = get_last_insn ();
5644
5645   /* If optimizing, use different pseudo registers for each insn, instead
5646      of reusing the same pseudo.  This leads to better CSE, but slows
5647      down the compiler, since there are more pseudos */
5648   subtarget = (!optimize
5649                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the value the result has when the comparison holds.  */
5650   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5651
5652   /* For floating-point comparisons, try the reverse comparison or try
5653      changing the "orderedness" of the comparison.  */
5654   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5655     {
5656       enum rtx_code first_code;
5657       bool and_them;
5658
5659       rcode = reverse_condition_maybe_unordered (code);
5660       if (can_compare_p (rcode, mode, ccp_store_flag)
5661           && (code == ORDERED || code == UNORDERED
5662               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5663               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5664         {
               /* WANT_ADD is set when the raw and the requested truth values
                  are 1 and -1 in some order.  Adding NORMALIZEP to the raw
                  result of the reversed comparison (0 or STORE_FLAG_VALUE)
                  then gives NORMALIZEP or 0 for the original comparison.  */
5665           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5666                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5667
5668           /* For the reverse comparison, use either an addition or a XOR.  */
5669           if (want_add
5670               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5671                            optimize_insn_for_speed_p ()) == 0)
5672             {
5673               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5674                                        STORE_FLAG_VALUE, target_mode);
5675               if (tem)
5676                 return expand_binop (target_mode, add_optab, tem,
5677                                      gen_int_mode (normalizep, target_mode),
5678                                      target, 0, OPTAB_WIDEN);
5679             }
5680           else if (!want_add
5681                    && rtx_cost (trueval, mode, XOR, 1,
5682                                 optimize_insn_for_speed_p ()) == 0)
5683             {
5684               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5685                                        normalizep, target_mode);
5686               if (tem)
5687                 return expand_binop (target_mode, xor_optab, tem, trueval,
5688                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5689             }
5690         }
5691
5692       delete_insns_since (last);
5693
5694       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5695       if (code == ORDERED || code == UNORDERED)
5696         return 0;
5697
5698       and_them = split_comparison (code, mode, &first_code, &code);
5699
5700       /* If there are no NaNs, the first comparison should always fall through.
5701          Effectively change the comparison to the other one.  */
5702       if (!HONOR_NANS (mode))
5703         {
5704           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5705           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5706                                     target_mode);
5707         }
5708
5709       if (!HAVE_conditional_move)
5710         return 0;
5711
5712       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5713          conditional move.  */
5714       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5715                                normalizep, target_mode);
5716       if (tem == 0)
5717         return 0;
5718
           /* When the two halves are and-ed, the second comparison selects
              between the first flag and zero; otherwise it selects between
              TRUEVAL and the first flag.  */
5719       if (and_them)
5720         tem = emit_conditional_move (target, code, op0, op1, mode,
5721                                      tem, const0_rtx, GET_MODE (tem), 0);
5722       else
5723         tem = emit_conditional_move (target, code, op0, op1, mode,
5724                                      trueval, tem, GET_MODE (tem), 0);
5725
5726       if (tem == 0)
5727         delete_insns_since (last);
5728       return tem;
5729     }
5730
5731   /* The remaining tricks only apply to integer comparisons.  */
5732
5733   if (GET_MODE_CLASS (mode) != MODE_INT)
5734     return 0;
5735
5736   /* If this is an equality comparison of integers, we can try to exclusive-or
5737      (or subtract) the two operands and use a recursive call to try the
5738      comparison with zero.  Don't do any of these cases if branches are
5739      very cheap.  */
5740
5741   if ((code == EQ || code == NE) && op1 != const0_rtx)
5742     {
5743       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5744                           OPTAB_WIDEN);
5745
5746       if (tem == 0)
5747         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5748                             OPTAB_WIDEN);
5749       if (tem != 0)
5750         tem = emit_store_flag (target, code, tem, const0_rtx,
5751                                mode, unsignedp, normalizep);
5752       if (tem != 0)
5753         return tem;
5754
5755       delete_insns_since (last);
5756     }
5757
5758   /* For integer comparisons, try the reverse comparison.  However, for
5759      small X and if we'd have anyway to extend, implementing "X != 0"
5760      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5761   rcode = reverse_condition (code);
5762   if (can_compare_p (rcode, mode, ccp_store_flag)
5763       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5764             && code == NE
5765             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5766             && op1 == const0_rtx))
5767     {
5768       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5769                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5770
5771       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5772       if (want_add
5773           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5774                        optimize_insn_for_speed_p ()) == 0)
5775         {
5776           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5777                                    STORE_FLAG_VALUE, target_mode);
5778           if (tem != 0)
5779             tem = expand_binop (target_mode, add_optab, tem,
5780                                 gen_int_mode (normalizep, target_mode),
5781                                 target, 0, OPTAB_WIDEN);
5782         }
5783       else if (!want_add
5784                && rtx_cost (trueval, mode, XOR, 1,
5785                             optimize_insn_for_speed_p ()) == 0)
5786         {
5787           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5788                                    normalizep, target_mode);
5789           if (tem != 0)
5790             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5791                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5792         }
5793
5794       if (tem != 0)
5795         return tem;
5796       delete_insns_since (last);
5797     }
5798
5799   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5800      the constant zero.  Reject all other comparisons at this point.  Only
5801      do LE and GT if branches are expensive since they are expensive on
5802      2-operand machines.  */
5803
5804   if (op1 != const0_rtx
5805       || (code != EQ && code != NE
5806           && (BRANCH_COST (optimize_insn_for_speed_p (),
5807                            false) <= 1 || (code != LE && code != GT))))
5808     return 0;
5809
5810   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5811      do the necessary operation below.  */
5812
5813   tem = 0;
5814
5815   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5816      the sign bit set.  */
5817
5818   if (code == LE)
5819     {
5820       /* This is destructive, so SUBTARGET can't be OP0.  */
5821       if (rtx_equal_p (subtarget, op0))
5822         subtarget = 0;
5823
5824       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5825                           OPTAB_WIDEN);
5826       if (tem)
5827         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5828                             OPTAB_WIDEN);
5829     }
5830
5831   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5832      number of bits in the mode of OP0, minus one.  */
5833
5834   if (code == GT)
5835     {
5836       if (rtx_equal_p (subtarget, op0))
5837         subtarget = 0;
5838
5839       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5840                                 GET_MODE_BITSIZE (mode) - 1,
5841                                 subtarget, 0);
5842       if (tem)
5843         tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5844                             OPTAB_WIDEN);
5845     }
5846
5847   if (code == EQ || code == NE)
5848     {
5849       /* For EQ or NE, one way to do the comparison is to apply an operation
5850          that converts the operand into a positive number if it is nonzero
5851          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5852          for NE we negate.  This puts the result in the sign bit.  Then we
5853          normalize with a shift, if needed.
5854
5855          Two operations that can do the above actions are ABS and FFS, so try
5856          them.  If that doesn't work, and MODE is smaller than a full word,
5857          we can use zero-extension to the wider mode (an unsigned conversion)
5858          as the operation.  */
5859
5860       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5861          that is compensated by the subsequent overflow when subtracting
5862          one / negating.  */
5863
5864       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5865         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5866       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5867         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5868       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5869         {
5870           tem = convert_modes (word_mode, mode, op0, 1);
5871           mode = word_mode;
5872         }
5873
5874       if (tem != 0)
5875         {
5876           if (code == EQ)
5877             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5878                                 0, OPTAB_WIDEN);
5879           else
5880             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5881         }
5882
5883       /* If we couldn't do it that way, for NE we can "or" the two's complement
5884          of the value with itself.  For EQ, we take the one's complement of
5885          that "or", which is an extra insn, so we only handle EQ if branches
5886          are expensive.  */
5887
5888       if (tem == 0
5889           && (code == NE
5890               || BRANCH_COST (optimize_insn_for_speed_p (),
5891                               false) > 1))
5892         {
5893           if (rtx_equal_p (subtarget, op0))
5894             subtarget = 0;
5895
5896           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5897           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5898                               OPTAB_WIDEN);
5899
5900           if (tem && code == EQ)
5901             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5902         }
5903     }
5904
       /* The answer now sits in the sign bit of TEM.  When a normalized value
          was asked for, shift that bit down to the low-order position: a
          logical shift gives 0/1, an arithmetic shift gives 0/-1.  */
5905   if (tem && normalizep)
5906     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5907                               GET_MODE_BITSIZE (mode) - 1,
5908                               subtarget, normalizep == 1);
5909
       /* On success, get the value into TARGET when the caller supplied
          one; on failure, remove whatever was emitted since LAST.  */
5910   if (tem)
5911     {
5912       if (!target)
5913         ;
5914       else if (GET_MODE (tem) != target_mode)
5915         {
5916           convert_move (target, tem, 0);
5917           tem = target;
5918         }
5919       else if (!subtarget)
5920         {
5921           emit_move_insn (target, tem);
5922           tem = target;
5923         }
5924     }
5925   else
5926     delete_insns_since (last);
5927
5928   return tem;
5929 }
5930
5931 /* Like emit_store_flag, but always succeeds.  */
5932
5933 rtx
5934 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5935                        machine_mode mode, int unsignedp, int normalizep)
5936 {
5937   rtx tem;
5938   rtx_code_label *label;
5939   rtx trueval, falseval;
5940
5941   /* First see if emit_store_flag can do the job.  */
5942   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5943   if (tem != 0)
5944     return tem;
5945
5946   if (!target)
5947     target = gen_reg_rtx (word_mode);
5948
5949   /* If this failed, we have to do this with set/compare/jump/set code.
5950      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5951   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5952   if (code == NE
5953       && GET_MODE_CLASS (mode) == MODE_INT
5954       && REG_P (target)
5955       && op0 == target
5956       && op1 == const0_rtx)
5957     {
5958       label = gen_label_rtx ();
5959       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5960                                NULL_RTX, NULL, label,
5961                                profile_probability::uninitialized ());
5962       emit_move_insn (target, trueval);
5963       emit_label (label);
5964       return target;
5965     }
5966
5967   if (!REG_P (target)
5968       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5969     target = gen_reg_rtx (GET_MODE (target));
5970
5971   /* Jump in the right direction if the target cannot implement CODE
5972      but can jump on its reverse condition.  */
5973   falseval = const0_rtx;
5974   if (! can_compare_p (code, mode, ccp_jump)
5975       && (! FLOAT_MODE_P (mode)
5976           || code == ORDERED || code == UNORDERED
5977           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5978           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5979     {
5980       enum rtx_code rcode;
5981       if (FLOAT_MODE_P (mode))
5982         rcode = reverse_condition_maybe_unordered (code);
5983       else
5984         rcode = reverse_condition (code);
5985
5986       /* Canonicalize to UNORDERED for the libcall.  */
5987       if (can_compare_p (rcode, mode, ccp_jump)
5988           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5989         {
5990           falseval = trueval;
5991           trueval = const0_rtx;
5992           code = rcode;
5993         }
5994     }
5995
5996   emit_move_insn (target, trueval);
5997   label = gen_label_rtx ();
5998   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5999                            label, profile_probability::uninitialized ());
6000
6001   emit_move_insn (target, falseval);
6002   emit_label (label);
6003
6004   return target;
6005 }
6006 \f
6007 /* Perform possibly multi-word comparison and conditional jump to LABEL
6008    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6009    now a thin wrapper around do_compare_rtx_and_jump.  */
6010
6011 static void
6012 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6013                  rtx_code_label *label)
6014 {
6015   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6016   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6017                            NULL, label, profile_probability::uninitialized ());
6018 }