gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "insn-codes.h"
  35 #include "optabs.h"
  36 #include "recog.h"
  37 #include "langhooks.h"
  38 #include "predict.h"
  39 #include "basic-block.h"
  40 #include "df.h"
  41 #include "target.h"
  42 #include "expmed.h"
  43
     /* The default instance of the per-target expmed state.  When
        SWITCHABLE_TARGET is nonzero, the pointer this_target_expmed is
        also defined here and initially refers to that default instance.  */
  44 struct target_expmed default_target_expmed;
  45 #if SWITCHABLE_TARGET
  46 struct target_expmed *this_target_expmed = &default_target_expmed;
  47 #endif
  48
     /* Prototypes for helpers that are local (static) to this file.  */
  49 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  55                                      unsigned HOST_WIDE_INT,
  56                                      rtx);
  57 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    unsigned HOST_WIDE_INT,
  60                                    unsigned HOST_WIDE_INT,
  61                                    rtx);
  62 static rtx extract_fixed_bit_field (machine_mode, rtx,
  63                                     unsigned HOST_WIDE_INT,
  64                                     unsigned HOST_WIDE_INT, rtx, int);
  65 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  66                                       unsigned HOST_WIDE_INT,
  67                                       unsigned HOST_WIDE_INT, rtx, int);
  68 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  69 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  70                                     unsigned HOST_WIDE_INT, int);
  71 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  72 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  73 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  74
  75 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  76    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  77    The mask is truncated if necessary to the width of mode MODE.  The
  78    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  79
  80 static inline rtx
  81 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  82 {
  83   return immed_wide_int_const
  84     (wi::shifted_mask (bitpos, bitsize, complement,
  85                        GET_MODE_PRECISION (mode)), mode);
  86 }
  87
  88 /* Test whether a value is zero of a power of two.  */
  89 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  90   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  91
     /* Scratch rtl expressions that init_expmed builds once and then
        re-modes (PUT_MODE) and re-costs (set_src_cost) for each mode.
        The shapes given below are the ones init_expmed creates.  */
  92 struct init_expmed_rtl
  93 {
  94   rtx reg;          /* Raw REG numbered LAST_VIRTUAL_REGISTER + 1.  */
  95   rtx plus;         /* (plus reg reg)  */
  96   rtx neg;          /* (neg reg)  */
  97   rtx mult;         /* (mult reg reg)  */
  98   rtx sdiv;         /* (div reg reg)  */
  99   rtx udiv;         /* (udiv reg reg)  */
 100   rtx sdiv_32;      /* (div reg pow2[5]), i.e. signed division by 32.  */
 101   rtx smod_32;      /* (mod reg pow2[5]), i.e. signed modulus by 32.  */
 102   rtx wide_mult;    /* (mult zext zext)  */
 103   rtx wide_lshr;    /* (lshiftrt wide_mult N); N is set per mode.  */
 104   rtx wide_trunc;   /* (truncate wide_lshr)  */
 105   rtx shift;        /* (ashift reg N); N is replaced by cint[m].  */
 106   rtx shift_mult;   /* (mult reg N); N is replaced by pow2[m].  */
 107   rtx shift_add;    /* (plus shift_mult reg)  */
 108   rtx shift_sub0;   /* (minus shift_mult reg)  */
 109   rtx shift_sub1;   /* (minus reg shift_mult)  */
 110   rtx zext;         /* (zero_extend reg)  */
 111   rtx trunc;        /* (truncate reg)  */
 112
     /* For 1 <= m < MAX_BITS_PER_WORD, pow2[m] is the constant 1 << m
        and cint[m] is the constant m.  init_expmed leaves index 0 null.  */
 113   rtx pow2[MAX_BITS_PER_WORD];
 114   rtx cint[MAX_BITS_PER_WORD];
 115 };
 116
 117 static void
 118 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 119                       machine_mode from_mode, bool speed)
 120 {
 121   int to_size, from_size;
 122   rtx which;
 123
 124   to_size = GET_MODE_PRECISION (to_mode);
 125   from_size = GET_MODE_PRECISION (from_mode);
 126
 127   /* Most partial integers have a precision less than the "full"
 128      integer it requires for storage.  In case one doesn't, for
 129      comparison purposes here, reduce the bit size by one in that
 130      case.  */
 131   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 132       && exact_log2 (to_size) != -1)
 133     to_size --;
 134   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 135       && exact_log2 (from_size) != -1)
 136     from_size --;
 137
 138   /* Assume cost of zero-extend and sign-extend is the same.  */
 139   which = (to_size < from_size ? all->trunc : all->zext);
 140
 141   PUT_MODE (all->reg, from_mode);
 142   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 143 }
 144
 145 static void
 146 init_expmed_one_mode (struct init_expmed_rtl *all,
 147                       machine_mode mode, int speed)
 148 {
 149   int m, n, mode_bitsize;
 150   machine_mode mode_from;
 151
 152   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 153
 154   PUT_MODE (all->reg, mode);
 155   PUT_MODE (all->plus, mode);
 156   PUT_MODE (all->neg, mode);
 157   PUT_MODE (all->mult, mode);
 158   PUT_MODE (all->sdiv, mode);
 159   PUT_MODE (all->udiv, mode);
 160   PUT_MODE (all->sdiv_32, mode);
 161   PUT_MODE (all->smod_32, mode);
 162   PUT_MODE (all->wide_trunc, mode);
 163   PUT_MODE (all->shift, mode);
 164   PUT_MODE (all->shift_mult, mode);
 165   PUT_MODE (all->shift_add, mode);
 166   PUT_MODE (all->shift_sub0, mode);
 167   PUT_MODE (all->shift_sub1, mode);
 168   PUT_MODE (all->zext, mode);
 169   PUT_MODE (all->trunc, mode);
 170
 171   set_add_cost (speed, mode, set_src_cost (all->plus, speed));
 172   set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
 173   set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
 174   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
 175   set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
 176
 177   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
 178                                      <= 2 * add_cost (speed, mode)));
 179   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
 180                                      <= 4 * add_cost (speed, mode)));
 181
 182   set_shift_cost (speed, mode, 0, 0);
 183   {
 184     int cost = add_cost (speed, mode);
 185     set_shiftadd_cost (speed, mode, 0, cost);
 186     set_shiftsub0_cost (speed, mode, 0, cost);
 187     set_shiftsub1_cost (speed, mode, 0, cost);
 188   }
 189
 190   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 191   for (m = 1; m < n; m++)
 192     {
 193       XEXP (all->shift, 1) = all->cint[m];
 194       XEXP (all->shift_mult, 1) = all->pow2[m];
 195
 196       set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
 197       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
 199       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
 200     }
 201
 202   if (SCALAR_INT_MODE_P (mode))
 203     {
 204       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 205            mode_from = (machine_mode)(mode_from + 1))
 206         init_expmed_one_conv (all, mode, mode_from, speed);
 207     }
 208   if (GET_MODE_CLASS (mode) == MODE_INT)
 209     {
 210       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 211       if (wider_mode != VOIDmode)
 212         {
 213           PUT_MODE (all->zext, wider_mode);
 214           PUT_MODE (all->wide_mult, wider_mode);
 215           PUT_MODE (all->wide_lshr, wider_mode);
 216           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 217
 218           set_mul_widen_cost (speed, wider_mode,
 219                               set_src_cost (all->wide_mult, speed));
 220           set_mul_highpart_cost (speed, mode,
 221                                  set_src_cost (all->wide_trunc, speed));
 222         }
 223     }
 224 }
 225
 226 void
 227 init_expmed (void)
 228 {
 229   struct init_expmed_rtl all;
 230   machine_mode mode = QImode;
 231   int m, speed;
 232
 233   memset (&all, 0, sizeof all);
 234   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 235     {
 236       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 237       all.cint[m] = GEN_INT (m);
 238     }
 239
 240   /* Avoid using hard regs in ways which may be unsupported.  */
 241   all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 242   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 243   all.neg = gen_rtx_NEG (mode, all.reg);
 244   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 245   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 246   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 247   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 248   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 249   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 250   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 251   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 252   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 253   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 254   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 255   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 256   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 257   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 258   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 259
 260   for (speed = 0; speed < 2; speed++)
 261     {
 262       crtl->maybe_hot_insn_p = speed;
 263       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 264
 265       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 266            mode = (machine_mode)(mode + 1))
 267         init_expmed_one_mode (&all, mode, speed);
 268
 269       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 270         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 271              mode = (machine_mode)(mode + 1))
 272           init_expmed_one_mode (&all, mode, speed);
 273
 274       if (MIN_MODE_VECTOR_INT != VOIDmode)
 275         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 276              mode = (machine_mode)(mode + 1))
 277           init_expmed_one_mode (&all, mode, speed);
 278     }
 279
 280   if (alg_hash_used_p ())
 281     {
 282       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 283       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 284     }
 285   else
 286     set_alg_hash_used_p (true);
 287   default_rtl_profile ();
 288
 289   ggc_free (all.trunc);
 290   ggc_free (all.shift_sub1);
 291   ggc_free (all.shift_sub0);
 292   ggc_free (all.shift_add);
 293   ggc_free (all.shift_mult);
 294   ggc_free (all.shift);
 295   ggc_free (all.wide_trunc);
 296   ggc_free (all.wide_lshr);
 297   ggc_free (all.wide_mult);
 298   ggc_free (all.zext);
 299   ggc_free (all.smod_32);
 300   ggc_free (all.sdiv_32);
 301   ggc_free (all.udiv);
 302   ggc_free (all.sdiv);
 303   ggc_free (all.mult);
 304   ggc_free (all.neg);
 305   ggc_free (all.plus);
 306   ggc_free (all.reg);
 307 }
 308
 309 /* Return an rtx representing minus the value of X.
 310    MODE is the intended mode of the result,
 311    useful if X is a CONST_INT.  */
 312
 313 rtx
 314 negate_rtx (machine_mode mode, rtx x)
 315 {
 316   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 317
 318   if (result == 0)
 319     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 320
 321   return result;
 322 }
 323
 324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 325    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 326    If MODE is BLKmode, return a reference to every byte in the bitfield.
 327    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 328
 329 static rtx
 330 narrow_bit_field_mem (rtx mem, machine_mode mode,
 331                       unsigned HOST_WIDE_INT bitsize,
 332                       unsigned HOST_WIDE_INT bitnum,
 333                       unsigned HOST_WIDE_INT *new_bitnum)
 334 {
 335   if (mode == BLKmode)
 336     {
 337       *new_bitnum = bitnum % BITS_PER_UNIT;
 338       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 339       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 340                             / BITS_PER_UNIT);
 341       return adjust_bitfield_address_size (mem, mode, offset, size);
 342     }
 343   else
 344     {
 345       unsigned int unit = GET_MODE_BITSIZE (mode);
 346       *new_bitnum = bitnum % unit;
 347       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 348       return adjust_bitfield_address (mem, mode, offset);
 349     }
 350 }
 351
 352 /* The caller wants to perform insertion or extraction PATTERN on a
 353    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 354    BITREGION_START and BITREGION_END are as for store_bit_field
 355    and FIELDMODE is the natural mode of the field.
 356
 357    Search for a mode that is compatible with the memory access
 358    restrictions and (where applicable) with a register insertion or
 359    extraction.  Return the new memory on success, storing the adjusted
 360    bit position in *NEW_BITNUM.  Return null otherwise.  */
 361
 362 static rtx
 363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 364                               rtx op0, HOST_WIDE_INT bitsize,
 365                               HOST_WIDE_INT bitnum,
 366                               unsigned HOST_WIDE_INT bitregion_start,
 367                               unsigned HOST_WIDE_INT bitregion_end,
 368                               machine_mode fieldmode,
 369                               unsigned HOST_WIDE_INT *new_bitnum)
 370 {
 371   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 372                                 bitregion_end, MEM_ALIGN (op0),
 373                                 MEM_VOLATILE_P (op0));
 374   machine_mode best_mode;
 375   if (iter.next_mode (&best_mode))
 376     {
 377       /* We can use a memory in BEST_MODE.  See whether this is true for
 378          any wider modes.  All other things being equal, we prefer to
 379          use the widest mode possible because it tends to expose more
 380          CSE opportunities.  */
 381       if (!iter.prefer_smaller_modes ())
 382         {
 383           /* Limit the search to the mode required by the corresponding
 384              register insertion or extraction instruction, if any.  */
 385           machine_mode limit_mode = word_mode;
 386           extraction_insn insn;
 387           if (get_best_reg_extraction_insn (&insn, pattern,
 388                                             GET_MODE_BITSIZE (best_mode),
 389                                             fieldmode))
 390             limit_mode = insn.field_mode;
 391
 392           machine_mode wider_mode;
 393           while (iter.next_mode (&wider_mode)
 394                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 395             best_mode = wider_mode;
 396         }
 397       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 398                                    new_bitnum);
 399     }
 400   return NULL_RTX;
 401 }
 402
 403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 404    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 405    offset is then BITNUM / BITS_PER_UNIT.  */
 406
 407 static bool
 408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 409                      unsigned HOST_WIDE_INT bitsize,
 410                      machine_mode struct_mode)
 411 {
 412   if (BYTES_BIG_ENDIAN)
 413     return (bitnum % BITS_PER_UNIT == 0
 414             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 415                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 416   else
 417     return bitnum % BITS_PER_WORD == 0;
 418 }
 419
 420 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 421    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 422    Return false if the access would touch memory outside the range
 423    BITREGION_START to BITREGION_END for conformance to the C++ memory
 424    model.  */
 425
 426 static bool
 427 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 428                             unsigned HOST_WIDE_INT bitnum,
 429                             machine_mode fieldmode,
 430                             unsigned HOST_WIDE_INT bitregion_start,
 431                             unsigned HOST_WIDE_INT bitregion_end)
 432 {
 433   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 434
 435   /* -fstrict-volatile-bitfields must be enabled and we must have a
 436      volatile MEM.  */
 437   if (!MEM_P (op0)
 438       || !MEM_VOLATILE_P (op0)
 439       || flag_strict_volatile_bitfields <= 0)
 440     return false;
 441
 442   /* Non-integral modes likely only happen with packed structures.
 443      Punt.  */
 444   if (!SCALAR_INT_MODE_P (fieldmode))
 445     return false;
 446
 447   /* The bit size must not be larger than the field mode, and
 448      the field mode must not be larger than a word.  */
 449   if (bitsize > modesize || modesize > BITS_PER_WORD)
 450     return false;
 451
 452   /* Check for cases of unaligned fields that must be split.  */
 453   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 454       || (STRICT_ALIGNMENT
 455           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 456     return false;
 457
 458   /* Check for cases where the C++ memory model applies.  */
 459   if (bitregion_end != 0
 460       && (bitnum - bitnum % modesize < bitregion_start
 461           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 462     return false;
 463
 464   return true;
 465 }
 466
 467 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 468    bit number BITNUM can be treated as a simple value of mode MODE.  */
 469
 470 static bool
 471 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 472                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 473 {
 474   return (MEM_P (op0)
 475           && bitnum % BITS_PER_UNIT == 0
 476           && bitsize == GET_MODE_BITSIZE (mode)
 477           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 478               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 479                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 480 }
 481 \f
 482 /* Try to use instruction INSV to store VALUE into a field of OP0.
 483    BITSIZE and BITNUM are as for store_bit_field.  */
 484
 485 static bool
 486 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 487                             unsigned HOST_WIDE_INT bitsize,
 488                             unsigned HOST_WIDE_INT bitnum,
 489                             rtx value)
 490 {
 491   struct expand_operand ops[4];
 492   rtx value1;
 493   rtx xop0 = op0;
 494   rtx_insn *last = get_last_insn ();
 495   bool copy_back = false;
 496
 497   machine_mode op_mode = insv->field_mode;
 498   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 499   if (bitsize == 0 || bitsize > unit)
 500     return false;
 501
 502   if (MEM_P (xop0))
 503     /* Get a reference to the first byte of the field.  */
 504     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 505                                  &bitnum);
 506   else
 507     {
 508       /* Convert from counting within OP0 to counting in OP_MODE.  */
 509       if (BYTES_BIG_ENDIAN)
 510         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 511
 512       /* If xop0 is a register, we need it in OP_MODE
 513          to make it acceptable to the format of insv.  */
 514       if (GET_CODE (xop0) == SUBREG)
 515         /* We can't just change the mode, because this might clobber op0,
 516            and we will need the original value of op0 if insv fails.  */
 517         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 518       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 519         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 520     }
 521
 522   /* If the destination is a paradoxical subreg such that we need a
 523      truncate to the inner mode, perform the insertion on a temporary and
 524      truncate the result to the original destination.  Note that we can't
 525      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 526      X) 0)) is (reg:N X).  */
 527   if (GET_CODE (xop0) == SUBREG
 528       && REG_P (SUBREG_REG (xop0))
 529       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 530                                          op_mode))
 531     {
 532       rtx tem = gen_reg_rtx (op_mode);
 533       emit_move_insn (tem, xop0);
 534       xop0 = tem;
 535       copy_back = true;
 536     }
 537
 538   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 539      "backwards" from the size of the unit we are inserting into.
 540      Otherwise, we count bits from the most significant on a
 541      BYTES/BITS_BIG_ENDIAN machine.  */
 542
 543   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 544     bitnum = unit - bitsize - bitnum;
 545
 546   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 547   value1 = value;
 548   if (GET_MODE (value) != op_mode)
 549     {
 550       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 551         {
 552           /* Optimization: Don't bother really extending VALUE
 553              if it has all the bits we will actually use.  However,
 554              if we must narrow it, be sure we do it correctly.  */
 555
 556           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 557             {
 558               rtx tmp;
 559
 560               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 561               if (! tmp)
 562                 tmp = simplify_gen_subreg (op_mode,
 563                                            force_reg (GET_MODE (value),
 564                                                       value1),
 565                                            GET_MODE (value), 0);
 566               value1 = tmp;
 567             }
 568           else
 569             value1 = gen_lowpart (op_mode, value1);
 570         }
 571       else if (CONST_INT_P (value))
 572         value1 = gen_int_mode (INTVAL (value), op_mode);
 573       else
 574         /* Parse phase is supposed to make VALUE's data type
 575            match that of the component reference, which is a type
 576            at least as wide as the field; so VALUE should have
 577            a mode that corresponds to that type.  */
 578         gcc_assert (CONSTANT_P (value));
 579     }
 580
 581   create_fixed_operand (&ops[0], xop0);
 582   create_integer_operand (&ops[1], bitsize);
 583   create_integer_operand (&ops[2], bitnum);
 584   create_input_operand (&ops[3], value1, op_mode);
 585   if (maybe_expand_insn (insv->icode, 4, ops))
 586     {
 587       if (copy_back)
 588         convert_move (op0, xop0, true);
 589       return true;
 590     }
 591   delete_insns_since (last);
 592   return false;
 593 }
 594
 595 /* A subroutine of store_bit_field, with the same arguments.  Return true
 596    if the operation could be implemented.
 597
 598    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 599    no other way of implementing the operation.  If FALLBACK_P is false,
 600    return false instead.  */
 601
 602 static bool
 603 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 604                    unsigned HOST_WIDE_INT bitnum,
 605                    unsigned HOST_WIDE_INT bitregion_start,
 606                    unsigned HOST_WIDE_INT bitregion_end,
 607                    machine_mode fieldmode,
 608                    rtx value, bool fallback_p)
 609 {
 610   rtx op0 = str_rtx;
 611   rtx orig_value;
 612
 613   while (GET_CODE (op0) == SUBREG)
 614     {
 615       /* The following line once was done only if WORDS_BIG_ENDIAN,
 616          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 617          meaningful at a much higher level; when structures are copied
 618          between memory and regs, the higher-numbered regs
 619          always get higher addresses.  */
 620       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 621       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 622       int byte_offset = 0;
 623
 624       /* Paradoxical subregs need special handling on big endian machines.  */
 625       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 626         {
 627           int difference = inner_mode_size - outer_mode_size;
 628
 629           if (WORDS_BIG_ENDIAN)
 630             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 631           if (BYTES_BIG_ENDIAN)
 632             byte_offset += difference % UNITS_PER_WORD;
 633         }
 634       else
 635         byte_offset = SUBREG_BYTE (op0);
 636
 637       bitnum += byte_offset * BITS_PER_UNIT;
 638       op0 = SUBREG_REG (op0);
 639     }
 640
 641   /* No action is needed if the target is a register and if the field
 642      lies completely outside that register.  This can occur if the source
 643      code contains an out-of-bounds access to a small array.  */
 644   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 645     return true;
 646
 647   /* Use vec_set patterns for inserting parts of vectors whenever
 648      available.  */
 649   if (VECTOR_MODE_P (GET_MODE (op0))
 650       && !MEM_P (op0)
 651       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 652       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 653       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 654       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 655     {
 656       struct expand_operand ops[3];
 657       machine_mode outermode = GET_MODE (op0);
 658       machine_mode innermode = GET_MODE_INNER (outermode);
 659       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 660       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 661
 662       create_fixed_operand (&ops[0], op0);
 663       create_input_operand (&ops[1], value, innermode);
 664       create_integer_operand (&ops[2], pos);
 665       if (maybe_expand_insn (icode, 3, ops))
 666         return true;
 667     }
 668
 669   /* If the target is a register, overwriting the entire object, or storing
 670      a full-word or multi-word field can be done with just a SUBREG.  */
 671   if (!MEM_P (op0)
 672       && bitsize == GET_MODE_BITSIZE (fieldmode)
 673       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 674           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 675     {
 676       /* Use the subreg machinery either to narrow OP0 to the required
 677          words or to cope with mode punning between equal-sized modes.
 678          In the latter case, use subreg on the rhs side, not lhs.  */
 679       rtx sub;
 680
 681       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 682         {
 683           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 684           if (sub)
 685             {
 686               emit_move_insn (op0, sub);
 687               return true;
 688             }
 689         }
 690       else
 691         {
 692           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 693                                      bitnum / BITS_PER_UNIT);
 694           if (sub)
 695             {
 696               emit_move_insn (sub, value);
 697               return true;
 698             }
 699         }
 700     }
 701
 702   /* If the target is memory, storing any naturally aligned field can be
 703      done with a simple store.  For targets that support fast unaligned
 704      memory, any naturally sized, unit aligned field can be done directly.  */
 705   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 706     {
 707       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 708       emit_move_insn (op0, value);
 709       return true;
 710     }
 711
 712   /* Make sure we are playing with integral modes.  Pun with subregs
 713      if we aren't.  This must come after the entire register case above,
 714      since that case is valid for any mode.  The following cases are only
 715      valid for integral modes.  */
 716   {
 717     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 718     if (imode != GET_MODE (op0))
 719       {
 720         if (MEM_P (op0))
 721           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 722         else
 723           {
 724             gcc_assert (imode != BLKmode);
 725             op0 = gen_lowpart (imode, op0);
 726           }
 727       }
 728   }
 729
 730   /* Storing an lsb-aligned field in a register
 731      can be done with a movstrict instruction.  */
 732
 733   if (!MEM_P (op0)
 734       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 735       && bitsize == GET_MODE_BITSIZE (fieldmode)
 736       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 737     {
 738       struct expand_operand ops[2];
 739       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 740       rtx arg0 = op0;
 741       unsigned HOST_WIDE_INT subreg_off;
 742
 743       if (GET_CODE (arg0) == SUBREG)
 744         {
 745           /* Else we've got some float mode source being extracted into
 746              a different float mode destination -- this combination of
 747              subregs results in Severe Tire Damage.  */
 748           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 749                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 750                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 751           arg0 = SUBREG_REG (arg0);
 752         }
 753
 754       subreg_off = bitnum / BITS_PER_UNIT;
 755       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 756         {
 757           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 758
 759           create_fixed_operand (&ops[0], arg0);
 760           /* Shrink the source operand to FIELDMODE.  */
 761           create_convert_operand_to (&ops[1], value, fieldmode, false);
 762           if (maybe_expand_insn (icode, 2, ops))
 763             return true;
 764         }
 765     }
 766
 767   /* Handle fields bigger than a word.  */
 768
 769   if (bitsize > BITS_PER_WORD)
 770     {
 771       /* Here we transfer the words of the field
 772          in the order least significant first.
 773          This is because the most significant word is the one which may
 774          be less than full.
 775          However, only do that if the value is not BLKmode.  */
 776
 777       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 778       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 779       unsigned int i;
 780       rtx_insn *last;
 781
 782       /* This is the mode we must force value to, so that there will be enough
 783          subwords to extract.  Note that fieldmode will often (always?) be
 784          VOIDmode, because that is what store_field uses to indicate that this
 785          is a bit field, but passing VOIDmode to operand_subword_force
 786          is not allowed.  */
 787       fieldmode = GET_MODE (value);
 788       if (fieldmode == VOIDmode)
 789         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 790
 791       last = get_last_insn ();
 792       for (i = 0; i < nwords; i++)
 793         {
 794           /* If I is 0, use the low-order word in both field and target;
 795              if I is 1, use the next to lowest word; and so on.  */
 796           unsigned int wordnum = (backwards
 797                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 798                                   - i - 1
 799                                   : i);
 800           unsigned int bit_offset = (backwards
 801                                      ? MAX ((int) bitsize - ((int) i + 1)
 802                                             * BITS_PER_WORD,
 803                                             0)
 804                                      : (int) i * BITS_PER_WORD);
 805           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 806           unsigned HOST_WIDE_INT new_bitsize =
 807             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 808
 809           /* If the remaining chunk doesn't have full wordsize we have
 810              to make sure that for big endian machines the higher order
 811              bits are used.  */
 812           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 813             value_word = simplify_expand_binop (word_mode, lshr_optab,
 814                                                 value_word,
 815                                                 GEN_INT (BITS_PER_WORD
 816                                                          - new_bitsize),
 817                                                 NULL_RTX, true,
 818                                                 OPTAB_LIB_WIDEN);
 819
 820           if (!store_bit_field_1 (op0, new_bitsize,
 821                                   bitnum + bit_offset,
 822                                   bitregion_start, bitregion_end,
 823                                   word_mode,
 824                                   value_word, fallback_p))
 825             {
 826               delete_insns_since (last);
 827               return false;
 828             }
 829         }
 830       return true;
 831     }
 832
 833   /* If VALUE has a floating-point or complex mode, access it as an
 834      integer of the corresponding size.  This can occur on a machine
 835      with 64 bit registers that uses SFmode for float.  It can also
 836      occur for unaligned float or complex fields.  */
 837   orig_value = value;
 838   if (GET_MODE (value) != VOIDmode
 839       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 840       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 841     {
 842       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 843       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 844     }
 845
 846   /* If OP0 is a multi-word register, narrow it to the affected word.
 847      If the region spans two words, defer to store_split_bit_field.  */
 848   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 849     {
 850       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 851                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 852       gcc_assert (op0);
 853       bitnum %= BITS_PER_WORD;
 854       if (bitnum + bitsize > BITS_PER_WORD)
 855         {
 856           if (!fallback_p)
 857             return false;
 858
 859           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 860                                  bitregion_end, value);
 861           return true;
 862         }
 863     }
 864
 865   /* From here on we can assume that the field to be stored in fits
 866      within a word.  If the destination is a register, it too fits
 867      in a word.  */
 868
 869   extraction_insn insv;
 870   if (!MEM_P (op0)
 871       && get_best_reg_extraction_insn (&insv, EP_insv,
 872                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 873                                        fieldmode)
 874       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 875     return true;
 876
 877   /* If OP0 is a memory, try copying it to a register and seeing if a
 878      cheap register alternative is available.  */
 879   if (MEM_P (op0))
 880     {
 881       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 882                                         fieldmode)
 883           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 884         return true;
 885
 886       rtx_insn *last = get_last_insn ();
 887
 888       /* Try loading part of OP0 into a register, inserting the bitfield
 889          into that, and then copying the result back to OP0.  */
 890       unsigned HOST_WIDE_INT bitpos;
 891       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 892                                                bitregion_start, bitregion_end,
 893                                                fieldmode, &bitpos);
 894       if (xop0)
 895         {
 896           rtx tempreg = copy_to_reg (xop0);
 897           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 898                                  bitregion_start, bitregion_end,
 899                                  fieldmode, orig_value, false))
 900             {
 901               emit_move_insn (xop0, tempreg);
 902               return true;
 903             }
 904           delete_insns_since (last);
 905         }
 906     }
 907
 908   if (!fallback_p)
 909     return false;
 910
 911   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 912                          bitregion_end, value);
 913   return true;
 914 }
 915
 916 /* Generate code to store value from rtx VALUE
 917    into a bit-field within structure STR_RTX
 918    containing BITSIZE bits starting at bit BITNUM.
 919
 920    BITREGION_START is bitpos of the first bitfield in this region.
 921    BITREGION_END is the bitpos of the ending bitfield in this region.
 922    These two fields are 0, if the C++ memory model does not apply,
 923    or we are not interested in keeping track of bitfield regions.
 924
 925    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        The function proceeds in three steps:
        1. If strict_volatile_bitfield_p accepts the access, the store is
           done here (either as a plain move or through
           store_fixed_bit_field_1) and the function returns.
        2. Otherwise, for a MEM with a nonzero BITREGION_START, STR_RTX is
           re-based to the start of the bit region and BITNUM,
           BITREGION_START and BITREGION_END are adjusted to match.
        3. The store is handed to store_bit_field_1 with FALLBACK_P set to
           true.  */
 926
 927 void
 928 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 929                  unsigned HOST_WIDE_INT bitnum,
 930                  unsigned HOST_WIDE_INT bitregion_start,
 931                  unsigned HOST_WIDE_INT bitregion_end,
 932                  machine_mode fieldmode,
 933                  rtx value)
 934 {
 935   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 936   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 937                                   bitregion_start, bitregion_end))
 938     {
 939       /* Storing any naturally aligned field can be done with a simple
 940          store.  For targets that support fast unaligned memory, any
 941          naturally sized, unit aligned field can be done directly.  */
 942       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 943         {
 944           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 945                                              bitnum / BITS_PER_UNIT);
 946           emit_move_insn (str_rtx, value);
 947         }
 948       else
 949         {
                 /* BITNUM is passed both by value and by address, so the
                    helper can rewrite it; presumably it ends up relative
                    to the narrowed FIELDMODE reference -- confirm against
                    narrow_bit_field_mem.  */
 950           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 951                                           &bitnum);
 952           /* Explicitly override the C/C++ memory model; ignore the
 953              bit range so that we can do the access in the mode mandated
 954              by -fstrict-volatile-bitfields instead.  */
 955           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 956         }
 957
 958       return;
 959     }
 960
 961   /* Under the C++0x memory model, we must not touch bits outside the
 962      bit region.  Adjust the address to start at the beginning of the
 963      bit region.  */
 964   if (MEM_P (str_rtx) && bitregion_start > 0)
 965     {
 966       machine_mode bestmode;
 967       HOST_WIDE_INT offset, size;
 968
             /* The region must begin on a byte boundary, because it is
                turned into the byte OFFSET just below.  */
 969       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 970
             /* Make BITNUM and BITREGION_END relative to the start of the
                region; BITREGION_START then becomes 0.  SIZE is the number
                of bytes from the region start through the last bit of the
                field, rounded up.  */
 971       offset = bitregion_start / BITS_PER_UNIT;
 972       bitnum -= bitregion_start;
 973       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 974       bitregion_end -= bitregion_start;
 975       bitregion_start = 0;
 976       bestmode = get_best_mode (bitsize, bitnum,
 977                                 bitregion_start, bitregion_end,
 978                                 MEM_ALIGN (str_rtx), VOIDmode,
 979                                 MEM_VOLATILE_P (str_rtx));
 980       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 981     }
 982
         /* FALLBACK_P is true here, so a false return from
            store_bit_field_1 is treated as an internal error.  */
 983   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 984                           bitregion_start, bitregion_end,
 985                           fieldmode, value, true))
 986     gcc_unreachable ();
 987 }
 988 \f
 989 /* Use shifts and boolean operations to store VALUE into a bit field of
 990    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are forwarded to get_best_mode
        and to store_split_bit_field; they are not otherwise used here.

        When OP0 is a MEM, an access mode is chosen with get_best_mode and
        OP0 is narrowed to that mode before the store.  If no mode is
        found (VOIDmode), the store is delegated to store_split_bit_field.
        When OP0 is not a MEM it is passed to store_fixed_bit_field_1
        unchanged.  */
 991
 992 static void
 993 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 994                        unsigned HOST_WIDE_INT bitnum,
 995                        unsigned HOST_WIDE_INT bitregion_start,
 996                        unsigned HOST_WIDE_INT bitregion_end,
 997                        rtx value)
 998 {
 999   /* There is a case not handled here:
1000      a structure with a known alignment of just a halfword
1001      and a field split across two aligned halfwords within the structure.
1002      Or likewise a structure with a known alignment of just a byte
1003      and a field split across two bytes.
1004      Such cases are not supposed to be able to occur.  */
1005
1006   if (MEM_P (op0))
1007     {
             /* Start from OP0's own mode, but substitute word_mode when
                that mode has zero bit size or is wider than a word.  The
                result is the mode argument given to get_best_mode.  */
1008       machine_mode mode = GET_MODE (op0);
1009       if (GET_MODE_BITSIZE (mode) == 0
1010           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1011         mode = word_mode;
1012       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1013                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1014
1015       if (mode == VOIDmode)
1016         {
1017           /* The only way this should occur is if the field spans word
1018              boundaries.  */
1019           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1020                                  bitregion_end, value);
1021           return;
1022         }
1023
             /* BITNUM is also passed by address so that the helper can
                rewrite it; presumably it becomes relative to the narrowed
                reference -- confirm against narrow_bit_field_mem.  */
1024       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1025     }
1026
1027   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1028 }
1029
1030 /* Helper function for store_fixed_bit_field, stores
1031    the bit field always using the MODE of OP0.

        OP0 is the destination; its mode must be a scalar integer mode
        (asserted below).  BITSIZE is the field width and BITNUM the
        position of the field within OP0.  VALUE is the value to store.

        The store is a read-modify-write: the field's bits are cleared in
        a copy of OP0 with an AND, VALUE shifted into position is merged
        in with an IOR, and the result is moved back into OP0.  The AND is
        skipped when a constant VALUE is all ones over the field, and the
        IOR is skipped when a constant VALUE is zero over the field.  */
1032
1033 static void
1034 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035                          unsigned HOST_WIDE_INT bitnum,
1036                          rtx value)
1037 {
1038   machine_mode mode;
1039   rtx temp;
         /* Set only for a CONST_INT VALUE: ALL_ZERO when its low BITSIZE
            bits are all zero, ALL_ONE when they are all one.  */
1040   int all_zero = 0;
1041   int all_one = 0;
1042
1043   mode = GET_MODE (op0);
1044   gcc_assert (SCALAR_INT_MODE_P (mode));
1045
1046   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1047      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1048
1049   if (BYTES_BIG_ENDIAN)
1050     /* BITNUM is the distance between our msb
1051        and that of the containing datum.
1052        Convert it to the distance from the lsb.  */
1053     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1054
1055   /* Now BITNUM is always the distance between our lsb
1056      and that of OP0.  */
1057
1058   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1059      we must first convert its mode to MODE.  */
1060
1061   if (CONST_INT_P (value))
1062     {
1063       unsigned HOST_WIDE_INT v = UINTVAL (value);
1064
             /* Truncate the constant to the field width.  The mask is only
                built when BITSIZE is narrower than HOST_WIDE_INT, so the
                shift count stays in range.  */
1065       if (bitsize < HOST_BITS_PER_WIDE_INT)
1066         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1067
1068       if (v == 0)
1069         all_zero = 1;
1070       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1071                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1072                || (bitsize == HOST_BITS_PER_WIDE_INT
1073                    && v == (unsigned HOST_WIDE_INT) -1))
1074         all_one = 1;
1075
1076       value = lshift_value (mode, v, bitnum);
1077     }
1078   else
1079     {
             /* Masking VALUE to BITSIZE bits is skipped in two cases: when
                VALUE's own mode is exactly BITSIZE bits wide, and when the
                field ends at the top bit of MODE, where the left shift by
                BITNUM pushes any excess high bits out of MODE.
                NOTE(review): the first case presumably relies on the
                unsigned conversion below leaving the upper bits clear --
                confirm against convert_to_mode.  */
1080       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1081                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1082
1083       if (GET_MODE (value) != mode)
1084         value = convert_to_mode (mode, value, 1);
1085
1086       if (must_and)
1087         value = expand_binop (mode, and_optab, value,
1088                               mask_rtx (mode, 0, bitsize, 0),
1089                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1090       if (bitnum > 0)
1091         value = expand_shift (LSHIFT_EXPR, mode, value,
1092                               bitnum, NULL_RTX, 1);
1093     }
1094
1095   /* Now clear the chosen bits in OP0,
1096      except that if VALUE is -1 we need not bother.  */
1097   /* We keep the intermediates in registers to allow CSE to combine
1098      consecutive bitfield assignments.  */
1099
1100   temp = force_reg (mode, op0);
1101
1102   if (! all_one)
1103     {
1104       temp = expand_binop (mode, and_optab, temp,
1105                            mask_rtx (mode, bitnum, bitsize, 1),
1106                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1107       temp = force_reg (mode, temp);
1108     }
1109
1110   /* Now logical-or VALUE into OP0, unless it is zero.  */
1111
1112   if (! all_zero)
1113     {
1114       temp = expand_binop (mode, ior_optab, temp, value,
1115                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1116       temp = force_reg (mode, temp);
1117     }
1118
         /* Write the combined value back unless TEMP is OP0 itself.  The
            move is emitted to a copy_rtx of OP0.  */
1119   if (op0 != temp)
1120     {
1121       op0 = copy_rtx (op0);
1122       emit_move_insn (op0, temp);
1123     }
1124 }
1125 \f
1126 /* Store a bit field that is split across multiple accessible memory objects.
1127
1128    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1129    BITSIZE is the field width; BITPOS the position of its first bit
1130    (within the word).
1131    VALUE is the value to store.
        BITREGION_START and BITREGION_END are forwarded to
        store_fixed_bit_field for every piece.  A nonzero BITREGION_END
        also limits how large a UNIT may be used near the end of the
        region when OP0 is not a REG or a SUBREG of a REG.

        The field is stored piece by piece.  Each iteration of the main
        loop takes the next THISSIZE bits of VALUE, selects the word (or
        memory reference) that receives them, and stores them with
        store_fixed_bit_field.
1132
1133    This does not yet handle fields wider than BITS_PER_WORD.  */
1134
1135 static void
1136 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1137                        unsigned HOST_WIDE_INT bitpos,
1138                        unsigned HOST_WIDE_INT bitregion_start,
1139                        unsigned HOST_WIDE_INT bitregion_end,
1140                        rtx value)
1141 {
         /* UNIT is the piece granularity in bits; BITSDONE counts the bits
            of the field stored so far.  */
1142   unsigned int unit;
1143   unsigned int bitsdone = 0;
1144
1145   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1146      much at a time.  */
1147   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1148     unit = BITS_PER_WORD;
1149   else
1150     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1151
1152   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1153      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1154      again, and we will mutually recurse forever.  */
1155   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1156     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1157
1158   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1159      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1160      that VALUE might be a floating-point constant.  */
1161   if (CONSTANT_P (value) && !CONST_INT_P (value))
1162     {
1163       rtx word = gen_lowpart_common (word_mode, value);
1164
1165       if (word && (value != word))
1166         value = word;
1167       else
1168         value = gen_lowpart_common (word_mode,
1169                                     force_reg (GET_MODE (value) != VOIDmode
1170                                                ? GET_MODE (value)
1171                                                : word_mode, value));
1172     }
1173
1174   while (bitsdone < bitsize)
1175     {
1176       unsigned HOST_WIDE_INT thissize;
1177       rtx part, word;
1178       unsigned HOST_WIDE_INT thispos;
1179       unsigned HOST_WIDE_INT offset;
1180
             /* OFFSET is the number of whole UNITs that precede the next
                bit to store; THISPOS is that bit's position inside its
                UNIT.  */
1181       offset = (bitpos + bitsdone) / unit;
1182       thispos = (bitpos + bitsdone) % unit;
1183
1184       /* When region of bytes we can touch is restricted, decrease
1185          UNIT close to the end of the region as needed.  If op0 is a REG
1186          or SUBREG of REG, don't do this, as there can't be data races
1187          on a register and we can expand shorter code in some cases.  */
1188       if (bitregion_end
1189           && unit > BITS_PER_UNIT
1190           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1191           && !REG_P (op0)
1192           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1193         {
                 /* Nothing is stored on this pass; the loop restarts and
                    recomputes OFFSET and THISPOS with the halved UNIT.  */
1194           unit = unit / 2;
1195           continue;
1196         }
1197
1198       /* THISSIZE must not overrun a word boundary.  Otherwise,
1199          store_fixed_bit_field will call us again, and we will mutually
1200          recurse forever.  */
1201       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1202       thissize = MIN (thissize, unit - thispos);
1203
             /* PART receives the THISSIZE bits of VALUE that belong to
                this piece.
                NOTE(review): the constant masks below shift 1 by THISSIZE
                in HOST_WIDE_INT; that is only well defined while THISSIZE
                is smaller than the width of HOST_WIDE_INT -- confirm that
                a full-width piece cannot reach these paths.  */
1204       if (BYTES_BIG_ENDIAN)
1205         {
1206           /* Fetch successively less significant portions.  */
1207           if (CONST_INT_P (value))
1208             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1209                              >> (bitsize - bitsdone - thissize))
1210                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1211           else
1212             {
1213               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1214               /* The args are chosen so that the last part includes the
1215                  lsb.  Give extract_bit_field the value it needs (with
1216                  endianness compensation) to fetch the piece we want.  */
1217               part = extract_fixed_bit_field (word_mode, value, thissize,
1218                                               total_bits - bitsize + bitsdone,
1219                                               NULL_RTX, 1);
1220             }
1221         }
1222       else
1223         {
1224           /* Fetch successively more significant portions.  */
1225           if (CONST_INT_P (value))
1226             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1227                              >> bitsdone)
1228                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1229           else
1230             part = extract_fixed_bit_field (word_mode, value, thissize,
1231                                             bitsdone, NULL_RTX, 1);
1232         }
1233
1234       /* If OP0 is a register, then handle OFFSET here.
1235
1236          When handling multiword bitfields, extract_bit_field may pass
1237          down a word_mode SUBREG of a larger REG for a bitfield that actually
1238          crosses a word boundary.  Thus, for a SUBREG, we must find
1239          the current word starting from the base register.  */
1240       if (GET_CODE (op0) == SUBREG)
1241         {
                 /* WORD_OFFSET is the word index within the inner register:
                    the words skipped by SUBREG_BYTE plus the words covered
                    by OFFSET units.  */
1242           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1243                             + (offset * unit / BITS_PER_WORD);
1244           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
                 /* An inner register narrower than a word has only word 0;
                    any other index is marked with const0_rtx and skipped
                    further down.  */
1245           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1246             word = word_offset ? const0_rtx : op0;
1247           else
1248             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1249                                           GET_MODE (SUBREG_REG (op0)));
                 /* Keep only the UNIT index inside the selected word.
                    NOTE(review): the mask form assumes BITS_PER_WORD / UNIT
                    is a power of two -- confirm.  */
1250           offset &= BITS_PER_WORD / unit - 1;
1251         }
1252       else if (REG_P (op0))
1253         {
                 /* Same word selection as the SUBREG case, but indexed
                    directly from OP0.  */
1254           machine_mode op0_mode = GET_MODE (op0);
1255           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1256             word = offset ? const0_rtx : op0;
1257           else
1258             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1259                                           GET_MODE (op0));
1260           offset &= BITS_PER_WORD / unit - 1;
1261         }
1262       else
1263         word = op0;
1264
1265       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1266          it is just an out-of-bounds access.  Ignore it.  */
1267       if (word != const0_rtx)
1268         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1269                                bitregion_start, bitregion_end, part);
             /* The piece counts as done even when the store was skipped.  */
1270       bitsdone += thissize;
1271     }
1272 }
1273 \f
1274 /* A subroutine of extract_bit_field_1 that converts return value X
1275    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1276    to extract_bit_field.

        X is returned unchanged when its mode is already TMODE or MODE.
        Otherwise the result is in TMODE; UNSIGNEDP is passed to
        convert_to_mode for the conversion.  */
1277
1278 static rtx
1279 convert_extracted_bit_field (rtx x, machine_mode mode,
1280                              machine_mode tmode, bool unsignedp)
1281 {
1282   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1283     return x;
1284
1285   /* If TMODE is not a scalar integer mode, first convert X to the
1286      integer mode of TMODE's size, force that into a register, and
1287      then take its low part in TMODE.  */
1288   if (!SCALAR_INT_MODE_P (tmode))
1289     {
1290       machine_mode smode;
1291
1292       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1293       x = convert_to_mode (smode, x, unsignedp);
1294       x = force_reg (smode, x);
1295       return gen_lowpart (tmode, x);
1296     }
1297
         /* TMODE is a scalar integer mode: a plain conversion is enough.  */
1298   return convert_to_mode (tmode, x, unsignedp);
1299 }
1300
1301 /* Try to use an ext(z)v pattern to extract a field from OP0.
1302    Return the extracted value on success, otherwise return null.
1303    EXTV describes the pattern to use; its field_mode (called EXT_MODE
1304    below) is the mode of the extraction.  The other arguments
        are as for extract_bit_field.

        Null is returned when BITSIZE is zero or wider than EXT_MODE, when
        OP0 is a SUBREG whose mode is not EXT_MODE, or when the pattern
        cannot be expanded.  */
1305
1306 static rtx
1307 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1308                               unsigned HOST_WIDE_INT bitsize,
1309                               unsigned HOST_WIDE_INT bitnum,
1310                               int unsignedp, rtx target,
1311                               machine_mode mode, machine_mode tmode)
1312 {
1313   struct expand_operand ops[4];
         /* SPEC_TARGET is the rtx that may be returned to the caller as
            is: TARGET as passed in, or the TMODE pseudo created below.
            SPEC_TARGET_SUBREG, when set, is the EXT_MODE low part of
            SPEC_TARGET; if the insn writes there, SPEC_TARGET is
            returned.  */
1314   rtx spec_target = target;
1315   rtx spec_target_subreg = 0;
1316   machine_mode ext_mode = extv->field_mode;
         /* UNIT is the width of EXT_MODE in bits.  */
1317   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1318
1319   if (bitsize == 0 || unit < bitsize)
1320     return NULL_RTX;
1321
1322   if (MEM_P (op0))
1323     /* Get a reference to the first byte of the field.  */
1324     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1325                                 &bitnum);
1326   else
1327     {
1328       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1329       if (BYTES_BIG_ENDIAN)
1330         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1331
1332       /* If op0 is a register, we need it in EXT_MODE to make it
1333          acceptable to the format of ext(z)v.  */
1334       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1335         return NULL_RTX;
1336       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1337         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1338     }
1339
1340   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1341      "backwards" from the size of the unit we are extracting from.
1342      Otherwise, we count bits from the most significant on a
1343      BYTES/BITS_BIG_ENDIAN machine.  */
1344
1345   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1346     bitnum = unit - bitsize - bitnum;
1347
         /* With no target supplied, extract into a fresh TMODE pseudo.  */
1348   if (target == 0)
1349     target = spec_target = gen_reg_rtx (tmode);
1350
1351   if (GET_MODE (target) != ext_mode)
1352     {
1353       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1354          between the mode of the extraction (word_mode) and the target
1355          mode.  Instead, create a temporary and use convert_move to set
1356          the target.  */
1357       if (REG_P (target)
1358           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1359         {
1360           target = gen_lowpart (ext_mode, target);
                 /* Record the low part only when EXT_MODE has more
                    precision than the original target's mode.  */
1361           if (GET_MODE_PRECISION (ext_mode)
1362               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1363             spec_target_subreg = target;
1364         }
1365       else
1366         target = gen_reg_rtx (ext_mode);
1367     }
1368
         /* Operands: 0 = output in EXT_MODE, 1 = source, 2 = field width,
            3 = field position.  */
1369   create_output_operand (&ops[0], target, ext_mode);
1370   create_fixed_operand (&ops[1], op0);
1371   create_integer_operand (&ops[2], bitsize);
1372   create_integer_operand (&ops[3], bitnum);
1373   if (maybe_expand_insn (extv->icode, 4, ops))
1374     {
             /* Re-read the output operand and compare it with the
                recorded targets.  Any other rtx is converted to
                MODE/TMODE before being returned.  */
1375       target = ops[0].value;
1376       if (target == spec_target)
1377         return target;
1378       if (target == spec_target_subreg)
1379         return spec_target;
1380       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1381     }
1382   return NULL_RTX;
1383 }
1384
1385 /* A subroutine of extract_bit_field, with the same arguments.
1386    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1387    if we can find no other means of implementing the operation.
1388    if FALLBACK_P is false, return NULL instead.  */
1389
1390 static rtx
1391 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1392                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1393                      machine_mode mode, machine_mode tmode,
1394                      bool fallback_p)
1395 {
1396   rtx op0 = str_rtx;
1397   machine_mode int_mode;
1398   machine_mode mode1;
1399
1400   if (tmode == VOIDmode)
1401     tmode = mode;
1402
1403   while (GET_CODE (op0) == SUBREG)
1404     {
1405       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1406       op0 = SUBREG_REG (op0);
1407     }
1408
1409   /* If we have an out-of-bounds access to a register, just return an
1410      uninitialized register of the required mode.  This can occur if the
1411      source code contains an out-of-bounds access to a small array.  */
1412   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1413     return gen_reg_rtx (tmode);
1414
1415   if (REG_P (op0)
1416       && mode == GET_MODE (op0)
1417       && bitnum == 0
1418       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1419     {
1420       /* We're trying to extract a full register from itself.  */
1421       return op0;
1422     }
1423
1424   /* See if we can get a better vector mode before extracting.  */
1425   if (VECTOR_MODE_P (GET_MODE (op0))
1426       && !MEM_P (op0)
1427       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1428     {
1429       machine_mode new_mode;
1430
1431       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1432         new_mode = MIN_MODE_VECTOR_FLOAT;
1433       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1434         new_mode = MIN_MODE_VECTOR_FRACT;
1435       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1436         new_mode = MIN_MODE_VECTOR_UFRACT;
1437       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1438         new_mode = MIN_MODE_VECTOR_ACCUM;
1439       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1440         new_mode = MIN_MODE_VECTOR_UACCUM;
1441       else
1442         new_mode = MIN_MODE_VECTOR_INT;
1443
1444       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1445         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1446             && targetm.vector_mode_supported_p (new_mode))
1447           break;
1448       if (new_mode != VOIDmode)
1449         op0 = gen_lowpart (new_mode, op0);
1450     }
1451
1452   /* Use vec_extract patterns for extracting parts of vectors whenever
1453      available.  */
1454   if (VECTOR_MODE_P (GET_MODE (op0))
1455       && !MEM_P (op0)
1456       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1457       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1458           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1459     {
1460       struct expand_operand ops[3];
1461       machine_mode outermode = GET_MODE (op0);
1462       machine_mode innermode = GET_MODE_INNER (outermode);
1463       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1464       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1465
1466       create_output_operand (&ops[0], target, innermode);
1467       create_input_operand (&ops[1], op0, outermode);
1468       create_integer_operand (&ops[2], pos);
1469       if (maybe_expand_insn (icode, 3, ops))
1470         {
1471           target = ops[0].value;
1472           if (GET_MODE (target) != mode)
1473             return gen_lowpart (tmode, target);
1474           return target;
1475         }
1476     }
1477
1478   /* Make sure we are playing with integral modes.  Pun with subregs
1479      if we aren't.  */
1480   {
1481     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1482     if (imode != GET_MODE (op0))
1483       {
1484         if (MEM_P (op0))
1485           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1486         else if (imode != BLKmode)
1487           {
1488             op0 = gen_lowpart (imode, op0);
1489
1490             /* If we got a SUBREG, force it into a register since we
1491                aren't going to be able to do another SUBREG on it.  */
1492             if (GET_CODE (op0) == SUBREG)
1493               op0 = force_reg (imode, op0);
1494           }
1495         else if (REG_P (op0))
1496           {
1497             rtx reg, subreg;
1498             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1499                                             MODE_INT);
1500             reg = gen_reg_rtx (imode);
1501             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1502             emit_move_insn (subreg, op0);
1503             op0 = reg;
1504             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1505           }
1506         else
1507           {
1508             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1509             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1510             emit_move_insn (mem, op0);
1511             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1512           }
1513       }
1514   }
1515
1516   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1517      If that's wrong, the solution is to test for it and set TARGET to 0
1518      if needed.  */
1519
1520   /* Get the mode of the field to use for atomic access or subreg
1521      conversion.  */
1522   mode1 = mode;
1523   if (SCALAR_INT_MODE_P (tmode))
1524     {
1525       machine_mode try_mode = mode_for_size (bitsize,
1526                                                   GET_MODE_CLASS (tmode), 0);
1527       if (try_mode != BLKmode)
1528         mode1 = try_mode;
1529     }
1530   gcc_assert (mode1 != BLKmode);
1531
1532   /* Extraction of a full MODE1 value can be done with a subreg as long
1533      as the least significant bit of the value is the least significant
1534      bit of either OP0 or a word of OP0.  */
1535   if (!MEM_P (op0)
1536       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1537       && bitsize == GET_MODE_BITSIZE (mode1)
1538       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1539     {
1540       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1541                                      bitnum / BITS_PER_UNIT);
1542       if (sub)
1543         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1544     }
1545
1546   /* Extraction of a full MODE1 value can be done with a load as long as
1547      the field is on a byte boundary and is sufficiently aligned.  */
1548   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1549     {
1550       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1551       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1552     }
1553
1554   /* Handle fields bigger than a word.  */
1555
1556   if (bitsize > BITS_PER_WORD)
1557     {
1558       /* Here we transfer the words of the field
1559          in the order least significant first.
1560          This is because the most significant word is the one which may
1561          be less than full.  */
1562
1563       unsigned int backwards = WORDS_BIG_ENDIAN;
1564       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1565       unsigned int i;
1566       rtx_insn *last;
1567
1568       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1569         target = gen_reg_rtx (mode);
1570
1571       /* Indicate for flow that the entire target reg is being set.  */
1572       emit_clobber (target);
1573
1574       last = get_last_insn ();
1575       for (i = 0; i < nwords; i++)
1576         {
1577           /* If I is 0, use the low-order word in both field and target;
1578              if I is 1, use the next to lowest word; and so on.  */
1579           /* Word number in TARGET to use.  */
1580           unsigned int wordnum
1581             = (backwards
1582                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1583                : i);
1584           /* Offset from start of field in OP0.  */
1585           unsigned int bit_offset = (backwards
1586                                      ? MAX ((int) bitsize - ((int) i + 1)
1587                                             * BITS_PER_WORD,
1588                                             0)
1589                                      : (int) i * BITS_PER_WORD);
1590           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1591           rtx result_part
1592             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1593                                              bitsize - i * BITS_PER_WORD),
1594                                    bitnum + bit_offset, 1, target_part,
1595                                    mode, word_mode, fallback_p);
1596
1597           gcc_assert (target_part);
1598           if (!result_part)
1599             {
1600               delete_insns_since (last);
1601               return NULL;
1602             }
1603
1604           if (result_part != target_part)
1605             emit_move_insn (target_part, result_part);
1606         }
1607
1608       if (unsignedp)
1609         {
1610           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1611              need to be zero'd out.  */
1612           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1613             {
1614               unsigned int i, total_words;
1615
1616               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1617               for (i = nwords; i < total_words; i++)
1618                 emit_move_insn
1619                   (operand_subword (target,
1620                                     backwards ? total_words - i - 1 : i,
1621                                     1, VOIDmode),
1622                    const0_rtx);
1623             }
1624           return target;
1625         }
1626
1627       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1628       target = expand_shift (LSHIFT_EXPR, mode, target,
1629                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1630       return expand_shift (RSHIFT_EXPR, mode, target,
1631                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1632     }
1633
1634   /* If OP0 is a multi-word register, narrow it to the affected word.
1635      If the region spans two words, defer to extract_split_bit_field.  */
1636   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1637     {
1638       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1639                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1640       bitnum %= BITS_PER_WORD;
1641       if (bitnum + bitsize > BITS_PER_WORD)
1642         {
1643           if (!fallback_p)
1644             return NULL_RTX;
1645           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1646           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1647         }
1648     }
1649
1650   /* From here on we know the desired field is smaller than a word.
1651      If OP0 is a register, it too fits within a word.  */
1652   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1653   extraction_insn extv;
1654   if (!MEM_P (op0)
1655       /* ??? We could limit the structure size to the part of OP0 that
1656          contains the field, with appropriate checks for endianness
1657          and TRULY_NOOP_TRUNCATION.  */
1658       && get_best_reg_extraction_insn (&extv, pattern,
1659                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1660                                        tmode))
1661     {
1662       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1663                                                  unsignedp, target, mode,
1664                                                  tmode);
1665       if (result)
1666         return result;
1667     }
1668
1669   /* If OP0 is a memory, try copying it to a register and seeing if a
1670      cheap register alternative is available.  */
1671   if (MEM_P (op0))
1672     {
1673       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1674                                         tmode))
1675         {
1676           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1677                                                      bitnum, unsignedp,
1678                                                      target, mode,
1679                                                      tmode);
1680           if (result)
1681             return result;
1682         }
1683
1684       rtx_insn *last = get_last_insn ();
1685
1686       /* Try loading part of OP0 into a register and extracting the
1687          bitfield from that.  */
1688       unsigned HOST_WIDE_INT bitpos;
1689       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1690                                                0, 0, tmode, &bitpos);
1691       if (xop0)
1692         {
1693           xop0 = copy_to_reg (xop0);
1694           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1695                                             unsignedp, target,
1696                                             mode, tmode, false);
1697           if (result)
1698             return result;
1699           delete_insns_since (last);
1700         }
1701     }
1702
1703   if (!fallback_p)
1704     return NULL;
1705
1706   /* Find a correspondingly-sized integer field, so we can apply
1707      shifts and masks to it.  */
1708   int_mode = int_mode_for_mode (tmode);
1709   if (int_mode == BLKmode)
1710     int_mode = int_mode_for_mode (mode);
1711   /* Should probably push op0 out to memory and then do a load.  */
1712   gcc_assert (int_mode != BLKmode);
1713
1714   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1715                                     target, unsignedp);
1716   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1717 }
1718
1719 /* Emit RTL that reads a bit-field of BITSIZE bits located BITNUM bits
1720    into STR_RTX, and return an rtx holding the extracted value.
1721
1722    STR_RTX is the object the field lives in (a REG or MEM).
1723    UNSIGNEDP is nonzero for an unsigned field, zero for a signed one.
1724    TARGET, if nonzero, is a suggested place for the result; it is used
1725    and returned only when storing there costs nothing extra.
1726    MODE is the natural mode of the field value once extracted.
1727    TMODE is the mode the caller would like the value to have; the
1728    result may nevertheless come back in MODE.
1729
1730    When TARGET is not used the result is a REG of mode TMODE or MODE,
1731    with TMODE preferred if both are equally easy.  Accesses covered by
1732    -fstrict-volatile-bitfields are handled here; everything else is
1733    passed on to extract_bit_field_1.  */
1734
1735 rtx
1736 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1737                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1738                    machine_mode mode, machine_mode tmode)
1739 {
1740   /* Pick the access mode checked against -fstrict-volatile-bitfields:
1741      STR_RTX's own mode when it has a nonzero size, else TARGET's mode
1742      under the same condition, else TMODE.  */
1743   machine_mode mode1 = tmode;
1744   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1745     mode1 = GET_MODE (str_rtx);
1746   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1747     mode1 = GET_MODE (target);
1748
1749   /* When the strict-volatile rules do not apply, defer to the general
1750      worker, passing true as its final (fallback) argument.  */
1751   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1752     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1753                                 target, mode, tmode, true);
1754
1755   rtx result;
1756   if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1757     /* Extraction of a full MODE1 value can be done with a load as long
1758        as the field is on a byte boundary and is sufficiently aligned.  */
1759     result = adjust_bitfield_address (str_rtx, mode1,
1760                                       bitnum / BITS_PER_UNIT);
1761   else
1762     {
1763       /* Otherwise let narrow_bit_field_mem adjust STR_RTX and BITNUM for
1764          MODE1, then extract the field in the mode of the adjusted rtx.  */
1765       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1766                                       &bitnum);
1767       result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1768                                           target, unsignedp);
1769     }
1770
1771   return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1772 }
1773 \f
1774 /* Extract a field of BITSIZE bits starting at bit BITNUM of OP0 using
1775    shifts and boolean operations, and return an rtx for the value.
1776
1777    UNSIGNEDP is nonzero for an unsigned bit field (no sign extension).
1778    TARGET, if nonzero, is where we try to put the value, although the
1779    result is not guaranteed to be TARGET.  If TARGET is not used, the
1780    value is placed in a pseudo-reg of mode TMODE.  */
1781
1782 static rtx
1783 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1784                          unsigned HOST_WIDE_INT bitsize,
1785                          unsigned HOST_WIDE_INT bitnum, rtx target,
1786                          int unsignedp)
1787 {
1788   /* Anything that is not a MEM is extracted in its own mode.  */
1789   if (!MEM_P (op0))
1790     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1791                                       target, unsignedp);
1792
1793   /* VOIDmode should only come back from get_best_mode when the field
1794      spans word boundaries; such a field is pieced together instead.  */
1795   machine_mode access_mode
1796     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1797                      MEM_VOLATILE_P (op0));
1798   if (access_mode == VOIDmode)
1799     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1800
1801   op0 = narrow_bit_field_mem (op0, access_mode, bitsize, bitnum, &bitnum);
1802   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1803                                     target, unsignedp);
1804 }
1805
1806 /* Worker for extract_fixed_bit_field: extract BITSIZE bits at BITNUM from
1807    OP0, always operating in OP0's own mode, and deliver them for TMODE.  */
1808
1809 static rtx
1810 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1811                            unsigned HOST_WIDE_INT bitsize,
1812                            unsigned HOST_WIDE_INT bitnum, rtx target,
1813                            int unsignedp)
1814 {
1815   machine_mode mode = GET_MODE (op0);
1816   gcc_assert (SCALAR_INT_MODE_P (mode));
1817
1818   /* Note that BITSIZE + BITNUM can be greater than GET_MODE_BITSIZE (MODE)
1819      for invalid input, such as the equivalent of extracting f5 in
1820      gcc.dg/pr48335-2.c.  */
1821
1822   if (BYTES_BIG_ENDIAN)
1823     /* BITNUM is the distance between our msb and that of OP0.
1824        Convert it to the distance from the lsb.  */
1825     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1826
1827   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1828      We have reduced the big-endian case to the little-endian case.  */
1829
1830   if (unsignedp)
1831     {
1832       if (bitnum)
1833         {
1834           /* The field does not start at the lsb, so shift it down with a
1835              logical right shift.  TARGET is offered to the shift only if
1836              it is a REG and the result is wanted in this same mode.  */
1837           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1838           if (tmode != mode)
1839             subtarget = 0;
1840           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1841         }
1842       /* Convert the value to the desired mode.  */
1843       if (mode != tmode)
1844         op0 = convert_to_mode (tmode, op0, 1);
1845
1846       /* Unless the msb of the field used to be the msb when we shifted
1847          (so that no stray bits remain above the field), mask out the
1848          upper bits, keeping only the low BITSIZE bits.  */
1849       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1850         return expand_binop (GET_MODE (op0), and_optab, op0,
1851                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1852                              target, 1, OPTAB_LIB_WIDEN);
1853       return op0;
1854     }
1855
1856   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1857      then arithmetic-shift its lsb to the lsb of the word.  */
1858   op0 = force_reg (mode, op0);
1859
1860   /* Find the narrowest integer mode that contains the field, i.e. one with
1861      at least BITSIZE + BITNUM bits, and continue in that mode.  */
1862   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1863        mode = GET_MODE_WIDER_MODE (mode))
1864     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1865       {
1866         op0 = convert_to_mode (mode, op0, 0);
1867         break;
1868       }
1869   /* TARGET can only receive the result if that mode is the wanted one.  */
1870   if (mode != tmode)
1871     target = 0;
1872
1873   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1874     {
1875       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1876       /* Maybe propagate the target for the shift.  */
1877       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1878       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1879     }
1880   /* The arithmetic right shift right-justifies and sign-extends it.  */
1881   return expand_shift (RSHIFT_EXPR, mode, op0,
1882                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1883 }
1884
1885 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx of mode MODE
1886    with the value VALUE << BITPOS, built by immed_wide_int_const.  */
1887
1888 static rtx
1889 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
1890               int bitpos)
1891 {
1892   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
1893 }
1894 \f
1895 /* Extract a bit field that is split across two words and return an RTX
1896    for the result, which is computed in word_mode.
1897
1898    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1899    BITSIZE is the field width; BITPOS is the position of its first bit
1900    in that word.  UNSIGNEDP is nonzero to zero-extend, else sign-extend.  */
1901
1902 static rtx
1903 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1904                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1905 {
1906   unsigned int unit;
1907   unsigned int bitsdone = 0;
1908   rtx result = NULL_RTX;
1909   int first = 1;
1910
1911   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1912      much at a time.  For a MEM, UNIT is further limited by its alignment.  */
1913   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1914     unit = BITS_PER_WORD;
1915   else
1916     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1917
1918   while (bitsdone < bitsize)
1919     {
1920       unsigned HOST_WIDE_INT thissize;
1921       rtx part, word;
1922       unsigned HOST_WIDE_INT thispos;
1923       unsigned HOST_WIDE_INT offset;
1924       /* Locate the next unextracted bit: its UNIT and its offset therein.  */
1925       offset = (bitpos + bitsdone) / unit;
1926       thispos = (bitpos + bitsdone) % unit;
1927
1928       /* THISSIZE must not overrun a word boundary.  Otherwise,
1929          extract_fixed_bit_field will call us again, and we will mutually
1930          recurse forever.  */
1931       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1932       thissize = MIN (thissize, unit - thispos);
1933
1934       /* If OP0 is a register, then handle OFFSET here.
1935
1936          When handling multiword bitfields, extract_bit_field may pass
1937          down a word_mode SUBREG of a larger REG for a bitfield that actually
1938          crosses a word boundary.  Thus, for a SUBREG, we must find
1939          the current word starting from the base register.  */
1940       if (GET_CODE (op0) == SUBREG)
1941         {
1942           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1943           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1944                                         GET_MODE (SUBREG_REG (op0)));
1945           offset = 0;
1946         }
1947       else if (REG_P (op0))
1948         {
1949           word = operand_subword_force (op0, offset, GET_MODE (op0));
1950           offset = 0;
1951         }
1952       else
1953         word = op0;
1954
1955       /* Extract the parts in bit-counting order,
1956          whose meaning is determined by BYTES_BIG_ENDIAN.
1957          OFFSET is in UNITs, and UNIT is in bits.  */
1958       part = extract_fixed_bit_field (word_mode, word, thissize,
1959                                       offset * unit + thispos, 0, 1);
1960       bitsdone += thissize;
1961       /* Shift this part into place for the result.  With BYTES_BIG_ENDIAN
1962          earlier parts are more significant; otherwise less significant.  */
1963       if (BYTES_BIG_ENDIAN)
1964         {
1965           if (bitsize != bitsdone)
1966             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1967                                  bitsize - bitsdone, 0, 1);
1968         }
1969       else
1970         {
1971           if (bitsdone != thissize)
1972             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1973                                  bitsdone - thissize, 0, 1);
1974         }
1975
1976       if (first)
1977         result = part;
1978       else
1979         /* Combine the parts with bitwise or.  This works
1980            because we extracted each part as an unsigned bit field.  */
1981         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1982                                OPTAB_LIB_WIDEN);
1983
1984       first = 0;
1985     }
1986
1987   /* Unsigned bit field: every part was extracted unsigned; we are done.  */
1988   if (unsignedp)
1989     return result;
1990   /* Signed bit field: sign-extend with a left then an arithmetic right shift.  */
1991   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1992                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1993   return expand_shift (RSHIFT_EXPR, word_mode, result,
1994                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1995 }
1996 \f
1997 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1998    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1999    MODE, fill the upper bits with zeros.  Fail if the layout of either
2000    mode is unknown (as for CC modes) or if the extraction would involve
2001    unprofitable mode punning.  Return the value on success, otherwise
2002    return null.
2003
2004    This is different from gen_lowpart* in these respects:
2005
2006      - the returned value must always be considered an rvalue
2007
2008      - when MODE is wider than SRC_MODE, the extraction involves
2009        a zero extension
2010
2011      - when MODE is smaller than SRC_MODE, the extraction involves
2012        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2013
2014    In other words, this routine performs a computation, whereas the
2015    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2016    operations.  */
2017
2018 rtx
2019 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2020 {
2021   machine_mode int_mode, src_int_mode;
2022
2023   if (mode == src_mode)
2024     return src;
2025
2026   if (CONSTANT_P (src))
2027     {
2028       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2029          fails, it will happily create (subreg (symbol_ref)) or similar
2030          invalid SUBREGs.  */
2031       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2032       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2033       if (ret)
2034         return ret;
2035       /* Otherwise try a SUBREG of SRC forced into a register; give up if
2036          SRC has no mode of its own or such a SUBREG would be invalid.  */
2037       if (GET_MODE (src) == VOIDmode
2038           || !validate_subreg (mode, src_mode, src, byte))
2039         return NULL_RTX;
2040
2041       src = force_reg (GET_MODE (src), src);
2042       return gen_rtx_SUBREG (mode, src, byte);
2043     }
2044   /* The layout of CC modes is unknown, so never pun to or from them.  */
2045   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2046     return NULL_RTX;
2047
2048   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2049       && MODES_TIEABLE_P (mode, src_mode))
2050     {
2051       rtx x = gen_lowpart_common (mode, src);
2052       if (x)
2053         return x;
2054     }
2055   /* Otherwise go via the integer modes for SRC_MODE and MODE.  */
2056   src_int_mode = int_mode_for_mode (src_mode);
2057   int_mode = int_mode_for_mode (mode);
2058   if (src_int_mode == BLKmode || int_mode == BLKmode)
2059     return NULL_RTX;
2060
2061   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2062     return NULL_RTX;
2063   if (!MODES_TIEABLE_P (int_mode, mode))
2064     return NULL_RTX;
2065   /* Pun to integer, extend or truncate as unsigned, then pun to MODE.  */
2066   src = gen_lowpart (src_int_mode, src);
2067   src = convert_modes (int_mode, src_int_mode, src, true);
2068   src = gen_lowpart (mode, src);
2069   return src;
2070 }
2070 \f
2071 /* Emit code computing TARGET + INC and leave the sum in TARGET.  */
2072
2073 void
2074 expand_inc (rtx target, rtx inc)
2075 {
2076   machine_mode mode = GET_MODE (target);
2077   rtx sum = expand_binop (mode, add_optab, target, inc, target, 0,
2078                           OPTAB_LIB_WIDEN);
2079   if (sum != target)
2080     emit_move_insn (target, sum);   /* Result landed elsewhere.  */
2081 }
2082
2083 /* Emit code computing TARGET - DEC and leave the difference in TARGET.  */
2084
2085 void
2086 expand_dec (rtx target, rtx dec)
2087 {
2088   machine_mode mode = GET_MODE (target);
2089   rtx difference = expand_binop (mode, sub_optab, target, dec, target, 0,
2090                                  OPTAB_LIB_WIDEN);
2091   if (difference != target)
2092     emit_move_insn (target, difference);   /* Result landed elsewhere.  */
2093 }
2094 \f
2095 /* Emit a shift or rotate of SHIFTED, of mode MODE, for expression code
2096    CODE (LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR).
2097    AMOUNT is the rtx for the count; it may be a vector when MODE is one.
2098    Store the result in the rtx TARGET, if that is convenient.
2099    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2100    Return the rtx for where the value is.  */
2101
2102 static rtx
2103 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2104                 rtx amount, rtx target, int unsignedp)
2105 {
2106   rtx op1, temp = 0;
2107   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2108   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2109   optab lshift_optab = ashl_optab;
2110   optab rshift_arith_optab = ashr_optab;
2111   optab rshift_uns_optab = lshr_optab;
2112   optab lrotate_optab = rotl_optab;
2113   optab rrotate_optab = rotr_optab;
2114   machine_mode op1_mode;
2115   machine_mode scalar_mode = mode;
2116   int attempt;
2117   bool speed = optimize_insn_for_speed_p ();
2118
2119   if (VECTOR_MODE_P (mode))
2120     scalar_mode = GET_MODE_INNER (mode);
2121   op1 = amount;
2122   op1_mode = GET_MODE (op1);
2123
2124   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2125      shift amount is a vector, use the vector/vector shift patterns.  */
2126   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2127     {
2128       lshift_optab = vashl_optab;
2129       rshift_arith_optab = vashr_optab;
2130       rshift_uns_optab = vlshr_optab;
2131       lrotate_optab = vrotl_optab;
2132       rrotate_optab = vrotr_optab;
2133     }
2134
2135   /* Previously detected shift-counts computed by NEGATE_EXPR
2136      and shifted in the other direction; but that does not work
2137      on all machines.  */
2138
2139   if (SHIFT_COUNT_TRUNCATED)
2140     {
2141       if (CONST_INT_P (op1)
2142           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2143               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2144         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2145                        % GET_MODE_BITSIZE (scalar_mode));
2146       else if (GET_CODE (op1) == SUBREG
2147                && subreg_lowpart_p (op1)
2148                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2149                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2150         op1 = SUBREG_REG (op1);
2151     }
2152
2153   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2154      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2155      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2156      amount instead.  */
2157   if (rotate
2158       && CONST_INT_P (op1)
2159       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2160                    GET_MODE_BITSIZE (scalar_mode) - 1))
2161     {
2162       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2163       left = !left;
2164       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2165     }
2166
2167   /* Rotating 16-bit values by 8 bits is a byte swap (unlike wider values:
2168      0x01020304 rotated by 16 is 0x03040102, but bswapsi gives 0x04030201).
2169      Use the bswap pattern for MODE itself rather than HImode, since MODE
2170      may be a vector of 16-bit elements, which HImode cannot operate on.  */
2171   if (rotate
2172       && CONST_INT_P (op1)
2173       && INTVAL (op1) == BITS_PER_UNIT
2174       && GET_MODE_SIZE (scalar_mode) == 2
2175       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2176     return expand_unop (mode, bswap_optab, shifted, NULL_RTX,
2177                         unsignedp);
2178
2179   if (op1 == const0_rtx)
2180     return shifted;
2181
2182   /* Check whether it is cheaper to implement a left shift by a constant
2183      bit count by a sequence of additions.  */
2184   if (code == LSHIFT_EXPR
2185       && CONST_INT_P (op1)
2186       && INTVAL (op1) > 0
2187       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2188       && INTVAL (op1) < MAX_BITS_PER_WORD
2189       && (shift_cost (speed, mode, INTVAL (op1))
2190           > INTVAL (op1) * add_cost (speed, mode))
2191       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2192     {
2193       int i;
2194       for (i = 0; i < INTVAL (op1); i++)
2195         {
2196           temp = force_reg (mode, shifted);
2197           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2198                                   unsignedp, OPTAB_LIB_WIDEN);
2199         }
2200       return shifted;
2201     }
2202
2203   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2204     {
2205       enum optab_methods methods;
2206
2207       if (attempt == 0)
2208         methods = OPTAB_DIRECT;
2209       else if (attempt == 1)
2210         methods = OPTAB_WIDEN;
2211       else
2212         methods = OPTAB_LIB_WIDEN;
2213
2214       if (rotate)
2215         {
2216           /* Widening does not work for rotation.  */
2217           if (methods == OPTAB_WIDEN)
2218             continue;
2219           else if (methods == OPTAB_LIB_WIDEN)
2220             {
2221               /* If we have been unable to open-code this by a rotation,
2222                  do it as the IOR of two shifts.  I.e., to rotate A
2223                  by N bits, compute
2224                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2225                  where C is the bitsize of A.
2226
2227                  It is theoretically possible that the target machine might
2228                  not be able to perform either shift and hence we would
2229                  be making two libcalls rather than just the one for the
2230                  shift (similarly if IOR could not be done).  We will allow
2231                  this extremely unlikely lossage to avoid complicating the
2232                  code below.  */
2233
2234               rtx subtarget = target == shifted ? 0 : target;
2235               rtx new_amount, other_amount;
2236               rtx temp1;
2237
2238               new_amount = op1;
2239               if (op1 == const0_rtx)
2240                 return shifted;
2241               else if (CONST_INT_P (op1))
2242                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2243                                         - INTVAL (op1));
2244               else
2245                 {
2246                   other_amount
2247                     = simplify_gen_unary (NEG, GET_MODE (op1),
2248                                           op1, GET_MODE (op1));
2249                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2250                   other_amount
2251                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2252                                            gen_int_mode (mask, GET_MODE (op1)));
2253                 }
2254
2255               shifted = force_reg (mode, shifted);
2256
2257               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2258                                      mode, shifted, new_amount, 0, 1);
2259               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2260                                       mode, shifted, other_amount,
2261                                       subtarget, 1);
2262               return expand_binop (mode, ior_optab, temp, temp1, target,
2263                                    unsignedp, methods);
2264             }
2265
2266           temp = expand_binop (mode,
2267                                left ? lrotate_optab : rrotate_optab,
2268                                shifted, op1, target, unsignedp, methods);
2269         }
2270       else if (unsignedp)
2271         temp = expand_binop (mode,
2272                              left ? lshift_optab : rshift_uns_optab,
2273                              shifted, op1, target, unsignedp, methods);
2274
2275       /* Do arithmetic shifts.
2276          Also, if we are going to widen the operand, we can just as well
2277          use an arithmetic right-shift instead of a logical one.  */
2278       if (temp == 0 && ! rotate
2279           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2280         {
2281           enum optab_methods methods1 = methods;
2282
2283           /* If trying to widen a log shift to an arithmetic shift,
2284              don't accept an arithmetic shift of the same size.  */
2285           if (unsignedp)
2286             methods1 = OPTAB_MUST_WIDEN;
2287
2288           /* Arithmetic shift */
2289
2290           temp = expand_binop (mode,
2291                                left ? lshift_optab : rshift_arith_optab,
2292                                shifted, op1, target, unsignedp, methods1);
2293         }
2294
2295       /* We used to try extzv here for logical right shifts, but that was
2296          only useful for one machine, the VAX, and caused poor code
2297          generation there for lshrdi3, so the code was deleted and a
2298          define_expand for lshrsi3 was added to vax.md.  */
2299     }
2300
2301   gcc_assert (temp);
2302   return temp;
2303 }
2304
2305 /* Output a shift instruction for expression code CODE,
2306    with SHIFTED being the rtx for the value to shift,
2307    and AMOUNT the amount to shift by.
2308    Store the result in the rtx TARGET, if that is convenient.
2309    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2310    Return the rtx for where the value is.  */
2311
2312 rtx
2313 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2314               int amount, rtx target, int unsignedp)
2315 {
2316   return expand_shift_1 (code, mode,
2317                          shifted, GEN_INT (amount), target, unsignedp);
2318 }
2319
2320 /* Output a shift instruction for expression code CODE,
2321    with SHIFTED being the rtx for the value to shift,
2322    and AMOUNT the tree for the amount to shift by.
2323    Store the result in the rtx TARGET, if that is convenient.
2324    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325    Return the rtx for where the value is.  */
2326
2327 rtx
2328 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2329                        tree amount, rtx target, int unsignedp)
2330 {
2331   return expand_shift_1 (code, mode,
2332                          shifted, expand_normal (amount), target, unsignedp);
2333 }
2334
2335 \f
/* The kind of fixup required after a constant multiplication:
     basic_variant   - no fixup is needed;
     negate_variant  - the result should be negated;
     add_variant     - the multiplicand should be added to the result.  */
enum mult_variant
{
  basic_variant,
  negate_variant,
  add_variant
};
2341
2342 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2343                         const struct mult_cost *, machine_mode mode);
2344 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2345                                  struct algorithm *, enum mult_variant *, int);
2346 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2347                               const struct algorithm *, enum mult_variant);
2348 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2349 static rtx extract_high_half (machine_mode, rtx);
2350 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2351 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2352                                        int, int);
2353 /* Compute and return the best algorithm for multiplying by T.
2354    The algorithm must cost less than cost_limit
2355    If retval.cost >= COST_LIMIT, no algorithm was found and all
2356    other field of the returned struct are undefined.
2357    MODE is the machine mode of the multiplication.  */
2358
2359 static void
2360 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2361             const struct mult_cost *cost_limit, machine_mode mode)
2362 {
2363   int m;
2364   struct algorithm *alg_in, *best_alg;
2365   struct mult_cost best_cost;
2366   struct mult_cost new_limit;
2367   int op_cost, op_latency;
2368   unsigned HOST_WIDE_INT orig_t = t;
2369   unsigned HOST_WIDE_INT q;
2370   int maxm, hash_index;
2371   bool cache_hit = false;
2372   enum alg_code cache_alg = alg_zero;
2373   bool speed = optimize_insn_for_speed_p ();
2374   machine_mode imode;
2375   struct alg_hash_entry *entry_ptr;
2376
2377   /* Indicate that no algorithm is yet found.  If no algorithm
2378      is found, this value will be returned and indicate failure.  */
2379   alg_out->cost.cost = cost_limit->cost + 1;
2380   alg_out->cost.latency = cost_limit->latency + 1;
2381
2382   if (cost_limit->cost < 0
2383       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384     return;
2385
2386   /* Be prepared for vector modes.  */
2387   imode = GET_MODE_INNER (mode);
2388   if (imode == VOIDmode)
2389     imode = mode;
2390
2391   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2392
2393   /* Restrict the bits of "t" to the multiplication's mode.  */
2394   t &= GET_MODE_MASK (imode);
2395
2396   /* t == 1 can be done in zero cost.  */
2397   if (t == 1)
2398     {
2399       alg_out->ops = 1;
2400       alg_out->cost.cost = 0;
2401       alg_out->cost.latency = 0;
2402       alg_out->op[0] = alg_m;
2403       return;
2404     }
2405
2406   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2407      fail now.  */
2408   if (t == 0)
2409     {
2410       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2411         return;
2412       else
2413         {
2414           alg_out->ops = 1;
2415           alg_out->cost.cost = zero_cost (speed);
2416           alg_out->cost.latency = zero_cost (speed);
2417           alg_out->op[0] = alg_zero;
2418           return;
2419         }
2420     }
2421
2422   /* We'll be needing a couple extra algorithm structures now.  */
2423
2424   alg_in = XALLOCA (struct algorithm);
2425   best_alg = XALLOCA (struct algorithm);
2426   best_cost = *cost_limit;
2427
2428   /* Compute the hash index.  */
2429   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2430
2431   /* See if we already know what to do for T.  */
2432   entry_ptr = alg_hash_entry_ptr (hash_index);
2433   if (entry_ptr->t == t
2434       && entry_ptr->mode == mode
2435       && entry_ptr->mode == mode
2436       && entry_ptr->speed == speed
2437       && entry_ptr->alg != alg_unknown)
2438     {
2439       cache_alg = entry_ptr->alg;
2440
2441       if (cache_alg == alg_impossible)
2442         {
2443           /* The cache tells us that it's impossible to synthesize
2444              multiplication by T within entry_ptr->cost.  */
2445           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2446             /* COST_LIMIT is at least as restrictive as the one
2447                recorded in the hash table, in which case we have no
2448                hope of synthesizing a multiplication.  Just
2449                return.  */
2450             return;
2451
2452           /* If we get here, COST_LIMIT is less restrictive than the
2453              one recorded in the hash table, so we may be able to
2454              synthesize a multiplication.  Proceed as if we didn't
2455              have the cache entry.  */
2456         }
2457       else
2458         {
2459           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2460             /* The cached algorithm shows that this multiplication
2461                requires more cost than COST_LIMIT.  Just return.  This
2462                way, we don't clobber this cache entry with
2463                alg_impossible but retain useful information.  */
2464             return;
2465
2466           cache_hit = true;
2467
2468           switch (cache_alg)
2469             {
2470             case alg_shift:
2471               goto do_alg_shift;
2472
2473             case alg_add_t_m2:
2474             case alg_sub_t_m2:
2475               goto do_alg_addsub_t_m2;
2476
2477             case alg_add_factor:
2478             case alg_sub_factor:
2479               goto do_alg_addsub_factor;
2480
2481             case alg_add_t2_m:
2482               goto do_alg_add_t2_m;
2483
2484             case alg_sub_t2_m:
2485               goto do_alg_sub_t2_m;
2486
2487             default:
2488               gcc_unreachable ();
2489             }
2490         }
2491     }
2492
2493   /* If we have a group of zero bits at the low-order part of T, try
2494      multiplying by the remaining bits and then doing a shift.  */
2495
2496   if ((t & 1) == 0)
2497     {
2498     do_alg_shift:
2499       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2500       if (m < maxm)
2501         {
2502           q = t >> m;
2503           /* The function expand_shift will choose between a shift and
2504              a sequence of additions, so the observed cost is given as
2505              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2506           op_cost = m * add_cost (speed, mode);
2507           if (shift_cost (speed, mode, m) < op_cost)
2508             op_cost = shift_cost (speed, mode, m);
2509           new_limit.cost = best_cost.cost - op_cost;
2510           new_limit.latency = best_cost.latency - op_cost;
2511           synth_mult (alg_in, q, &new_limit, mode);
2512
2513           alg_in->cost.cost += op_cost;
2514           alg_in->cost.latency += op_cost;
2515           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516             {
2517               struct algorithm *x;
2518               best_cost = alg_in->cost;
2519               x = alg_in, alg_in = best_alg, best_alg = x;
2520               best_alg->log[best_alg->ops] = m;
2521               best_alg->op[best_alg->ops] = alg_shift;
2522             }
2523
2524           /* See if treating ORIG_T as a signed number yields a better
2525              sequence.  Try this sequence only for a negative ORIG_T
2526              as it would be useless for a non-negative ORIG_T.  */
2527           if ((HOST_WIDE_INT) orig_t < 0)
2528             {
2529               /* Shift ORIG_T as follows because a right shift of a
2530                  negative-valued signed type is implementation
2531                  defined.  */
2532               q = ~(~orig_t >> m);
2533               /* The function expand_shift will choose between a shift
2534                  and a sequence of additions, so the observed cost is
2535                  given as MIN (m * add_cost(speed, mode),
2536                  shift_cost(speed, mode, m)).  */
2537               op_cost = m * add_cost (speed, mode);
2538               if (shift_cost (speed, mode, m) < op_cost)
2539                 op_cost = shift_cost (speed, mode, m);
2540               new_limit.cost = best_cost.cost - op_cost;
2541               new_limit.latency = best_cost.latency - op_cost;
2542               synth_mult (alg_in, q, &new_limit, mode);
2543
2544               alg_in->cost.cost += op_cost;
2545               alg_in->cost.latency += op_cost;
2546               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2547                 {
2548                   struct algorithm *x;
2549                   best_cost = alg_in->cost;
2550                   x = alg_in, alg_in = best_alg, best_alg = x;
2551                   best_alg->log[best_alg->ops] = m;
2552                   best_alg->op[best_alg->ops] = alg_shift;
2553                 }
2554             }
2555         }
2556       if (cache_hit)
2557         goto done;
2558     }
2559
2560   /* If we have an odd number, add or subtract one.  */
2561   if ((t & 1) != 0)
2562     {
2563       unsigned HOST_WIDE_INT w;
2564
2565     do_alg_addsub_t_m2:
2566       for (w = 1; (w & t) != 0; w <<= 1)
2567         ;
2568       /* If T was -1, then W will be zero after the loop.  This is another
2569          case where T ends with ...111.  Handling this with (T + 1) and
2570          subtract 1 produces slightly better code and results in algorithm
2571          selection much faster than treating it like the ...0111 case
2572          below.  */
2573       if (w == 0
2574           || (w > 2
2575               /* Reject the case where t is 3.
2576                  Thus we prefer addition in that case.  */
2577               && t != 3))
2578         {
2579           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2580
2581           op_cost = add_cost (speed, mode);
2582           new_limit.cost = best_cost.cost - op_cost;
2583           new_limit.latency = best_cost.latency - op_cost;
2584           synth_mult (alg_in, t + 1, &new_limit, mode);
2585
2586           alg_in->cost.cost += op_cost;
2587           alg_in->cost.latency += op_cost;
2588           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589             {
2590               struct algorithm *x;
2591               best_cost = alg_in->cost;
2592               x = alg_in, alg_in = best_alg, best_alg = x;
2593               best_alg->log[best_alg->ops] = 0;
2594               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2595             }
2596         }
2597       else
2598         {
2599           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2600
2601           op_cost = add_cost (speed, mode);
2602           new_limit.cost = best_cost.cost - op_cost;
2603           new_limit.latency = best_cost.latency - op_cost;
2604           synth_mult (alg_in, t - 1, &new_limit, mode);
2605
2606           alg_in->cost.cost += op_cost;
2607           alg_in->cost.latency += op_cost;
2608           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2609             {
2610               struct algorithm *x;
2611               best_cost = alg_in->cost;
2612               x = alg_in, alg_in = best_alg, best_alg = x;
2613               best_alg->log[best_alg->ops] = 0;
2614               best_alg->op[best_alg->ops] = alg_add_t_m2;
2615             }
2616         }
2617
2618       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2619          quickly with a - a * n for some appropriate constant n.  */
2620       m = exact_log2 (-orig_t + 1);
2621       if (m >= 0 && m < maxm)
2622         {
2623           op_cost = shiftsub1_cost (speed, mode, m);
2624           new_limit.cost = best_cost.cost - op_cost;
2625           new_limit.latency = best_cost.latency - op_cost;
2626           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2627                       &new_limit, mode);
2628
2629           alg_in->cost.cost += op_cost;
2630           alg_in->cost.latency += op_cost;
2631           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632             {
2633               struct algorithm *x;
2634               best_cost = alg_in->cost;
2635               x = alg_in, alg_in = best_alg, best_alg = x;
2636               best_alg->log[best_alg->ops] = m;
2637               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2638             }
2639         }
2640
2641       if (cache_hit)
2642         goto done;
2643     }
2644
2645   /* Look for factors of t of the form
2646      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2647      If we find such a factor, we can multiply by t using an algorithm that
2648      multiplies by q, shift the result by m and add/subtract it to itself.
2649
2650      We search for large factors first and loop down, even if large factors
2651      are less probable than small; if we find a large factor we will find a
2652      good sequence quickly, and therefore be able to prune (by decreasing
2653      COST_LIMIT) the search.  */
2654
2655  do_alg_addsub_factor:
2656   for (m = floor_log2 (t - 1); m >= 2; m--)
2657     {
2658       unsigned HOST_WIDE_INT d;
2659
2660       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2661       if (t % d == 0 && t > d && m < maxm
2662           && (!cache_hit || cache_alg == alg_add_factor))
2663         {
2664           /* If the target has a cheap shift-and-add instruction use
2665              that in preference to a shift insn followed by an add insn.
2666              Assume that the shift-and-add is "atomic" with a latency
2667              equal to its cost, otherwise assume that on superscalar
2668              hardware the shift may be executed concurrently with the
2669              earlier steps in the algorithm.  */
2670           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2671           if (shiftadd_cost (speed, mode, m) < op_cost)
2672             {
2673               op_cost = shiftadd_cost (speed, mode, m);
2674               op_latency = op_cost;
2675             }
2676           else
2677             op_latency = add_cost (speed, mode);
2678
2679           new_limit.cost = best_cost.cost - op_cost;
2680           new_limit.latency = best_cost.latency - op_latency;
2681           synth_mult (alg_in, t / d, &new_limit, mode);
2682
2683           alg_in->cost.cost += op_cost;
2684           alg_in->cost.latency += op_latency;
2685           if (alg_in->cost.latency < op_cost)
2686             alg_in->cost.latency = op_cost;
2687           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688             {
2689               struct algorithm *x;
2690               best_cost = alg_in->cost;
2691               x = alg_in, alg_in = best_alg, best_alg = x;
2692               best_alg->log[best_alg->ops] = m;
2693               best_alg->op[best_alg->ops] = alg_add_factor;
2694             }
2695           /* Other factors will have been taken care of in the recursion.  */
2696           break;
2697         }
2698
2699       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2700       if (t % d == 0 && t > d && m < maxm
2701           && (!cache_hit || cache_alg == alg_sub_factor))
2702         {
2703           /* If the target has a cheap shift-and-subtract insn use
2704              that in preference to a shift insn followed by a sub insn.
2705              Assume that the shift-and-sub is "atomic" with a latency
2706              equal to it's cost, otherwise assume that on superscalar
2707              hardware the shift may be executed concurrently with the
2708              earlier steps in the algorithm.  */
2709           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2710           if (shiftsub0_cost (speed, mode, m) < op_cost)
2711             {
2712               op_cost = shiftsub0_cost (speed, mode, m);
2713               op_latency = op_cost;
2714             }
2715           else
2716             op_latency = add_cost (speed, mode);
2717
2718           new_limit.cost = best_cost.cost - op_cost;
2719           new_limit.latency = best_cost.latency - op_latency;
2720           synth_mult (alg_in, t / d, &new_limit, mode);
2721
2722           alg_in->cost.cost += op_cost;
2723           alg_in->cost.latency += op_latency;
2724           if (alg_in->cost.latency < op_cost)
2725             alg_in->cost.latency = op_cost;
2726           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727             {
2728               struct algorithm *x;
2729               best_cost = alg_in->cost;
2730               x = alg_in, alg_in = best_alg, best_alg = x;
2731               best_alg->log[best_alg->ops] = m;
2732               best_alg->op[best_alg->ops] = alg_sub_factor;
2733             }
2734           break;
2735         }
2736     }
2737   if (cache_hit)
2738     goto done;
2739
2740   /* Try shift-and-add (load effective address) instructions,
2741      i.e. do a*3, a*5, a*9.  */
2742   if ((t & 1) != 0)
2743     {
2744     do_alg_add_t2_m:
2745       q = t - 1;
2746       q = q & -q;
2747       m = exact_log2 (q);
2748       if (m >= 0 && m < maxm)
2749         {
2750           op_cost = shiftadd_cost (speed, mode, m);
2751           new_limit.cost = best_cost.cost - op_cost;
2752           new_limit.latency = best_cost.latency - op_cost;
2753           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2754
2755           alg_in->cost.cost += op_cost;
2756           alg_in->cost.latency += op_cost;
2757           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758             {
2759               struct algorithm *x;
2760               best_cost = alg_in->cost;
2761               x = alg_in, alg_in = best_alg, best_alg = x;
2762               best_alg->log[best_alg->ops] = m;
2763               best_alg->op[best_alg->ops] = alg_add_t2_m;
2764             }
2765         }
2766       if (cache_hit)
2767         goto done;
2768
2769     do_alg_sub_t2_m:
2770       q = t + 1;
2771       q = q & -q;
2772       m = exact_log2 (q);
2773       if (m >= 0 && m < maxm)
2774         {
2775           op_cost = shiftsub0_cost (speed, mode, m);
2776           new_limit.cost = best_cost.cost - op_cost;
2777           new_limit.latency = best_cost.latency - op_cost;
2778           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2779
2780           alg_in->cost.cost += op_cost;
2781           alg_in->cost.latency += op_cost;
2782           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2783             {
2784               struct algorithm *x;
2785               best_cost = alg_in->cost;
2786               x = alg_in, alg_in = best_alg, best_alg = x;
2787               best_alg->log[best_alg->ops] = m;
2788               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2789             }
2790         }
2791       if (cache_hit)
2792         goto done;
2793     }
2794
2795  done:
2796   /* If best_cost has not decreased, we have not found any algorithm.  */
2797   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2798     {
2799       /* We failed to find an algorithm.  Record alg_impossible for
2800          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2801          we are asked to find an algorithm for T within the same or
2802          lower COST_LIMIT, we can immediately return to the
2803          caller.  */
2804       entry_ptr->t = t;
2805       entry_ptr->mode = mode;
2806       entry_ptr->speed = speed;
2807       entry_ptr->alg = alg_impossible;
2808       entry_ptr->cost = *cost_limit;
2809       return;
2810     }
2811
2812   /* Cache the result.  */
2813   if (!cache_hit)
2814     {
2815       entry_ptr->t = t;
2816       entry_ptr->mode = mode;
2817       entry_ptr->speed = speed;
2818       entry_ptr->alg = best_alg->op[best_alg->ops];
2819       entry_ptr->cost.cost = best_cost.cost;
2820       entry_ptr->cost.latency = best_cost.latency;
2821     }
2822
2823   /* If we are getting a too long sequence for `struct algorithm'
2824      to record, make this search fail.  */
2825   if (best_alg->ops == MAX_BITS_PER_WORD)
2826     return;
2827
2828   /* Copy the algorithm from temporary space to the space at alg_out.
2829      We avoid using structure assignment because the majority of
2830      best_alg is normally undefined, and this is a critical function.  */
2831   alg_out->ops = best_alg->ops + 1;
2832   alg_out->cost = best_cost;
2833   memcpy (alg_out->op, best_alg->op,
2834           alg_out->ops * sizeof *alg_out->op);
2835   memcpy (alg_out->log, best_alg->log,
2836           alg_out->ops * sizeof *alg_out->log);
2837 }
2838 \f
2839 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2840    Try three variations:
2841
2842        - a shift/add sequence based on VAL itself
2843        - a shift/add sequence based on -VAL, followed by a negation
2844        - a shift/add sequence based on VAL - 1, followed by an addition.
2845
2846    Return true if the cheapest of these cost less than MULT_COST,
2847    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2848
2849 static bool
2850 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2851                      struct algorithm *alg, enum mult_variant *variant,
2852                      int mult_cost)
2853 {
2854   struct algorithm alg2;
2855   struct mult_cost limit;
2856   int op_cost;
2857   bool speed = optimize_insn_for_speed_p ();
2858
2859   /* Fail quickly for impossible bounds.  */
2860   if (mult_cost < 0)
2861     return false;
2862
2863   /* Ensure that mult_cost provides a reasonable upper bound.
2864      Any constant multiplication can be performed with less
2865      than 2 * bits additions.  */
2866   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2867   if (mult_cost > op_cost)
2868     mult_cost = op_cost;
2869
2870   *variant = basic_variant;
2871   limit.cost = mult_cost;
2872   limit.latency = mult_cost;
2873   synth_mult (alg, val, &limit, mode);
2874
2875   /* This works only if the inverted value actually fits in an
2876      `unsigned int' */
2877   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2878     {
2879       op_cost = neg_cost (speed, mode);
2880       if (MULT_COST_LESS (&alg->cost, mult_cost))
2881         {
2882           limit.cost = alg->cost.cost - op_cost;
2883           limit.latency = alg->cost.latency - op_cost;
2884         }
2885       else
2886         {
2887           limit.cost = mult_cost - op_cost;
2888           limit.latency = mult_cost - op_cost;
2889         }
2890
2891       synth_mult (&alg2, -val, &limit, mode);
2892       alg2.cost.cost += op_cost;
2893       alg2.cost.latency += op_cost;
2894       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2895         *alg = alg2, *variant = negate_variant;
2896     }
2897
2898   /* This proves very useful for division-by-constant.  */
2899   op_cost = add_cost (speed, mode);
2900   if (MULT_COST_LESS (&alg->cost, mult_cost))
2901     {
2902       limit.cost = alg->cost.cost - op_cost;
2903       limit.latency = alg->cost.latency - op_cost;
2904     }
2905   else
2906     {
2907       limit.cost = mult_cost - op_cost;
2908       limit.latency = mult_cost - op_cost;
2909     }
2910
2911   synth_mult (&alg2, val - 1, &limit, mode);
2912   alg2.cost.cost += op_cost;
2913   alg2.cost.latency += op_cost;
2914   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915     *alg = alg2, *variant = add_variant;
2916
2917   return MULT_COST_LESS (&alg->cost, mult_cost);
2918 }
2919
2920 /* A subroutine of expand_mult, used for constant multiplications.
2921    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2922    convenient.  Use the shift/add sequence described by ALG and apply
2923    the final fixup specified by VARIANT.  */
2924
2925 static rtx
2926 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
2927                    rtx target, const struct algorithm *alg,
2928                    enum mult_variant variant)
2929 {
2930   HOST_WIDE_INT val_so_far;
2931   rtx_insn *insn;
2932   rtx accum, tem;
2933   int opno;
2934   machine_mode nmode;
2935
2936   /* Avoid referencing memory over and over and invalid sharing
2937      on SUBREGs.  */
2938   op0 = force_reg (mode, op0);
2939
2940   /* ACCUM starts out either as OP0 or as a zero, depending on
2941      the first operation.  */
2942
2943   if (alg->op[0] == alg_zero)
2944     {
2945       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2946       val_so_far = 0;
2947     }
2948   else if (alg->op[0] == alg_m)
2949     {
2950       accum = copy_to_mode_reg (mode, op0);
2951       val_so_far = 1;
2952     }
2953   else
2954     gcc_unreachable ();
2955
2956   for (opno = 1; opno < alg->ops; opno++)
2957     {
2958       int log = alg->log[opno];
2959       rtx shift_subtarget = optimize ? 0 : accum;
2960       rtx add_target
2961         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2962            && !optimize)
2963           ? target : 0;
2964       rtx accum_target = optimize ? 0 : accum;
2965       rtx accum_inner;
2966
2967       switch (alg->op[opno])
2968         {
2969         case alg_shift:
2970           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2971           /* REG_EQUAL note will be attached to the following insn.  */
2972           emit_move_insn (accum, tem);
2973           val_so_far <<= log;
2974           break;
2975
2976         case alg_add_t_m2:
2977           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2978           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2979                                  add_target ? add_target : accum_target);
2980           val_so_far += (HOST_WIDE_INT) 1 << log;
2981           break;
2982
2983         case alg_sub_t_m2:
2984           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2985           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2986                                  add_target ? add_target : accum_target);
2987           val_so_far -= (HOST_WIDE_INT) 1 << log;
2988           break;
2989
2990         case alg_add_t2_m:
2991           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2992                                 log, shift_subtarget, 0);
2993           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2994                                  add_target ? add_target : accum_target);
2995           val_so_far = (val_so_far << log) + 1;
2996           break;
2997
2998         case alg_sub_t2_m:
2999           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3000                                 log, shift_subtarget, 0);
3001           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3002                                  add_target ? add_target : accum_target);
3003           val_so_far = (val_so_far << log) - 1;
3004           break;
3005
3006         case alg_add_factor:
3007           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3008           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3009                                  add_target ? add_target : accum_target);
3010           val_so_far += val_so_far << log;
3011           break;
3012
3013         case alg_sub_factor:
3014           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3015           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3016                                  (add_target
3017                                   ? add_target : (optimize ? 0 : tem)));
3018           val_so_far = (val_so_far << log) - val_so_far;
3019           break;
3020
3021         default:
3022           gcc_unreachable ();
3023         }
3024
3025       if (SCALAR_INT_MODE_P (mode))
3026         {
3027           /* Write a REG_EQUAL note on the last insn so that we can cse
3028              multiplication sequences.  Note that if ACCUM is a SUBREG,
3029              we've set the inner register and must properly indicate that.  */
3030           tem = op0, nmode = mode;
3031           accum_inner = accum;
3032           if (GET_CODE (accum) == SUBREG)
3033             {
3034               accum_inner = SUBREG_REG (accum);
3035               nmode = GET_MODE (accum_inner);
3036               tem = gen_lowpart (nmode, op0);
3037             }
3038
3039           insn = get_last_insn ();
3040           set_dst_reg_note (insn, REG_EQUAL,
3041                             gen_rtx_MULT (nmode, tem,
3042                                           gen_int_mode (val_so_far, nmode)),
3043                             accum_inner);
3044         }
3045     }
3046
3047   if (variant == negate_variant)
3048     {
3049       val_so_far = -val_so_far;
3050       accum = expand_unop (mode, neg_optab, accum, target, 0);
3051     }
3052   else if (variant == add_variant)
3053     {
3054       val_so_far = val_so_far + 1;
3055       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3056     }
3057
3058   /* Compare only the bits of val and val_so_far that are significant
3059      in the result mode, to avoid sign-/zero-extension confusion.  */
3060   nmode = GET_MODE_INNER (mode);
3061   if (nmode == VOIDmode)
3062     nmode = mode;
3063   val &= GET_MODE_MASK (nmode);
3064   val_so_far &= GET_MODE_MASK (nmode);
3065   gcc_assert (val == val_so_far);
3066
3067   return accum;
3068 }
3069
3070 /* Perform a multiplication and return an rtx for the result.
3071    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3072    TARGET is a suggestion for where to store the result (an rtx).
3073
3074    We check specially for a constant integer as OP1.
3075    If you want this check for OP0 as well, then before calling
3076    you should swap the two operands if OP0 would be constant.  */
3077
3078 rtx
3079 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3080              int unsignedp)
3081 {
3082   enum mult_variant variant;
3083   struct algorithm algorithm;
3084   rtx scalar_op1;
3085   int max_cost;
3086   bool speed = optimize_insn_for_speed_p ();
3087   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3088
3089   if (CONSTANT_P (op0))
3090     {
3091       rtx temp = op0;
3092       op0 = op1;
3093       op1 = temp;
3094     }
3095
3096   /* For vectors, there are several simplifications that can be made if
3097      all elements of the vector constant are identical.  */
3098   scalar_op1 = op1;
3099   if (GET_CODE (op1) == CONST_VECTOR)
3100     {
3101       int i, n = CONST_VECTOR_NUNITS (op1);
3102       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3103       for (i = 1; i < n; ++i)
3104         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3105           goto skip_scalar;
3106     }
3107
3108   if (INTEGRAL_MODE_P (mode))
3109     {
3110       rtx fake_reg;
3111       HOST_WIDE_INT coeff;
3112       bool is_neg;
3113       int mode_bitsize;
3114
3115       if (op1 == CONST0_RTX (mode))
3116         return op1;
3117       if (op1 == CONST1_RTX (mode))
3118         return op0;
3119       if (op1 == CONSTM1_RTX (mode))
3120         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3121                             op0, target, 0);
3122
3123       if (do_trapv)
3124         goto skip_synth;
3125
3126       /* If mode is integer vector mode, check if the backend supports
3127          vector lshift (by scalar or vector) at all.  If not, we can't use
3128          synthetized multiply.  */
3129       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3130           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3131           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3132         goto skip_synth;
3133
3134       /* These are the operations that are potentially turned into
3135          a sequence of shifts and additions.  */
3136       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3137
3138       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3139          less than or equal in size to `unsigned int' this doesn't matter.
3140          If the mode is larger than `unsigned int', then synth_mult works
3141          only if the constant value exactly fits in an `unsigned int' without
3142          any truncation.  This means that multiplying by negative values does
3143          not work; results are off by 2^32 on a 32 bit machine.  */
3144       if (CONST_INT_P (scalar_op1))
3145         {
3146           coeff = INTVAL (scalar_op1);
3147           is_neg = coeff < 0;
3148         }
3149 #if TARGET_SUPPORTS_WIDE_INT
3150       else if (CONST_WIDE_INT_P (scalar_op1))
3151 #else
3152       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3153 #endif
3154         {
3155           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3156           /* Perfect power of 2 (other than 1, which is handled above).  */
3157           if (shift > 0)
3158             return expand_shift (LSHIFT_EXPR, mode, op0,
3159                                  shift, target, unsignedp);
3160           else
3161             goto skip_synth;
3162         }
3163       else
3164         goto skip_synth;
3165
3166       /* We used to test optimize here, on the grounds that it's better to
3167          produce a smaller program when -O is not used.  But this causes
3168          such a terrible slowdown sometimes that it seems better to always
3169          use synth_mult.  */
3170
3171       /* Special case powers of two.  */
3172       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3173           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3174         return expand_shift (LSHIFT_EXPR, mode, op0,
3175                              floor_log2 (coeff), target, unsignedp);
3176
3177       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3178
3179       /* Attempt to handle multiplication of DImode values by negative
3180          coefficients, by performing the multiplication by a positive
3181          multiplier and then inverting the result.  */
3182       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3183         {
3184           /* Its safe to use -coeff even for INT_MIN, as the
3185              result is interpreted as an unsigned coefficient.
3186              Exclude cost of op0 from max_cost to match the cost
3187              calculation of the synth_mult.  */
3188           coeff = -(unsigned HOST_WIDE_INT) coeff;
3189           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3190                       - neg_cost (speed, mode));
3191           if (max_cost <= 0)
3192             goto skip_synth;
3193
3194           /* Special case powers of two.  */
3195           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3196             {
3197               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3198                                        floor_log2 (coeff), target, unsignedp);
3199               return expand_unop (mode, neg_optab, temp, target, 0);
3200             }
3201
3202           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3203                                    max_cost))
3204             {
3205               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3206                                             &algorithm, variant);
3207               return expand_unop (mode, neg_optab, temp, target, 0);
3208             }
3209           goto skip_synth;
3210         }
3211
3212       /* Exclude cost of op0 from max_cost to match the cost
3213          calculation of the synth_mult.  */
3214       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3215       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3216         return expand_mult_const (mode, op0, coeff, target,
3217                                   &algorithm, variant);
3218     }
3219  skip_synth:
3220
3221   /* Expand x*2.0 as x+x.  */
3222   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3223     {
3224       REAL_VALUE_TYPE d;
3225       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3226
3227       if (REAL_VALUES_EQUAL (d, dconst2))
3228         {
3229           op0 = force_reg (GET_MODE (op0), op0);
3230           return expand_binop (mode, add_optab, op0, op0,
3231                                target, unsignedp, OPTAB_LIB_WIDEN);
3232         }
3233     }
3234  skip_scalar:
3235
3236   /* This used to use umul_optab if unsigned, but for non-widening multiply
3237      there is no difference between signed and unsigned.  */
3238   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3239                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3240   gcc_assert (op0);
3241   return op0;
3242 }
3243
3244 /* Return a cost estimate for multiplying a register by the given
3245    COEFFicient in the given MODE and SPEED.  */
3246
3247 int
3248 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3249 {
3250   int max_cost;
3251   struct algorithm algorithm;
3252   enum mult_variant variant;
3253
3254   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3255   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3256   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3257     return algorithm.cost.cost;
3258   else
3259     return max_cost;
3260 }
3261
3262 /* Perform a widening multiplication and return an rtx for the result.
3263    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3264    TARGET is a suggestion for where to store the result (an rtx).
3265    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3266    or smul_widen_optab.
3267
3268    We check specially for a constant integer as OP1, comparing the
3269    cost of a widening multiply against the cost of a sequence of shifts
3270    and adds.  */
3271
3272 rtx
3273 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3274                       int unsignedp, optab this_optab)
3275 {
3276   bool speed = optimize_insn_for_speed_p ();
3277   rtx cop1;
3278
3279   if (CONST_INT_P (op1)
3280       && GET_MODE (op0) != VOIDmode
3281       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3282                                 this_optab == umul_widen_optab))
3283       && CONST_INT_P (cop1)
3284       && (INTVAL (cop1) >= 0
3285           || HWI_COMPUTABLE_MODE_P (mode)))
3286     {
3287       HOST_WIDE_INT coeff = INTVAL (cop1);
3288       int max_cost;
3289       enum mult_variant variant;
3290       struct algorithm algorithm;
3291
3292       /* Special case powers of two.  */
3293       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3294         {
3295           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3296           return expand_shift (LSHIFT_EXPR, mode, op0,
3297                                floor_log2 (coeff), target, unsignedp);
3298         }
3299
3300       /* Exclude cost of op0 from max_cost to match the cost
3301          calculation of the synth_mult.  */
3302       max_cost = mul_widen_cost (speed, mode);
3303       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3304                                max_cost))
3305         {
3306           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3307           return expand_mult_const (mode, op0, coeff, target,
3308                                     &algorithm, variant);
3309         }
3310     }
3311   return expand_binop (mode, this_optab, op0, op1, target,
3312                        unsignedp, OPTAB_LIB_WIDEN);
3313 }
3314 \f
3315 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3316    replace division by D, and put the least significant N bits of the result
3317    in *MULTIPLIER_PTR and return the most significant bit.
3318
3319    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3320    needed precision is in PRECISION (should be <= N).
3321
3322    PRECISION should be as small as possible so this function can choose
3323    multiplier more freely.
3324
3325    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3326    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3327
3328    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3329    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3330
3331 unsigned HOST_WIDE_INT
3332 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3333                    unsigned HOST_WIDE_INT *multiplier_ptr,
3334                    int *post_shift_ptr, int *lgup_ptr)
3335 {
3336   int lgup, post_shift;
3337   int pow, pow2;
3338
3339   /* lgup = ceil(log2(divisor)); */
3340   lgup = ceil_log2 (d);
3341
3342   gcc_assert (lgup <= n);
3343
3344   pow = n + lgup;
3345   pow2 = n + lgup - precision;
3346
3347   /* mlow = 2^(N + lgup)/d */
3348   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3349   wide_int mlow = wi::udiv_trunc (val, d);
3350
3351   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3352   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3353   wide_int mhigh = wi::udiv_trunc (val, d);
3354
3355   /* If precision == N, then mlow, mhigh exceed 2^N
3356      (but they do not exceed 2^(N+1)).  */
3357
3358   /* Reduce to lowest terms.  */
3359   for (post_shift = lgup; post_shift > 0; post_shift--)
3360     {
3361       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3362                                                        HOST_BITS_PER_WIDE_INT);
3363       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3364                                                        HOST_BITS_PER_WIDE_INT);
3365       if (ml_lo >= mh_lo)
3366         break;
3367
3368       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3369       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3370     }
3371
3372   *post_shift_ptr = post_shift;
3373   *lgup_ptr = lgup;
3374   if (n < HOST_BITS_PER_WIDE_INT)
3375     {
3376       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3377       *multiplier_ptr = mhigh.to_uhwi () & mask;
3378       return mhigh.to_uhwi () >= mask;
3379     }
3380   else
3381     {
3382       *multiplier_ptr = mhigh.to_uhwi ();
3383       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3384     }
3385 }
3386
3387 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3388    congruent to 1 (mod 2**N).  */
3389
3390 static unsigned HOST_WIDE_INT
3391 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3392 {
3393   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3394
3395   /* The algorithm notes that the choice y = x satisfies
3396      x*y == 1 mod 2^3, since x is assumed odd.
3397      Each iteration doubles the number of bits of significance in y.  */
3398
3399   unsigned HOST_WIDE_INT mask;
3400   unsigned HOST_WIDE_INT y = x;
3401   int nbit = 3;
3402
3403   mask = (n == HOST_BITS_PER_WIDE_INT
3404           ? ~(unsigned HOST_WIDE_INT) 0
3405           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3406
3407   while (nbit < n)
3408     {
3409       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3410       nbit *= 2;
3411     }
3412   return y;
3413 }
3414
3415 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3416    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3417    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3418    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3419    become signed.
3420
3421    The result is put in TARGET if that is convenient.
3422
3423    MODE is the mode of operation.  */
3424
3425 rtx
3426 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3427                              rtx op1, rtx target, int unsignedp)
3428 {
3429   rtx tem;
3430   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3431
3432   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3433                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3434   tem = expand_and (mode, tem, op1, NULL_RTX);
3435   adj_operand
3436     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3437                      adj_operand);
3438
3439   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3440                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3441   tem = expand_and (mode, tem, op0, NULL_RTX);
3442   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3443                           target);
3444
3445   return target;
3446 }
3447
3448 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3449
3450 static rtx
3451 extract_high_half (machine_mode mode, rtx op)
3452 {
3453   machine_mode wider_mode;
3454
3455   if (mode == word_mode)
3456     return gen_highpart (mode, op);
3457
3458   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3459
3460   wider_mode = GET_MODE_WIDER_MODE (mode);
3461   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3462                      GET_MODE_BITSIZE (mode), 0, 1);
3463   return convert_modes (mode, wider_mode, op, 0);
3464 }
3465
3466 /* Like expmed_mult_highpart, but only consider using a multiplication
3467    optab.  OP1 is an rtx for the constant operand.  */
3468
3469 static rtx
3470 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3471                             rtx target, int unsignedp, int max_cost)
3472 {
3473   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3474   machine_mode wider_mode;
3475   optab moptab;
3476   rtx tem;
3477   int size;
3478   bool speed = optimize_insn_for_speed_p ();
3479
3480   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3481
3482   wider_mode = GET_MODE_WIDER_MODE (mode);
3483   size = GET_MODE_BITSIZE (mode);
3484
3485   /* Firstly, try using a multiplication insn that only generates the needed
3486      high part of the product, and in the sign flavor of unsignedp.  */
3487   if (mul_highpart_cost (speed, mode) < max_cost)
3488     {
3489       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3490       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3491                           unsignedp, OPTAB_DIRECT);
3492       if (tem)
3493         return tem;
3494     }
3495
3496   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3497      Need to adjust the result after the multiplication.  */
3498   if (size - 1 < BITS_PER_WORD
3499       && (mul_highpart_cost (speed, mode)
3500           + 2 * shift_cost (speed, mode, size-1)
3501           + 4 * add_cost (speed, mode) < max_cost))
3502     {
3503       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3504       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3505                           unsignedp, OPTAB_DIRECT);
3506       if (tem)
3507         /* We used the wrong signedness.  Adjust the result.  */
3508         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3509                                             tem, unsignedp);
3510     }
3511
3512   /* Try widening multiplication.  */
3513   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3514   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3515       && mul_widen_cost (speed, wider_mode) < max_cost)
3516     {
3517       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3518                           unsignedp, OPTAB_WIDEN);
3519       if (tem)
3520         return extract_high_half (mode, tem);
3521     }
3522
3523   /* Try widening the mode and perform a non-widening multiplication.  */
3524   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3525       && size - 1 < BITS_PER_WORD
3526       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3527           < max_cost))
3528     {
3529       rtx_insn *insns;
3530       rtx wop0, wop1;
3531
3532       /* We need to widen the operands, for example to ensure the
3533          constant multiplier is correctly sign or zero extended.
3534          Use a sequence to clean-up any instructions emitted by
3535          the conversions if things don't work out.  */
3536       start_sequence ();
3537       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3538       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3539       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3540                           unsignedp, OPTAB_WIDEN);
3541       insns = get_insns ();
3542       end_sequence ();
3543
3544       if (tem)
3545         {
3546           emit_insn (insns);
3547           return extract_high_half (mode, tem);
3548         }
3549     }
3550
3551   /* Try widening multiplication of opposite signedness, and adjust.  */
3552   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3553   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3554       && size - 1 < BITS_PER_WORD
3555       && (mul_widen_cost (speed, wider_mode)
3556           + 2 * shift_cost (speed, mode, size-1)
3557           + 4 * add_cost (speed, mode) < max_cost))
3558     {
3559       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3560                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3561       if (tem != 0)
3562         {
3563           tem = extract_high_half (mode, tem);
3564           /* We used the wrong signedness.  Adjust the result.  */
3565           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3566                                               target, unsignedp);
3567         }
3568     }
3569
3570   return 0;
3571 }
3572
3573 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3574    putting the high half of the result in TARGET if that is convenient,
3575    and return where the result is.  If the operation can not be performed,
3576    0 is returned.
3577
3578    MODE is the mode of operation and result.
3579
3580    UNSIGNEDP nonzero means unsigned multiply.
3581
3582    MAX_COST is the total allowed cost for the expanded RTL.  */
3583
3584 static rtx
3585 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3586                       rtx target, int unsignedp, int max_cost)
3587 {
3588   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3589   unsigned HOST_WIDE_INT cnst1;
3590   int extra_cost;
3591   bool sign_adjust = false;
3592   enum mult_variant variant;
3593   struct algorithm alg;
3594   rtx tem;
3595   bool speed = optimize_insn_for_speed_p ();
3596
3597   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3598   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3599   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3600
3601   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3602
3603   /* We can't optimize modes wider than BITS_PER_WORD.
3604      ??? We might be able to perform double-word arithmetic if
3605      mode == word_mode, however all the cost calculations in
3606      synth_mult etc. assume single-word operations.  */
3607   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3608     return expmed_mult_highpart_optab (mode, op0, op1, target,
3609                                        unsignedp, max_cost);
3610
3611   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3612
3613   /* Check whether we try to multiply by a negative constant.  */
3614   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3615     {
3616       sign_adjust = true;
3617       extra_cost += add_cost (speed, mode);
3618     }
3619
3620   /* See whether shift/add multiplication is cheap enough.  */
3621   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3622                            max_cost - extra_cost))
3623     {
3624       /* See whether the specialized multiplication optabs are
3625          cheaper than the shift/add version.  */
3626       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3627                                         alg.cost.cost + extra_cost);
3628       if (tem)
3629         return tem;
3630
3631       tem = convert_to_mode (wider_mode, op0, unsignedp);
3632       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3633       tem = extract_high_half (mode, tem);
3634
3635       /* Adjust result for signedness.  */
3636       if (sign_adjust)
3637         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3638
3639       return tem;
3640     }
3641   return expmed_mult_highpart_optab (mode, op0, op1, target,
3642                                      unsignedp, max_cost);
3643 }
3644
3645
3646 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3647
3648 static rtx
3649 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3650 {
3651   rtx result, temp, shift;
3652   rtx_code_label *label;
3653   int logd;
3654   int prec = GET_MODE_PRECISION (mode);
3655
3656   logd = floor_log2 (d);
3657   result = gen_reg_rtx (mode);
3658
3659   /* Avoid conditional branches when they're expensive.  */
3660   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3661       && optimize_insn_for_speed_p ())
3662     {
3663       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3664                                       mode, 0, -1);
3665       if (signmask)
3666         {
3667           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3668           signmask = force_reg (mode, signmask);
3669           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3670
3671           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3672              which instruction sequence to use.  If logical right shifts
3673              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3674              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3675
3676           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3677           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3678               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3679                   > COSTS_N_INSNS (2)))
3680             {
3681               temp = expand_binop (mode, xor_optab, op0, signmask,
3682                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3683               temp = expand_binop (mode, sub_optab, temp, signmask,
3684                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3685               temp = expand_binop (mode, and_optab, temp,
3686                                    gen_int_mode (masklow, mode),
3687                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688               temp = expand_binop (mode, xor_optab, temp, signmask,
3689                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3690               temp = expand_binop (mode, sub_optab, temp, signmask,
3691                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692             }
3693           else
3694             {
3695               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3696                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3697               signmask = force_reg (mode, signmask);
3698
3699               temp = expand_binop (mode, add_optab, op0, signmask,
3700                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701               temp = expand_binop (mode, and_optab, temp,
3702                                    gen_int_mode (masklow, mode),
3703                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3704               temp = expand_binop (mode, sub_optab, temp, signmask,
3705                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3706             }
3707           return temp;
3708         }
3709     }
3710
3711   /* Mask contains the mode's signbit and the significant bits of the
3712      modulus.  By including the signbit in the operation, many targets
3713      can avoid an explicit compare operation in the following comparison
3714      against zero.  */
3715   wide_int mask = wi::mask (logd, false, prec);
3716   mask = wi::set_bit (mask, prec - 1);
3717
3718   temp = expand_binop (mode, and_optab, op0,
3719                        immed_wide_int_const (mask, mode),
3720                        result, 1, OPTAB_LIB_WIDEN);
3721   if (temp != result)
3722     emit_move_insn (result, temp);
3723
3724   label = gen_label_rtx ();
3725   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3726
3727   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3728                        0, OPTAB_LIB_WIDEN);
3729
3730   mask = wi::mask (logd, true, prec);
3731   temp = expand_binop (mode, ior_optab, temp,
3732                        immed_wide_int_const (mask, mode),
3733                        result, 1, OPTAB_LIB_WIDEN);
3734   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3735                        0, OPTAB_LIB_WIDEN);
3736   if (temp != result)
3737     emit_move_insn (result, temp);
3738   emit_label (label);
3739   return result;
3740 }
3741
3742 /* Expand signed division of OP0 by a power of two D in mode MODE.
3743    This routine is only called for positive values of D.  */
3744
3745 static rtx
3746 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3747 {
3748   rtx temp;
3749   rtx_code_label *label;
3750   int logd;
3751
3752   logd = floor_log2 (d);
3753
3754   if (d == 2
3755       && BRANCH_COST (optimize_insn_for_speed_p (),
3756                       false) >= 1)
3757     {
3758       temp = gen_reg_rtx (mode);
3759       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3760       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3761                            0, OPTAB_LIB_WIDEN);
3762       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3763     }
3764
3765 #ifdef HAVE_conditional_move
3766   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3767       >= 2)
3768     {
3769       rtx temp2;
3770
3771       start_sequence ();
3772       temp2 = copy_to_mode_reg (mode, op0);
3773       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3774                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3775       temp = force_reg (mode, temp);
3776
3777       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3778       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3779                                      mode, temp, temp2, mode, 0);
3780       if (temp2)
3781         {
3782           rtx_insn *seq = get_insns ();
3783           end_sequence ();
3784           emit_insn (seq);
3785           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3786         }
3787       end_sequence ();
3788     }
3789 #endif
3790
3791   if (BRANCH_COST (optimize_insn_for_speed_p (),
3792                    false) >= 2)
3793     {
3794       int ushift = GET_MODE_BITSIZE (mode) - logd;
3795
3796       temp = gen_reg_rtx (mode);
3797       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3798       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3799           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3800              > COSTS_N_INSNS (1))
3801         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3802                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3803       else
3804         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3805                              ushift, NULL_RTX, 1);
3806       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3807                            0, OPTAB_LIB_WIDEN);
3808       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3809     }
3810
3811   label = gen_label_rtx ();
3812   temp = copy_to_mode_reg (mode, op0);
3813   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3814   expand_inc (temp, gen_int_mode (d - 1, mode));
3815   emit_label (label);
3816   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3817 }
3818 \f
3819 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3820    if that is convenient, and returning where the result is.
3821    You may request either the quotient or the remainder as the result;
3822    specify REM_FLAG nonzero to get the remainder.
3823
3824    CODE is the expression code for which kind of division this is;
3825    it controls how rounding is done.  MODE is the machine mode to use.
3826    UNSIGNEDP nonzero means do unsigned division.  */
3827
3828 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3829    and then correct it by or'ing in missing high bits
3830    if result of ANDI is nonzero.
3831    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3832    This could optimize to a bfexts instruction.
3833    But C doesn't use these operations, so their optimizations are
3834    left for later.  */
3835 /* ??? For modulo, we don't actually need the highpart of the first product,
3836    the low part will do nicely.  And for small divisors, the second multiply
3837    can also be a low-part only multiply or even be completely left out.
3838    E.g. to calculate the remainder of a division by 3 with a 32 bit
3839    multiply, multiply with 0x55555556 and extract the upper two bits;
3840    the result is exact for inputs up to 0x1fffffff.
3841    The input range can be reduced by using cross-sum rules.
3842    For odd divisors >= 3, the following table gives right shift counts
3843    so that if a number is shifted by an integer multiple of the given
3844    amount, the remainder stays the same:
3845    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3846    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3847    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3848    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3849    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3850
3851    Cross-sum rules for even numbers can be derived by leaving as many bits
3852    to the right alone as the divisor has zeros to the right.
3853    E.g. if x is an unsigned 32 bit number:
3854    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3855    */
3856
3857 rtx
3858 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3859                rtx op0, rtx op1, rtx target, int unsignedp)
3860 {
3861   machine_mode compute_mode;
3862   rtx tquotient;
3863   rtx quotient = 0, remainder = 0;
3864   rtx_insn *last;
3865   int size;
3866   rtx_insn *insn;
3867   optab optab1, optab2;
3868   int op1_is_constant, op1_is_pow2 = 0;
3869   int max_cost, extra_cost;
3870   static HOST_WIDE_INT last_div_const = 0;
3871   bool speed = optimize_insn_for_speed_p ();
3872
3873   op1_is_constant = CONST_INT_P (op1);
3874   if (op1_is_constant)
3875     {
3876       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3877       if (unsignedp)
3878         ext_op1 &= GET_MODE_MASK (mode);
3879       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3880                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3881     }
3882
3883   /*
3884      This is the structure of expand_divmod:
3885
3886      First comes code to fix up the operands so we can perform the operations
3887      correctly and efficiently.
3888
3889      Second comes a switch statement with code specific for each rounding mode.
3890      For some special operands this code emits all RTL for the desired
3891      operation, for other cases, it generates only a quotient and stores it in
3892      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3893      to indicate that it has not done anything.
3894
3895      Last comes code that finishes the operation.  If QUOTIENT is set and
3896      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3897      QUOTIENT is not set, it is computed using trunc rounding.
3898
3899      We try to generate special code for division and remainder when OP1 is a
3900      constant.  If |OP1| = 2**n we can use shifts and some other fast
3901      operations.  For other values of OP1, we compute a carefully selected
3902      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3903      by m.
3904
3905      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3906      half of the product.  Different strategies for generating the product are
3907      implemented in expmed_mult_highpart.
3908
3909      If what we actually want is the remainder, we generate that by another
3910      by-constant multiplication and a subtraction.  */
3911
3912   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3913      code below will malfunction if we are, so check here and handle
3914      the special case if so.  */
3915   if (op1 == const1_rtx)
3916     return rem_flag ? const0_rtx : op0;
3917
3918     /* When dividing by -1, we could get an overflow.
3919      negv_optab can handle overflows.  */
3920   if (! unsignedp && op1 == constm1_rtx)
3921     {
3922       if (rem_flag)
3923         return const0_rtx;
3924       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3925                           ? negv_optab : neg_optab, op0, target, 0);
3926     }
3927
3928   if (target
3929       /* Don't use the function value register as a target
3930          since we have to read it as well as write it,
3931          and function-inlining gets confused by this.  */
3932       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3933           /* Don't clobber an operand while doing a multi-step calculation.  */
3934           || ((rem_flag || op1_is_constant)
3935               && (reg_mentioned_p (target, op0)
3936                   || (MEM_P (op0) && MEM_P (target))))
3937           || reg_mentioned_p (target, op1)
3938           || (MEM_P (op1) && MEM_P (target))))
3939     target = 0;
3940
3941   /* Get the mode in which to perform this computation.  Normally it will
3942      be MODE, but sometimes we can't do the desired operation in MODE.
3943      If so, pick a wider mode in which we can do the operation.  Convert
3944      to that mode at the start to avoid repeated conversions.
3945
3946      First see what operations we need.  These depend on the expression
3947      we are evaluating.  (We assume that divxx3 insns exist under the
3948      same conditions that modxx3 insns and that these insns don't normally
3949      fail.  If these assumptions are not correct, we may generate less
3950      efficient code in some cases.)
3951
3952      Then see if we find a mode in which we can open-code that operation
3953      (either a division, modulus, or shift).  Finally, check for the smallest
3954      mode for which we can do the operation with a library call.  */
3955
3956   /* We might want to refine this now that we have division-by-constant
3957      optimization.  Since expmed_mult_highpart tries so many variants, it is
3958      not straightforward to generalize this.  Maybe we should make an array
3959      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3960
3961   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3962             ? (unsignedp ? lshr_optab : ashr_optab)
3963             : (unsignedp ? udiv_optab : sdiv_optab));
3964   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3965             ? optab1
3966             : (unsignedp ? udivmod_optab : sdivmod_optab));
3967
3968   for (compute_mode = mode; compute_mode != VOIDmode;
3969        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3970     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3971         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3972       break;
3973
3974   if (compute_mode == VOIDmode)
3975     for (compute_mode = mode; compute_mode != VOIDmode;
3976          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3977       if (optab_libfunc (optab1, compute_mode)
3978           || optab_libfunc (optab2, compute_mode))
3979         break;
3980
3981   /* If we still couldn't find a mode, use MODE, but expand_binop will
3982      probably die.  */
3983   if (compute_mode == VOIDmode)
3984     compute_mode = mode;
3985
3986   if (target && GET_MODE (target) == compute_mode)
3987     tquotient = target;
3988   else
3989     tquotient = gen_reg_rtx (compute_mode);
3990
3991   size = GET_MODE_BITSIZE (compute_mode);
3992 #if 0
3993   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3994      (mode), and thereby get better code when OP1 is a constant.  Do that
3995      later.  It will require going over all usages of SIZE below.  */
3996   size = GET_MODE_BITSIZE (mode);
3997 #endif
3998
3999   /* Only deduct something for a REM if the last divide done was
4000      for a different constant.   Then set the constant of the last
4001      divide.  */
4002   max_cost = (unsignedp
4003               ? udiv_cost (speed, compute_mode)
4004               : sdiv_cost (speed, compute_mode));
4005   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4006                      && INTVAL (op1) == last_div_const))
4007     max_cost -= (mul_cost (speed, compute_mode)
4008                  + add_cost (speed, compute_mode));
4009
4010   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4011
4012   /* Now convert to the best mode to use.  */
4013   if (compute_mode != mode)
4014     {
4015       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4016       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4017
4018       /* convert_modes may have placed op1 into a register, so we
4019          must recompute the following.  */
4020       op1_is_constant = CONST_INT_P (op1);
4021       op1_is_pow2 = (op1_is_constant
4022                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4023                           || (! unsignedp
4024                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4025     }
4026
4027   /* If one of the operands is a volatile MEM, copy it into a register.  */
4028
4029   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4030     op0 = force_reg (compute_mode, op0);
4031   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4032     op1 = force_reg (compute_mode, op1);
4033
4034   /* If we need the remainder or if OP1 is constant, we need to
4035      put OP0 in a register in case it has any queued subexpressions.  */
4036   if (rem_flag || op1_is_constant)
4037     op0 = force_reg (compute_mode, op0);
4038
4039   last = get_last_insn ();
4040
4041   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4042   if (unsignedp)
4043     {
4044       if (code == FLOOR_DIV_EXPR)
4045         code = TRUNC_DIV_EXPR;
4046       if (code == FLOOR_MOD_EXPR)
4047         code = TRUNC_MOD_EXPR;
4048       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4049         code = TRUNC_DIV_EXPR;
4050     }
4051
4052   if (op1 != const0_rtx)
4053     switch (code)
4054       {
4055       case TRUNC_MOD_EXPR:
4056       case TRUNC_DIV_EXPR:
4057         if (op1_is_constant)
4058           {
4059             if (unsignedp)
4060               {
4061                 unsigned HOST_WIDE_INT mh, ml;
4062                 int pre_shift, post_shift;
4063                 int dummy;
4064                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4065                                             & GET_MODE_MASK (compute_mode));
4066
4067                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4068                   {
4069                     pre_shift = floor_log2 (d);
4070                     if (rem_flag)
4071                       {
4072                         unsigned HOST_WIDE_INT mask
4073                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4074                         remainder
4075                           = expand_binop (compute_mode, and_optab, op0,
4076                                           gen_int_mode (mask, compute_mode),
4077                                           remainder, 1,
4078                                           OPTAB_LIB_WIDEN);
4079                         if (remainder)
4080                           return gen_lowpart (mode, remainder);
4081                       }
4082                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4083                                              pre_shift, tquotient, 1);
4084                   }
4085                 else if (size <= HOST_BITS_PER_WIDE_INT)
4086                   {
4087                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4088                       {
4089                         /* Most significant bit of divisor is set; emit an scc
4090                            insn.  */
4091                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4092                                                           compute_mode, 1, 1);
4093                       }
4094                     else
4095                       {
4096                         /* Find a suitable multiplier and right shift count
4097                            instead of multiplying with D.  */
4098
4099                         mh = choose_multiplier (d, size, size,
4100                                                 &ml, &post_shift, &dummy);
4101
4102                         /* If the suggested multiplier is more than SIZE bits,
4103                            we can do better for even divisors, using an
4104                            initial right shift.  */
4105                         if (mh != 0 && (d & 1) == 0)
4106                           {
4107                             pre_shift = floor_log2 (d & -d);
4108                             mh = choose_multiplier (d >> pre_shift, size,
4109                                                     size - pre_shift,
4110                                                     &ml, &post_shift, &dummy);
4111                             gcc_assert (!mh);
4112                           }
4113                         else
4114                           pre_shift = 0;
4115
4116                         if (mh != 0)
4117                           {
4118                             rtx t1, t2, t3, t4;
4119
4120                             if (post_shift - 1 >= BITS_PER_WORD)
4121                               goto fail1;
4122
4123                             extra_cost
4124                               = (shift_cost (speed, compute_mode, post_shift - 1)
4125                                  + shift_cost (speed, compute_mode, 1)
4126                                  + 2 * add_cost (speed, compute_mode));
4127                             t1 = expmed_mult_highpart
4128                               (compute_mode, op0,
4129                                gen_int_mode (ml, compute_mode),
4130                                NULL_RTX, 1, max_cost - extra_cost);
4131                             if (t1 == 0)
4132                               goto fail1;
4133                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4134                                                                op0, t1),
4135                                                 NULL_RTX);
4136                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4137                                                t2, 1, NULL_RTX, 1);
4138                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4139                                                               t1, t3),
4140                                                 NULL_RTX);
4141                             quotient = expand_shift
4142                               (RSHIFT_EXPR, compute_mode, t4,
4143                                post_shift - 1, tquotient, 1);
4144                           }
4145                         else
4146                           {
4147                             rtx t1, t2;
4148
4149                             if (pre_shift >= BITS_PER_WORD
4150                                 || post_shift >= BITS_PER_WORD)
4151                               goto fail1;
4152
4153                             t1 = expand_shift
4154                               (RSHIFT_EXPR, compute_mode, op0,
4155                                pre_shift, NULL_RTX, 1);
4156                             extra_cost
4157                               = (shift_cost (speed, compute_mode, pre_shift)
4158                                  + shift_cost (speed, compute_mode, post_shift));
4159                             t2 = expmed_mult_highpart
4160                               (compute_mode, t1,
4161                                gen_int_mode (ml, compute_mode),
4162                                NULL_RTX, 1, max_cost - extra_cost);
4163                             if (t2 == 0)
4164                               goto fail1;
4165                             quotient = expand_shift
4166                               (RSHIFT_EXPR, compute_mode, t2,
4167                                post_shift, tquotient, 1);
4168                           }
4169                       }
4170                   }
4171                 else            /* Too wide mode to use tricky code */
4172                   break;
4173
4174                 insn = get_last_insn ();
4175                 if (insn != last)
4176                   set_dst_reg_note (insn, REG_EQUAL,
4177                                     gen_rtx_UDIV (compute_mode, op0, op1),
4178                                     quotient);
4179               }
4180             else                /* TRUNC_DIV, signed */
4181               {
4182                 unsigned HOST_WIDE_INT ml;
4183                 int lgup, post_shift;
4184                 rtx mlr;
4185                 HOST_WIDE_INT d = INTVAL (op1);
4186                 unsigned HOST_WIDE_INT abs_d;
4187
4188                 /* Since d might be INT_MIN, we have to cast to
4189                    unsigned HOST_WIDE_INT before negating to avoid
4190                    undefined signed overflow.  */
4191                 abs_d = (d >= 0
4192                          ? (unsigned HOST_WIDE_INT) d
4193                          : - (unsigned HOST_WIDE_INT) d);
4194
4195                 /* n rem d = n rem -d */
4196                 if (rem_flag && d < 0)
4197                   {
4198                     d = abs_d;
4199                     op1 = gen_int_mode (abs_d, compute_mode);
4200                   }
4201
4202                 if (d == 1)
4203                   quotient = op0;
4204                 else if (d == -1)
4205                   quotient = expand_unop (compute_mode, neg_optab, op0,
4206                                           tquotient, 0);
4207                 else if (HOST_BITS_PER_WIDE_INT >= size
4208                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4209                   {
4210                     /* This case is not handled correctly below.  */
4211                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4212                                                 compute_mode, 1, 1);
4213                     if (quotient == 0)
4214                       goto fail1;
4215                   }
4216                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4217                          && (rem_flag
4218                              ? smod_pow2_cheap (speed, compute_mode)
4219                              : sdiv_pow2_cheap (speed, compute_mode))
4220                          /* We assume that cheap metric is true if the
4221                             optab has an expander for this mode.  */
4222                          && ((optab_handler ((rem_flag ? smod_optab
4223                                               : sdiv_optab),
4224                                              compute_mode)
4225                               != CODE_FOR_nothing)
4226                              || (optab_handler (sdivmod_optab,
4227                                                 compute_mode)
4228                                  != CODE_FOR_nothing)))
4229                   ;
4230                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4231                   {
4232                     if (rem_flag)
4233                       {
4234                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4235                         if (remainder)
4236                           return gen_lowpart (mode, remainder);
4237                       }
4238
4239                     if (sdiv_pow2_cheap (speed, compute_mode)
4240                         && ((optab_handler (sdiv_optab, compute_mode)
4241                              != CODE_FOR_nothing)
4242                             || (optab_handler (sdivmod_optab, compute_mode)
4243                                 != CODE_FOR_nothing)))
4244                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4245                                                 compute_mode, op0,
4246                                                 gen_int_mode (abs_d,
4247                                                               compute_mode),
4248                                                 NULL_RTX, 0);
4249                     else
4250                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4251
4252                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4253                        negate the quotient.  */
4254                     if (d < 0)
4255                       {
4256                         insn = get_last_insn ();
4257                         if (insn != last
4258                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4259                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4260                           set_dst_reg_note (insn, REG_EQUAL,
4261                                             gen_rtx_DIV (compute_mode, op0,
4262                                                          gen_int_mode
4263                                                            (abs_d,
4264                                                             compute_mode)),
4265                                             quotient);
4266
4267                         quotient = expand_unop (compute_mode, neg_optab,
4268                                                 quotient, quotient, 0);
4269                       }
4270                   }
4271                 else if (size <= HOST_BITS_PER_WIDE_INT)
4272                   {
4273                     choose_multiplier (abs_d, size, size - 1,
4274                                        &ml, &post_shift, &lgup);
4275                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4276                       {
4277                         rtx t1, t2, t3;
4278
4279                         if (post_shift >= BITS_PER_WORD
4280                             || size - 1 >= BITS_PER_WORD)
4281                           goto fail1;
4282
4283                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4284                                       + shift_cost (speed, compute_mode, size - 1)
4285                                       + add_cost (speed, compute_mode));
4286                         t1 = expmed_mult_highpart
4287                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4288                            NULL_RTX, 0, max_cost - extra_cost);
4289                         if (t1 == 0)
4290                           goto fail1;
4291                         t2 = expand_shift
4292                           (RSHIFT_EXPR, compute_mode, t1,
4293                            post_shift, NULL_RTX, 0);
4294                         t3 = expand_shift
4295                           (RSHIFT_EXPR, compute_mode, op0,
4296                            size - 1, NULL_RTX, 0);
4297                         if (d < 0)
4298                           quotient
4299                             = force_operand (gen_rtx_MINUS (compute_mode,
4300                                                             t3, t2),
4301                                              tquotient);
4302                         else
4303                           quotient
4304                             = force_operand (gen_rtx_MINUS (compute_mode,
4305                                                             t2, t3),
4306                                              tquotient);
4307                       }
4308                     else
4309                       {
4310                         rtx t1, t2, t3, t4;
4311
4312                         if (post_shift >= BITS_PER_WORD
4313                             || size - 1 >= BITS_PER_WORD)
4314                           goto fail1;
4315
4316                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4317                         mlr = gen_int_mode (ml, compute_mode);
4318                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4319                                       + shift_cost (speed, compute_mode, size - 1)
4320                                       + 2 * add_cost (speed, compute_mode));
4321                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4322                                                    NULL_RTX, 0,
4323                                                    max_cost - extra_cost);
4324                         if (t1 == 0)
4325                           goto fail1;
4326                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4327                                                           t1, op0),
4328                                             NULL_RTX);
4329                         t3 = expand_shift
4330                           (RSHIFT_EXPR, compute_mode, t2,
4331                            post_shift, NULL_RTX, 0);
4332                         t4 = expand_shift
4333                           (RSHIFT_EXPR, compute_mode, op0,
4334                            size - 1, NULL_RTX, 0);
4335                         if (d < 0)
4336                           quotient
4337                             = force_operand (gen_rtx_MINUS (compute_mode,
4338                                                             t4, t3),
4339                                              tquotient);
4340                         else
4341                           quotient
4342                             = force_operand (gen_rtx_MINUS (compute_mode,
4343                                                             t3, t4),
4344                                              tquotient);
4345                       }
4346                   }
4347                 else            /* Too wide mode to use tricky code */
4348                   break;
4349
4350                 insn = get_last_insn ();
4351                 if (insn != last)
4352                   set_dst_reg_note (insn, REG_EQUAL,
4353                                     gen_rtx_DIV (compute_mode, op0, op1),
4354                                     quotient);
4355               }
4356             break;
4357           }
4358       fail1:
4359         delete_insns_since (last);
4360         break;
4361
4362       case FLOOR_DIV_EXPR:
4363       case FLOOR_MOD_EXPR:
4364       /* We will come here only for signed operations.  */
4365         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4366           {
4367             unsigned HOST_WIDE_INT mh, ml;
4368             int pre_shift, lgup, post_shift;
4369             HOST_WIDE_INT d = INTVAL (op1);
4370
4371             if (d > 0)
4372               {
4373                 /* We could just as easily deal with negative constants here,
4374                    but it does not seem worth the trouble for GCC 2.6.  */
4375                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4376                   {
4377                     pre_shift = floor_log2 (d);
4378                     if (rem_flag)
4379                       {
4380                         unsigned HOST_WIDE_INT mask
4381                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4382                         remainder = expand_binop
4383                           (compute_mode, and_optab, op0,
4384                            gen_int_mode (mask, compute_mode),
4385                            remainder, 0, OPTAB_LIB_WIDEN);
4386                         if (remainder)
4387                           return gen_lowpart (mode, remainder);
4388                       }
4389                     quotient = expand_shift
4390                       (RSHIFT_EXPR, compute_mode, op0,
4391                        pre_shift, tquotient, 0);
4392                   }
4393                 else
4394                   {
4395                     rtx t1, t2, t3, t4;
4396
4397                     mh = choose_multiplier (d, size, size - 1,
4398                                             &ml, &post_shift, &lgup);
4399                     gcc_assert (!mh);
4400
4401                     if (post_shift < BITS_PER_WORD
4402                         && size - 1 < BITS_PER_WORD)
4403                       {
4404                         t1 = expand_shift
4405                           (RSHIFT_EXPR, compute_mode, op0,
4406                            size - 1, NULL_RTX, 0);
4407                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4408                                            NULL_RTX, 0, OPTAB_WIDEN);
4409                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4410                                       + shift_cost (speed, compute_mode, size - 1)
4411                                       + 2 * add_cost (speed, compute_mode));
4412                         t3 = expmed_mult_highpart
4413                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4414                            NULL_RTX, 1, max_cost - extra_cost);
4415                         if (t3 != 0)
4416                           {
4417                             t4 = expand_shift
4418                               (RSHIFT_EXPR, compute_mode, t3,
4419                                post_shift, NULL_RTX, 1);
4420                             quotient = expand_binop (compute_mode, xor_optab,
4421                                                      t4, t1, tquotient, 0,
4422                                                      OPTAB_WIDEN);
4423                           }
4424                       }
4425                   }
4426               }
4427             else
4428               {
4429                 rtx nsign, t1, t2, t3, t4;
4430                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4431                                                   op0, constm1_rtx), NULL_RTX);
4432                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4433                                    0, OPTAB_WIDEN);
4434                 nsign = expand_shift
4435                   (RSHIFT_EXPR, compute_mode, t2,
4436                    size - 1, NULL_RTX, 0);
4437                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4438                                     NULL_RTX);
4439                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4440                                     NULL_RTX, 0);
4441                 if (t4)
4442                   {
4443                     rtx t5;
4444                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4445                                       NULL_RTX, 0);
4446                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4447                                                             t4, t5),
4448                                               tquotient);
4449                   }
4450               }
4451           }
4452
4453         if (quotient != 0)
4454           break;
4455         delete_insns_since (last);
4456
4457         /* Try using an instruction that produces both the quotient and
4458            remainder, using truncation.  We can easily compensate the quotient
4459            or remainder to get floor rounding, once we have the remainder.
4460            Notice that we compute also the final remainder value here,
4461            and return the result right away.  */
4462         if (target == 0 || GET_MODE (target) != compute_mode)
4463           target = gen_reg_rtx (compute_mode);
4464
4465         if (rem_flag)
4466           {
4467             remainder
4468               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4469             quotient = gen_reg_rtx (compute_mode);
4470           }
4471         else
4472           {
4473             quotient
4474               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4475             remainder = gen_reg_rtx (compute_mode);
4476           }
4477
4478         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4479                                  quotient, remainder, 0))
4480           {
4481             /* This could be computed with a branch-less sequence.
4482                Save that for later.  */
4483             rtx tem;
4484             rtx_code_label *label = gen_label_rtx ();
4485             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4486             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4487                                 NULL_RTX, 0, OPTAB_WIDEN);
4488             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4489             expand_dec (quotient, const1_rtx);
4490             expand_inc (remainder, op1);
4491             emit_label (label);
4492             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4493           }
4494
4495         /* No luck with division elimination or divmod.  Have to do it
4496            by conditionally adjusting op0 *and* the result.  */
4497         {
4498           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4499           rtx adjusted_op0;
4500           rtx tem;
4501
4502           quotient = gen_reg_rtx (compute_mode);
4503           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4504           label1 = gen_label_rtx ();
4505           label2 = gen_label_rtx ();
4506           label3 = gen_label_rtx ();
4507           label4 = gen_label_rtx ();
4508           label5 = gen_label_rtx ();
4509           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4510           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4511           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4512                               quotient, 0, OPTAB_LIB_WIDEN);
4513           if (tem != quotient)
4514             emit_move_insn (quotient, tem);
4515           emit_jump_insn (gen_jump (label5));
4516           emit_barrier ();
4517           emit_label (label1);
4518           expand_inc (adjusted_op0, const1_rtx);
4519           emit_jump_insn (gen_jump (label4));
4520           emit_barrier ();
4521           emit_label (label2);
4522           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4523           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4524                               quotient, 0, OPTAB_LIB_WIDEN);
4525           if (tem != quotient)
4526             emit_move_insn (quotient, tem);
4527           emit_jump_insn (gen_jump (label5));
4528           emit_barrier ();
4529           emit_label (label3);
4530           expand_dec (adjusted_op0, const1_rtx);
4531           emit_label (label4);
4532           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4533                               quotient, 0, OPTAB_LIB_WIDEN);
4534           if (tem != quotient)
4535             emit_move_insn (quotient, tem);
4536           expand_dec (quotient, const1_rtx);
4537           emit_label (label5);
4538         }
4539         break;
4540
4541       case CEIL_DIV_EXPR:
4542       case CEIL_MOD_EXPR:
4543         if (unsignedp)
4544           {
4545             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4546               {
4547                 rtx t1, t2, t3;
4548                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4549                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4550                                    floor_log2 (d), tquotient, 1);
4551                 t2 = expand_binop (compute_mode, and_optab, op0,
4552                                    gen_int_mode (d - 1, compute_mode),
4553                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4554                 t3 = gen_reg_rtx (compute_mode);
4555                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4556                                       compute_mode, 1, 1);
4557                 if (t3 == 0)
4558                   {
4559                     rtx_code_label *lab;
4560                     lab = gen_label_rtx ();
4561                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4562                     expand_inc (t1, const1_rtx);
4563                     emit_label (lab);
4564                     quotient = t1;
4565                   }
4566                 else
4567                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4568                                                           t1, t3),
4569                                             tquotient);
4570                 break;
4571               }
4572
4573             /* Try using an instruction that produces both the quotient and
4574                remainder, using truncation.  We can easily compensate the
4575                quotient or remainder to get ceiling rounding, once we have the
4576                remainder.  Notice that we compute also the final remainder
4577                value here, and return the result right away.  */
4578             if (target == 0 || GET_MODE (target) != compute_mode)
4579               target = gen_reg_rtx (compute_mode);
4580
4581             if (rem_flag)
4582               {
4583                 remainder = (REG_P (target)
4584                              ? target : gen_reg_rtx (compute_mode));
4585                 quotient = gen_reg_rtx (compute_mode);
4586               }
4587             else
4588               {
4589                 quotient = (REG_P (target)
4590                             ? target : gen_reg_rtx (compute_mode));
4591                 remainder = gen_reg_rtx (compute_mode);
4592               }
4593
4594             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4595                                      remainder, 1))
4596               {
4597                 /* This could be computed with a branch-less sequence.
4598                    Save that for later.  */
4599                 rtx_code_label *label = gen_label_rtx ();
4600                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4601                                  compute_mode, label);
4602                 expand_inc (quotient, const1_rtx);
4603                 expand_dec (remainder, op1);
4604                 emit_label (label);
4605                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4606               }
4607
4608             /* No luck with division elimination or divmod.  Have to do it
4609                by conditionally adjusting op0 *and* the result.  */
4610             {
4611               rtx_code_label *label1, *label2;
4612               rtx adjusted_op0, tem;
4613
4614               quotient = gen_reg_rtx (compute_mode);
4615               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4616               label1 = gen_label_rtx ();
4617               label2 = gen_label_rtx ();
4618               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4619                                compute_mode, label1);
4620               emit_move_insn  (quotient, const0_rtx);
4621               emit_jump_insn (gen_jump (label2));
4622               emit_barrier ();
4623               emit_label (label1);
4624               expand_dec (adjusted_op0, const1_rtx);
4625               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4626                                   quotient, 1, OPTAB_LIB_WIDEN);
4627               if (tem != quotient)
4628                 emit_move_insn (quotient, tem);
4629               expand_inc (quotient, const1_rtx);
4630               emit_label (label2);
4631             }
4632           }
4633         else /* signed */
4634           {
4635             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4636                 && INTVAL (op1) >= 0)
4637               {
4638                 /* This is extremely similar to the code for the unsigned case
4639                    above.  For 2.7 we should merge these variants, but for
4640                    2.6.1 I don't want to touch the code for unsigned since that
4641                    get used in C.  The signed case will only be used by other
4642                    languages (Ada).  */
4643
4644                 rtx t1, t2, t3;
4645                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4646                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4647                                    floor_log2 (d), tquotient, 0);
4648                 t2 = expand_binop (compute_mode, and_optab, op0,
4649                                    gen_int_mode (d - 1, compute_mode),
4650                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4651                 t3 = gen_reg_rtx (compute_mode);
4652                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4653                                       compute_mode, 1, 1);
4654                 if (t3 == 0)
4655                   {
4656                     rtx_code_label *lab;
4657                     lab = gen_label_rtx ();
4658                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4659                     expand_inc (t1, const1_rtx);
4660                     emit_label (lab);
4661                     quotient = t1;
4662                   }
4663                 else
4664                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4665                                                           t1, t3),
4666                                             tquotient);
4667                 break;
4668               }
4669
4670             /* Try using an instruction that produces both the quotient and
4671                remainder, using truncation.  We can easily compensate the
4672                quotient or remainder to get ceiling rounding, once we have the
4673                remainder.  Notice that we compute also the final remainder
4674                value here, and return the result right away.  */
4675             if (target == 0 || GET_MODE (target) != compute_mode)
4676               target = gen_reg_rtx (compute_mode);
4677             if (rem_flag)
4678               {
4679                 remainder= (REG_P (target)
4680                             ? target : gen_reg_rtx (compute_mode));
4681                 quotient = gen_reg_rtx (compute_mode);
4682               }
4683             else
4684               {
4685                 quotient = (REG_P (target)
4686                             ? target : gen_reg_rtx (compute_mode));
4687                 remainder = gen_reg_rtx (compute_mode);
4688               }
4689
4690             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4691                                      remainder, 0))
4692               {
4693                 /* This could be computed with a branch-less sequence.
4694                    Save that for later.  */
4695                 rtx tem;
4696                 rtx_code_label *label = gen_label_rtx ();
4697                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4698                                  compute_mode, label);
4699                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4700                                     NULL_RTX, 0, OPTAB_WIDEN);
4701                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4702                 expand_inc (quotient, const1_rtx);
4703                 expand_dec (remainder, op1);
4704                 emit_label (label);
4705                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4706               }
4707
4708             /* No luck with division elimination or divmod.  Have to do it
4709                by conditionally adjusting op0 *and* the result.  */
4710             {
4711               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4712               rtx adjusted_op0;
4713               rtx tem;
4714
4715               quotient = gen_reg_rtx (compute_mode);
4716               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4717               label1 = gen_label_rtx ();
4718               label2 = gen_label_rtx ();
4719               label3 = gen_label_rtx ();
4720               label4 = gen_label_rtx ();
4721               label5 = gen_label_rtx ();
4722               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4723               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4724                                compute_mode, label1);
4725               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726                                   quotient, 0, OPTAB_LIB_WIDEN);
4727               if (tem != quotient)
4728                 emit_move_insn (quotient, tem);
4729               emit_jump_insn (gen_jump (label5));
4730               emit_barrier ();
4731               emit_label (label1);
4732               expand_dec (adjusted_op0, const1_rtx);
4733               emit_jump_insn (gen_jump (label4));
4734               emit_barrier ();
4735               emit_label (label2);
4736               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4737                                compute_mode, label3);
4738               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4739                                   quotient, 0, OPTAB_LIB_WIDEN);
4740               if (tem != quotient)
4741                 emit_move_insn (quotient, tem);
4742               emit_jump_insn (gen_jump (label5));
4743               emit_barrier ();
4744               emit_label (label3);
4745               expand_inc (adjusted_op0, const1_rtx);
4746               emit_label (label4);
4747               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4748                                   quotient, 0, OPTAB_LIB_WIDEN);
4749               if (tem != quotient)
4750                 emit_move_insn (quotient, tem);
4751               expand_inc (quotient, const1_rtx);
4752               emit_label (label5);
4753             }
4754           }
4755         break;
4756
4757       case EXACT_DIV_EXPR:
4758         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4759           {
4760             HOST_WIDE_INT d = INTVAL (op1);
4761             unsigned HOST_WIDE_INT ml;
4762             int pre_shift;
4763             rtx t1;
4764
4765             pre_shift = floor_log2 (d & -d);
4766             ml = invert_mod2n (d >> pre_shift, size);
4767             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4768                                pre_shift, NULL_RTX, unsignedp);
4769             quotient = expand_mult (compute_mode, t1,
4770                                     gen_int_mode (ml, compute_mode),
4771                                     NULL_RTX, 1);
4772
4773             insn = get_last_insn ();
4774             set_dst_reg_note (insn, REG_EQUAL,
4775                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4776                                               compute_mode, op0, op1),
4777                               quotient);
4778           }
4779         break;
4780
4781       case ROUND_DIV_EXPR:
4782       case ROUND_MOD_EXPR:
4783         if (unsignedp)
4784           {
4785             rtx tem;
4786             rtx_code_label *label;
4787             label = gen_label_rtx ();
4788             quotient = gen_reg_rtx (compute_mode);
4789             remainder = gen_reg_rtx (compute_mode);
4790             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4791               {
4792                 rtx tem;
4793                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4794                                          quotient, 1, OPTAB_LIB_WIDEN);
4795                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4796                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4797                                           remainder, 1, OPTAB_LIB_WIDEN);
4798               }
4799             tem = plus_constant (compute_mode, op1, -1);
4800             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4801             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4802             expand_inc (quotient, const1_rtx);
4803             expand_dec (remainder, op1);
4804             emit_label (label);
4805           }
4806         else
4807           {
4808             rtx abs_rem, abs_op1, tem, mask;
4809             rtx_code_label *label;
4810             label = gen_label_rtx ();
4811             quotient = gen_reg_rtx (compute_mode);
4812             remainder = gen_reg_rtx (compute_mode);
4813             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4814               {
4815                 rtx tem;
4816                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4817                                          quotient, 0, OPTAB_LIB_WIDEN);
4818                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4819                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4820                                           remainder, 0, OPTAB_LIB_WIDEN);
4821               }
4822             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4823             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4824             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4825                                 1, NULL_RTX, 1);
4826             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4827             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4828                                 NULL_RTX, 0, OPTAB_WIDEN);
4829             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4830                                  size - 1, NULL_RTX, 0);
4831             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4832                                 NULL_RTX, 0, OPTAB_WIDEN);
4833             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4834                                 NULL_RTX, 0, OPTAB_WIDEN);
4835             expand_inc (quotient, tem);
4836             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4837                                 NULL_RTX, 0, OPTAB_WIDEN);
4838             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4839                                 NULL_RTX, 0, OPTAB_WIDEN);
4840             expand_dec (remainder, tem);
4841             emit_label (label);
4842           }
4843         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4844
4845       default:
4846         gcc_unreachable ();
4847       }
4848
4849   if (quotient == 0)
4850     {
4851       if (target && GET_MODE (target) != compute_mode)
4852         target = 0;
4853
4854       if (rem_flag)
4855         {
4856           /* Try to produce the remainder without producing the quotient.
4857              If we seem to have a divmod pattern that does not require widening,
4858              don't try widening here.  We should really have a WIDEN argument
4859              to expand_twoval_binop, since what we'd really like to do here is
4860              1) try a mod insn in compute_mode
4861              2) try a divmod insn in compute_mode
4862              3) try a div insn in compute_mode and multiply-subtract to get
4863                 remainder
4864              4) try the same things with widening allowed.  */
4865           remainder
4866             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4867                                  op0, op1, target,
4868                                  unsignedp,
4869                                  ((optab_handler (optab2, compute_mode)
4870                                    != CODE_FOR_nothing)
4871                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4872           if (remainder == 0)
4873             {
4874               /* No luck there.  Can we do remainder and divide at once
4875                  without a library call?  */
4876               remainder = gen_reg_rtx (compute_mode);
4877               if (! expand_twoval_binop ((unsignedp
4878                                           ? udivmod_optab
4879                                           : sdivmod_optab),
4880                                          op0, op1,
4881                                          NULL_RTX, remainder, unsignedp))
4882                 remainder = 0;
4883             }
4884
4885           if (remainder)
4886             return gen_lowpart (mode, remainder);
4887         }
4888
4889       /* Produce the quotient.  Try a quotient insn, but not a library call.
4890          If we have a divmod in this mode, use it in preference to widening
4891          the div (for this test we assume it will not fail). Note that optab2
4892          is set to the one of the two optabs that the call below will use.  */
4893       quotient
4894         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4895                              op0, op1, rem_flag ? NULL_RTX : target,
4896                              unsignedp,
4897                              ((optab_handler (optab2, compute_mode)
4898                                != CODE_FOR_nothing)
4899                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4900
4901       if (quotient == 0)
4902         {
4903           /* No luck there.  Try a quotient-and-remainder insn,
4904              keeping the quotient alone.  */
4905           quotient = gen_reg_rtx (compute_mode);
4906           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4907                                      op0, op1,
4908                                      quotient, NULL_RTX, unsignedp))
4909             {
4910               quotient = 0;
4911               if (! rem_flag)
4912                 /* Still no luck.  If we are not computing the remainder,
4913                    use a library call for the quotient.  */
4914                 quotient = sign_expand_binop (compute_mode,
4915                                               udiv_optab, sdiv_optab,
4916                                               op0, op1, target,
4917                                               unsignedp, OPTAB_LIB_WIDEN);
4918             }
4919         }
4920     }
4921
4922   if (rem_flag)
4923     {
4924       if (target && GET_MODE (target) != compute_mode)
4925         target = 0;
4926
4927       if (quotient == 0)
4928         {
4929           /* No divide instruction either.  Use library for remainder.  */
4930           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4931                                          op0, op1, target,
4932                                          unsignedp, OPTAB_LIB_WIDEN);
4933           /* No remainder function.  Try a quotient-and-remainder
4934              function, keeping the remainder.  */
4935           if (!remainder)
4936             {
4937               remainder = gen_reg_rtx (compute_mode);
4938               if (!expand_twoval_binop_libfunc
4939                   (unsignedp ? udivmod_optab : sdivmod_optab,
4940                    op0, op1,
4941                    NULL_RTX, remainder,
4942                    unsignedp ? UMOD : MOD))
4943                 remainder = NULL_RTX;
4944             }
4945         }
4946       else
4947         {
4948           /* We divided.  Now finish doing X - Y * (X / Y).  */
4949           remainder = expand_mult (compute_mode, quotient, op1,
4950                                    NULL_RTX, unsignedp);
4951           remainder = expand_binop (compute_mode, sub_optab, op0,
4952                                     remainder, target, unsignedp,
4953                                     OPTAB_LIB_WIDEN);
4954         }
4955     }
4956
4957   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4958 }
4959 \f
4960 /* Return a tree node with data type TYPE, describing the value of X.
4961    Usually this is an VAR_DECL, if there is no obvious better choice.
4962    X may be an expression, however we only support those expressions
4963    generated by loop.c.  */
4964
4965 tree
4966 make_tree (tree type, rtx x)
4967 {
4968   tree t;
4969
4970   switch (GET_CODE (x))
4971     {
4972     case CONST_INT:
4973     case CONST_WIDE_INT:
4974       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
4975       return t;
4976
4977     case CONST_DOUBLE:
4978       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
4979       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
4980         t = wide_int_to_tree (type,
4981                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
4982                                                     HOST_BITS_PER_WIDE_INT * 2));
4983       else
4984         {
4985           REAL_VALUE_TYPE d;
4986
4987           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4988           t = build_real (type, d);
4989         }
4990
4991       return t;
4992
4993     case CONST_VECTOR:
4994       {
4995         int units = CONST_VECTOR_NUNITS (x);
4996         tree itype = TREE_TYPE (type);
4997         tree *elts;
4998         int i;
4999
5000         /* Build a tree with vector elements.  */
5001         elts = XALLOCAVEC (tree, units);
5002         for (i = units - 1; i >= 0; --i)
5003           {
5004             rtx elt = CONST_VECTOR_ELT (x, i);
5005             elts[i] = make_tree (itype, elt);
5006           }
5007
5008         return build_vector (type, elts);
5009       }
5010
5011     case PLUS:
5012       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5013                           make_tree (type, XEXP (x, 1)));
5014
5015     case MINUS:
5016       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5017                           make_tree (type, XEXP (x, 1)));
5018
5019     case NEG:
5020       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5021
5022     case MULT:
5023       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5024                           make_tree (type, XEXP (x, 1)));
5025
5026     case ASHIFT:
5027       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5028                           make_tree (type, XEXP (x, 1)));
5029
5030     case LSHIFTRT:
5031       t = unsigned_type_for (type);
5032       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5033                                          make_tree (t, XEXP (x, 0)),
5034                                          make_tree (type, XEXP (x, 1))));
5035
5036     case ASHIFTRT:
5037       t = signed_type_for (type);
5038       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5039                                          make_tree (t, XEXP (x, 0)),
5040                                          make_tree (type, XEXP (x, 1))));
5041
5042     case DIV:
5043       if (TREE_CODE (type) != REAL_TYPE)
5044         t = signed_type_for (type);
5045       else
5046         t = type;
5047
5048       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5049                                          make_tree (t, XEXP (x, 0)),
5050                                          make_tree (t, XEXP (x, 1))));
5051     case UDIV:
5052       t = unsigned_type_for (type);
5053       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5054                                          make_tree (t, XEXP (x, 0)),
5055                                          make_tree (t, XEXP (x, 1))));
5056
5057     case SIGN_EXTEND:
5058     case ZERO_EXTEND:
5059       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5060                                           GET_CODE (x) == ZERO_EXTEND);
5061       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5062
5063     case CONST:
5064       return make_tree (type, XEXP (x, 0));
5065
5066     case SYMBOL_REF:
5067       t = SYMBOL_REF_DECL (x);
5068       if (t)
5069         return fold_convert (type, build_fold_addr_expr (t));
5070       /* else fall through.  */
5071
5072     default:
5073       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5074
5075       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5076          address mode to pointer mode.  */
5077       if (POINTER_TYPE_P (type))
5078         x = convert_memory_address_addr_space
5079               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5080
5081       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5082          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5083       t->decl_with_rtl.rtl = x;
5084
5085       return t;
5086     }
5087 }
5088 \f
5089 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5090    and returning TARGET.
5091
5092    If TARGET is 0, a pseudo-register or constant is returned.  */
5093
5094 rtx
5095 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5096 {
5097   rtx tem = 0;
5098
5099   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5100     tem = simplify_binary_operation (AND, mode, op0, op1);
5101   if (tem == 0)
5102     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5103
5104   if (target == 0)
5105     target = tem;
5106   else if (tem != target)
5107     emit_move_insn (target, tem);
5108   return target;
5109 }
5110
5111 /* Helper function for emit_store_flag.  */
5112 rtx
5113 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5114              machine_mode mode, machine_mode compare_mode,
5115              int unsignedp, rtx x, rtx y, int normalizep,
5116              machine_mode target_mode)
5117 {
5118   struct expand_operand ops[4];
5119   rtx op0, comparison, subtarget;
5120   rtx_insn *last;
5121   machine_mode result_mode = targetm.cstore_mode (icode);
5122
5123   last = get_last_insn ();
5124   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5125   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5126   if (!x || !y)
5127     {
5128       delete_insns_since (last);
5129       return NULL_RTX;
5130     }
5131
5132   if (target_mode == VOIDmode)
5133     target_mode = result_mode;
5134   if (!target)
5135     target = gen_reg_rtx (target_mode);
5136
5137   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5138
5139   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5140   create_fixed_operand (&ops[1], comparison);
5141   create_fixed_operand (&ops[2], x);
5142   create_fixed_operand (&ops[3], y);
5143   if (!maybe_expand_insn (icode, 4, ops))
5144     {
5145       delete_insns_since (last);
5146       return NULL_RTX;
5147     }
5148   subtarget = ops[0].value;
5149
5150   /* If we are converting to a wider mode, first convert to
5151      TARGET_MODE, then normalize.  This produces better combining
5152      opportunities on machines that have a SIGN_EXTRACT when we are
5153      testing a single bit.  This mostly benefits the 68k.
5154
5155      If STORE_FLAG_VALUE does not have the sign bit set when
5156      interpreted in MODE, we can do this conversion as unsigned, which
5157      is usually more efficient.  */
5158   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5159     {
5160       convert_move (target, subtarget,
5161                     val_signbit_known_clear_p (result_mode,
5162                                                STORE_FLAG_VALUE));
5163       op0 = target;
5164       result_mode = target_mode;
5165     }
5166   else
5167     op0 = subtarget;
5168
5169   /* If we want to keep subexpressions around, don't reuse our last
5170      target.  */
5171   if (optimize)
5172     subtarget = 0;
5173
5174   /* Now normalize to the proper value in MODE.  Sometimes we don't
5175      have to do anything.  */
5176   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5177     ;
5178   /* STORE_FLAG_VALUE might be the most negative number, so write
5179      the comparison this way to avoid a compiler-time warning.  */
5180   else if (- normalizep == STORE_FLAG_VALUE)
5181     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5182
5183   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5184      it hard to use a value of just the sign bit due to ANSI integer
5185      constant typing rules.  */
5186   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5187     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5188                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5189                         normalizep == 1);
5190   else
5191     {
5192       gcc_assert (STORE_FLAG_VALUE & 1);
5193
5194       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5195       if (normalizep == -1)
5196         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5197     }
5198
5199   /* If we were converting to a smaller mode, do the conversion now.  */
5200   if (target_mode != result_mode)
5201     {
5202       convert_move (target, op0, 0);
5203       return target;
5204     }
5205   else
5206     return op0;
5207 }
5208
5209
5210 /* A subroutine of emit_store_flag only including "tricks" that do not
5211    need a recursive call.  These are kept separate to avoid infinite
5212    loops.

        TARGET is a suggested place for the result and may be null.  CODE is
        the comparison to apply to OP0 and OP1; it is mapped through
        unsigned_condition first when UNSIGNEDP is nonzero.  MODE is the
        mode of the operands; VOIDmode means "use the mode of OP0".
        NORMALIZEP has the same meaning as for emit_store_flag, and
        TARGET_MODE is the mode wanted for the result.

        The only calls back into emit_store_flag are in the double-word
        case, and those operate on a single word.

        Return an rtx holding the flag value, or 0 if none of the strategies
        tried here produced one.  */
5213
5214 static rtx
5215 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5216                    machine_mode mode, int unsignedp, int normalizep,
5217                    machine_mode target_mode)
5218 {
5219   rtx subtarget;
5220   enum insn_code icode;
5221   machine_mode compare_mode;
5222   enum mode_class mclass;
5223   enum rtx_code scode;
5224   rtx tem;
5225
5226   if (unsignedp)
5227     code = unsigned_condition (code);
5229
5230   /* If one operand is constant, make it the second one.  Only do this
5231      if the other operand is not constant as well.  */
5232
5233   if (swap_commutative_operands_p (op0, op1))
5234     {
5235       tem = op0;
5236       op0 = op1;
5237       op1 = tem;
5238       code = swap_condition (code);
5239     }
5240
5241   if (mode == VOIDmode)
5242     mode = GET_MODE (op0);
5243
5244   /* For some comparisons with 1 and -1, we can convert this to
5245      comparisons with zero.  This will often produce more opportunities for
5246      store-flag insns.  */
5247
5248   switch (code)
5249     {
5250     case LT:
5251       if (op1 == const1_rtx)
5252         op1 = const0_rtx, code = LE;
5253       break;
5254     case LE:
5255       if (op1 == constm1_rtx)
5256         op1 = const0_rtx, code = LT;
5257       break;
5258     case GE:
5259       if (op1 == const1_rtx)
5260         op1 = const0_rtx, code = GT;
5261       break;
5262     case GT:
5263       if (op1 == constm1_rtx)
5264         op1 = const0_rtx, code = GE;
5265       break;
5266     case GEU:
5267       if (op1 == const1_rtx)
5268         op1 = const0_rtx, code = NE;
5269       break;
5270     case LTU:
5271       if (op1 == const1_rtx)
5272         op1 = const0_rtx, code = EQ;
5273       break;
5274     default:
5275       break;
5276     }
5277
5278   /* If we are comparing a double-word integer with zero or -1, we can
5279      convert the comparison into one involving a single word.  */
5280   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5281       && GET_MODE_CLASS (mode) == MODE_INT
5282       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5283     {
5284       if ((code == EQ || code == NE)
5285           && (op1 == const0_rtx || op1 == constm1_rtx))
5286         {
5287           rtx op00, op01;
5288
5289           /* Do a logical OR or AND of the two words and compare the
5290              result.  */
5291           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5292           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5293           tem = expand_binop (word_mode,
5294                               op1 == const0_rtx ? ior_optab : and_optab,
5295                               op00, op01, NULL_RTX, unsignedp,
5296                               OPTAB_DIRECT);
5297
5298           if (tem != 0)
5299             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5300                                    unsignedp, normalizep);
5301         }
5302       else if ((code == LT || code == GE) && op1 == const0_rtx)
5303         {
5304           rtx op0h;
5305
5306           /* If testing the sign bit, can just test on high word.  */
5307           op0h = simplify_gen_subreg (word_mode, op0, mode,
5308                                       subreg_highpart_offset (word_mode,
5309                                                               mode));
5310           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5311                                  unsignedp, normalizep);
5312         }
5313       else
5314         tem = NULL_RTX;
5315
5316       if (tem)
5317         {
               /* The single-word result may still need to be brought into
                  TARGET_MODE.  Extend as unsigned unless the "true" value
                  is known to have its sign bit set in word_mode.  */
5318           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5319             return tem;
5320           if (!target)
5321             target = gen_reg_rtx (target_mode);
5322
5323           convert_move (target, tem,
5324                         !val_signbit_known_set_p (word_mode,
5325                                                   (normalizep ? normalizep
5326                                                    : STORE_FLAG_VALUE)));
5327           return target;
5328         }
5329     }
5330
5331   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5332      complement of A (for GE) and shifting the sign bit to the low bit.  */
5333   if (op1 == const0_rtx && (code == LT || code == GE)
5334       && GET_MODE_CLASS (mode) == MODE_INT
5335       && (normalizep || STORE_FLAG_VALUE == 1
5336           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5337     {
5338       subtarget = target;
5339
5340       if (!target)
5341         target_mode = mode;
5342
5343       /* If the result is to be wider than OP0, it is best to convert it
5344          first.  If it is to be narrower, it is *incorrect* to convert it
5345          first.  */
5346       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5347         {
5348           op0 = convert_modes (target_mode, mode, op0, 0);
5349           mode = target_mode;
5350         }
5351
5352       if (target_mode != mode)
5353         subtarget = 0;
5354
5355       if (code == GE)
5356         op0 = expand_unop (mode, one_cmpl_optab, op0,
5357                            ((STORE_FLAG_VALUE == 1 || normalizep)
5358                             ? 0 : subtarget), 0);
5359
5360       if (STORE_FLAG_VALUE == 1 || normalizep)
5361         /* If we are supposed to produce a 0/1 value, we want to do
5362            a logical shift from the sign bit to the low-order bit; for
5363            a -1/0 value, we do an arithmetic shift.  */
5364         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5365                             GET_MODE_BITSIZE (mode) - 1,
5366                             subtarget, normalizep != -1);
5367
5368       if (mode != target_mode)
5369         op0 = convert_modes (target_mode, mode, op0, 0);
5370
5371       return op0;
5372     }
5373
       /* Compute the swapped condition only now, from the canonicalized CODE.
          The floating-point retry below passes (OP1, OP0), i.e. the operands
          as they stand after the canonicalization above, so SCODE must be
          the swap of the CODE that goes with (OP0, OP1) at this point.
          Taking it before the operand exchange would make the retry test
          the wrong relation whenever the operands had been exchanged.  */
       scode = swap_condition (code);

       /* Look for a cstore pattern in MODE or any wider mode; only the first
          mode that has one is tried.  */
5374   mclass = GET_MODE_CLASS (mode);
5375   for (compare_mode = mode; compare_mode != VOIDmode;
5376        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5377     {
5378      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5379      icode = optab_handler (cstore_optab, optab_mode);
5380      if (icode != CODE_FOR_nothing)
5381         {
5382           do_pending_stack_adjust ();
5383           tem = emit_cstore (target, icode, code, mode, compare_mode,
5384                              unsignedp, op0, op1, normalizep, target_mode);
5385           if (tem)
5386             return tem;
5387
               /* For floating point, also try the equivalent comparison with
                  the operands exchanged.  */
5388           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5389             {
5390               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5391                                  unsignedp, op1, op0, normalizep, target_mode);
5392               if (tem)
5393                 return tem;
5394             }
5395           break;
5396         }
5397     }
5398
5399   return 0;
5400 }
5401
5402 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5403    and storing in TARGET.  Normally return TARGET.
5404    Return 0 if that cannot be done.
5405
5406    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5407    it is VOIDmode, they cannot both be CONST_INT.
5408
5409    UNSIGNEDP is for the case where we have to widen the operands
5410    to perform the operation.  It says to use zero-extension.
5411
5412    NORMALIZEP is 1 if we should convert the result to be either zero
5413    or one.  Normalize is -1 if we should convert the result to be
5414    either zero or -1.  If NORMALIZEP is zero, the result will be left
5415    "raw" out of the scc insn.

        TARGET may be null, in which case no particular result mode is
        requested.

        The function first defers to emit_store_flag_1.  If that fails and
        branches are not free, it tries a series of fallbacks: reversed
        comparisons fixed up with an add or xor, splitting floating-point
        comparisons, and sign-bit tricks for integer comparisons against
        zero.  Insns emitted by an abandoned attempt are removed with
        delete_insns_since.  */
5416
5417 rtx
5418 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5419                  machine_mode mode, int unsignedp, int normalizep)
5420 {
5421   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5422   enum rtx_code rcode;
5423   rtx subtarget;
5424   rtx tem, trueval;
5425   rtx_insn *last;
5426
5427   /* If we compare constants, we shouldn't use a store-flag operation,
5428      but a constant load.  We can get there via the vanilla route that
5429      usually generates a compare-branch sequence, but will in this case
5430      fold the comparison to a constant, and thus elide the branch.  */
5431   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5432     return NULL_RTX;
5433
5434   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5435                            target_mode);
5436   if (tem)
5437     return tem;
5438
5439   /* If we reached here, we can't do this with a scc insn, however there
5440      are some comparisons that can be done in other ways.  Don't do any
5441      of these cases if branches are very cheap.  */
5442   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5443     return 0;
5444
5445   /* See what we need to return.  We can only return a 1, -1, or the
5446      sign bit.  */
5447
5448   if (normalizep == 0)
5449     {
5450       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5451         normalizep = STORE_FLAG_VALUE;
5452
5453       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5454         ;
5455       else
5456         return 0;
5457     }
5458
       /* Remember the current end of the insn stream so that a partially
          emitted sequence can be thrown away with delete_insns_since.  */
5459   last = get_last_insn ();
5460
5461   /* If optimizing, use different pseudo registers for each insn, instead
5462      of reusing the same pseudo.  This leads to better CSE, but slows
5463      down the compiler, since there are more pseudos.  */
5464   subtarget = (!optimize
5465                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the caller will see for a true comparison.  */
5466   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5467
5468   /* For floating-point comparisons, try the reverse comparison or try
5469      changing the "orderedness" of the comparison.  */
5470   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5471     {
5472       enum rtx_code first_code;
5473       bool and_them;
5474
5475       rcode = reverse_condition_maybe_unordered (code);
5476       if (can_compare_p (rcode, mode, ccp_store_flag)
5477           && (code == ORDERED || code == UNORDERED
5478               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5479               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5480         {
               /* WANT_ADD is set when STORE_FLAG_VALUE and the requested
                  normalization are 1 and -1 in either order.  Then the
                  reversed comparison is computed as 0 / STORE_FLAG_VALUE and
                  NORMALIZEP is added, which maps STORE_FLAG_VALUE to 0 and
                  0 to NORMALIZEP.  Otherwise the reversed result is 0 /
                  TRUEVAL and is flipped by xoring with TRUEVAL.  */
5481           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5482                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5483
5484           /* For the reverse comparison, use either an addition or a XOR.  */
5485           if (want_add
5486               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5487                            optimize_insn_for_speed_p ()) == 0)
5488             {
5489               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5490                                        STORE_FLAG_VALUE, target_mode);
5491               if (tem)
5492                 return expand_binop (target_mode, add_optab, tem,
5493                                      gen_int_mode (normalizep, target_mode),
5494                                      target, 0, OPTAB_WIDEN);
5495             }
5496           else if (!want_add
5497                    && rtx_cost (trueval, XOR, 1,
5498                                 optimize_insn_for_speed_p ()) == 0)
5499             {
5500               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5501                                        normalizep, target_mode);
5502               if (tem)
5503                 return expand_binop (target_mode, xor_optab, tem, trueval,
5504                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5505             }
5506         }
5507
5508       delete_insns_since (last);
5509
5510       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5511       if (code == ORDERED || code == UNORDERED)
5512         return 0;
5513
           /* Split CODE into FIRST_CODE and a second comparison, which is
              stored back into CODE.  AND_THEM says whether the two results
              are to be combined with AND rather than OR (see the assertion
              and the conditional moves below).  */
5514       and_them = split_comparison (code, mode, &first_code, &code);
5515
5516       /* If there are no NaNs, the first comparison should always fall through.
5517          Effectively change the comparison to the other one.  */
5518       if (!HONOR_NANS (mode))
5519         {
5520           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5521           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5522                                     target_mode);
5523         }
5524
5525 #ifdef HAVE_conditional_move
5526       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5527          conditional move.  */
5528       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5529                                normalizep, target_mode);
5530       if (tem == 0)
5531         return 0;
5532
           /* AND: keep the first result if the second comparison holds, else
              0.  OR: TRUEVAL if the second comparison holds, else the first
              result.  */
5533       if (and_them)
5534         tem = emit_conditional_move (target, code, op0, op1, mode,
5535                                      tem, const0_rtx, GET_MODE (tem), 0);
5536       else
5537         tem = emit_conditional_move (target, code, op0, op1, mode,
5538                                      trueval, tem, GET_MODE (tem), 0);
5539
5540       if (tem == 0)
5541         delete_insns_since (last);
5542       return tem;
5543 #else
5544       return 0;
5545 #endif
5546     }
5547
5548   /* The remaining tricks only apply to integer comparisons.  */
5549
5550   if (GET_MODE_CLASS (mode) != MODE_INT)
5551     return 0;
5552
5553   /* If this is an equality comparison of integers, we can try to exclusive-or
5554      (or subtract) the two operands and use a recursive call to try the
5555      comparison with zero.  Don't do any of these cases if branches are
5556      very cheap.  */
5557
5558   if ((code == EQ || code == NE) && op1 != const0_rtx)
5559     {
5560       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5561                           OPTAB_WIDEN);
5562
5563       if (tem == 0)
5564         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5565                             OPTAB_WIDEN);
5566       if (tem != 0)
5567         tem = emit_store_flag (target, code, tem, const0_rtx,
5568                                mode, unsignedp, normalizep);
5569       if (tem != 0)
5570         return tem;
5571
5572       delete_insns_since (last);
5573     }
5574
5575   /* For integer comparisons, try the reverse comparison.  However, for
5576      small X and if we'd have anyway to extend, implementing "X != 0"
5577      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5578   rcode = reverse_condition (code);
5579   if (can_compare_p (rcode, mode, ccp_store_flag)
5580       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5581             && code == NE
5582             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5583             && op1 == const0_rtx))
5584     {
5585       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5586                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5587
5588       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5589       if (want_add
5590           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5591                        optimize_insn_for_speed_p ()) == 0)
5592         {
5593           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5594                                    STORE_FLAG_VALUE, target_mode);
5595           if (tem != 0)
5596             tem = expand_binop (target_mode, add_optab, tem,
5597                                 gen_int_mode (normalizep, target_mode),
5598                                 target, 0, OPTAB_WIDEN);
5599         }
5600       else if (!want_add
5601                && rtx_cost (trueval, XOR, 1,
5602                             optimize_insn_for_speed_p ()) == 0)
5603         {
5604           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5605                                    normalizep, target_mode);
5606           if (tem != 0)
5607             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5608                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5609         }
5610
           /* TEM is still 0 here if neither branch above was taken: every
              earlier path that left it nonzero has already returned.  */
5611       if (tem != 0)
5612         return tem;
5613       delete_insns_since (last);
5614     }
5615
5616   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5617      the constant zero.  Reject all other comparisons at this point.  Only
5618      do LE and GT if branches are expensive since they are expensive on
5619      2-operand machines.  */
5620
5621   if (op1 != const0_rtx
5622       || (code != EQ && code != NE
5623           && (BRANCH_COST (optimize_insn_for_speed_p (),
5624                            false) <= 1 || (code != LE && code != GT))))
5625     return 0;
5626
5627   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5628      do the necessary operation below.  */
5629
5630   tem = 0;
5631
5632   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5633      the sign bit set.  */
5634
5635   if (code == LE)
5636     {
5637       /* This is destructive, so SUBTARGET can't be OP0.  */
5638       if (rtx_equal_p (subtarget, op0))
5639         subtarget = 0;
5640
5641       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5642                           OPTAB_WIDEN);
5643       if (tem)
5644         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5645                             OPTAB_WIDEN);
5646     }
5647
5648   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5649      number of bits in the mode of OP0, minus one.  The shift is an
          arithmetic right shift, so the difference has its sign bit set iff
          A > 0.  */
5650
5651   if (code == GT)
5652     {
           /* As for LE, OP0 is read again after the first operation.  */
5653       if (rtx_equal_p (subtarget, op0))
5654         subtarget = 0;
5655
5656       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5657                           GET_MODE_BITSIZE (mode) - 1,
5658                           subtarget, 0);
5659       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5660                           OPTAB_WIDEN);
5661     }
5662
5663   if (code == EQ || code == NE)
5664     {
5665       /* For EQ or NE, one way to do the comparison is to apply an operation
5666          that converts the operand into a positive number if it is nonzero
5667          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5668          for NE we negate.  This puts the result in the sign bit.  Then we
5669          normalize with a shift, if needed.
5670
5671          Two operations that can do the above actions are ABS and FFS, so try
5672          them.  If that doesn't work, and MODE is smaller than a full word,
5673          we can use zero-extension to the wider mode (an unsigned conversion)
5674          as the operation.  */
5675
5676       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5677          that is compensated by the subsequent overflow when subtracting
5678          one / negating.  */
5679
5680       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5681         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5682       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5683         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5684       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5685         {
5686           tem = convert_modes (word_mode, mode, op0, 1);
               /* From here on the computation is carried out in word_mode.  */
5687           mode = word_mode;
5688         }
5689
5690       if (tem != 0)
5691         {
5692           if (code == EQ)
5693             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5694                                 0, OPTAB_WIDEN);
5695           else
5696             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5697         }
5698
5699       /* If we couldn't do it that way, for NE we can "or" the two's complement
5700          of the value with itself.  For EQ, we take the one's complement of
5701          that "or", which is an extra insn, so we only handle EQ if branches
5702          are expensive.  */
5703
5704       if (tem == 0
5705           && (code == NE
5706               || BRANCH_COST (optimize_insn_for_speed_p (),
5707                               false) > 1))
5708         {
5709           if (rtx_equal_p (subtarget, op0))
5710             subtarget = 0;
5711
5712           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5713           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5714                               OPTAB_WIDEN);
5715
5716           if (tem && code == EQ)
5717             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5718         }
5719     }
5720
       /* The answer is now in the sign bit of TEM.  If a 0/1 or 0/-1 value
          was requested, move it down with a logical (NORMALIZEP == 1) or
          arithmetic (NORMALIZEP == -1) right shift.  */
5721   if (tem && normalizep)
5722     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5723                         GET_MODE_BITSIZE (mode) - 1,
5724                         subtarget, normalizep == 1);
5725
5726   if (tem)
5727     {
           /* With no TARGET, return TEM as is.  Otherwise make sure the
              value ends up in TARGET: convert it when the modes differ, or
              copy it when no SUBTARGET was passed to the operations above.  */
5728       if (!target)
5729         ;
5730       else if (GET_MODE (tem) != target_mode)
5731         {
5732           convert_move (target, tem, 0);
5733           tem = target;
5734         }
5735       else if (!subtarget)
5736         {
5737           emit_move_insn (target, tem);
5738           tem = target;
5739         }
5740     }
5741   else
5742     delete_insns_since (last);
5743
5744   return tem;
5745 }
5746
5747 /* Like emit_store_flag, but always succeeds.

        The arguments are those of emit_store_flag.  If emit_store_flag
        cannot do the job, fall back to an explicit sequence that stores one
        value, conditionally jumps over a second store, and stores the other
        value.  If TARGET is null, a new word_mode pseudo is used for that
        sequence.  Return the rtx holding the result.  */
5748
5749 rtx
5750 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5751                        machine_mode mode, int unsignedp, int normalizep)
5752 {
5753   rtx tem;
5754   rtx_code_label *label;
5755   rtx trueval, falseval;
5756
5757   /* First see if emit_store_flag can do the job.  */
5758   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5759   if (tem != 0)
5760     return tem;
5761
5762   if (!target)
5763     target = gen_reg_rtx (word_mode);
5764
5765   /* If this failed, we have to do this with set/compare/jump/set code.
5766      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
       /* With NORMALIZEP == 0 the "true" value used here is 1.  */
5767   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5768   if (code == NE
5769       && GET_MODE_CLASS (mode) == MODE_INT
5770       && REG_P (target)
5771       && op0 == target
5772       && op1 == const0_rtx)
5773     {
           /* TARGET already holds 0 in the "false" case, so only the "true"
              case needs a store: jump around it when TARGET == 0.  */
5774       label = gen_label_rtx ();
5775       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5776                                mode, NULL_RTX, NULL_RTX, label, -1);
5777       emit_move_insn (target, trueval);
5778       emit_label (label);
5779       return target;
5780     }
5781
       /* TARGET is written before the comparison is made, so it must be a
          register that neither operand mentions; otherwise use a fresh
          pseudo of the same mode.  */
5782   if (!REG_P (target)
5783       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5784     target = gen_reg_rtx (GET_MODE (target));
5785
5786   /* Jump in the right direction if the target cannot implement CODE
5787      but can jump on its reverse condition.  */
5788   falseval = const0_rtx;
5789   if (! can_compare_p (code, mode, ccp_jump)
5790       && (! FLOAT_MODE_P (mode)
5791           || code == ORDERED || code == UNORDERED
5792           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5793           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5794     {
5795       enum rtx_code rcode;
5796       if (FLOAT_MODE_P (mode))
5797         rcode = reverse_condition_maybe_unordered (code);
5798       else
5799         rcode = reverse_condition (code);
5800
5801       /* Canonicalize to UNORDERED for the libcall.  */
5802       if (can_compare_p (rcode, mode, ccp_jump)
5803           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5804         {
               /* Test the reversed condition instead and exchange the two
                  values to be stored, which leaves the result unchanged.  */
5805           falseval = trueval;
5806           trueval = const0_rtx;
5807           code = rcode;
5808         }
5809     }
5810
       /* Store TRUEVAL, then jump to LABEL past the store of FALSEVAL when
          the comparison holds.  */
5811   emit_move_insn (target, trueval);
5812   label = gen_label_rtx ();
5813   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5814                            NULL_RTX, label, -1);
5815
5816   emit_move_insn (target, falseval);
5817   emit_label (label);
5818
5819   return target;
5820 }
5821 \f
5822 /* Perform possibly multi-word comparison and conditional jump to LABEL
5823    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5824    now a thin wrapper around do_compare_rtx_and_jump.

        The comparison is treated as unsigned exactly when OP is one of LTU,
        LEU, GTU or GEU.  */
5825
5826 static void
5827 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5828                  rtx_code_label *label)
5829 {
5830   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
       /* NOTE(review): the trailing -1 is presumably "no branch probability
          known", and the two NULL_RTX arguments presumably the size and the
          false label -- confirm against do_compare_rtx_and_jump's
          prototype.  */
5831   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5832                            NULL_RTX, NULL_RTX, label, -1);
5833 }