re PR middle-end/23623 (volatile keyword changes bitfield access size from 32bit...
[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2013 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
40
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
45
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 rtx);
56 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT, rtx, int);
59 static rtx mask_rtx (enum machine_mode, int, int, int);
60 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
61 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, int);
63 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
64 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
65 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
66
 67 /* Test whether a value is zero or a power of two.  */
68 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
69 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
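
/* Illustrative sketch, not part of the build: the predicate relies on the
   usual "x & (x - 1)" trick -- clearing the lowest set bit leaves zero
   exactly for powers of two, and the test also accepts zero itself.  */
#if 0
static void
example_exact_power_of_2_or_zero (void)
{
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (0));     /* 0 & all-ones == 0 */
  gcc_assert (EXACT_POWER_OF_2_OR_ZERO_P (8));     /* 8 & 7 == 0 */
  gcc_assert (! EXACT_POWER_OF_2_OR_ZERO_P (12));  /* 12 & 11 == 8 */
}
#endif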
70
71 struct init_expmed_rtl
72 {
73 struct rtx_def reg;
74 struct rtx_def plus;
75 struct rtx_def neg;
76 struct rtx_def mult;
77 struct rtx_def sdiv;
78 struct rtx_def udiv;
79 struct rtx_def sdiv_32;
80 struct rtx_def smod_32;
81 struct rtx_def wide_mult;
82 struct rtx_def wide_lshr;
83 struct rtx_def wide_trunc;
84 struct rtx_def shift;
85 struct rtx_def shift_mult;
86 struct rtx_def shift_add;
87 struct rtx_def shift_sub0;
88 struct rtx_def shift_sub1;
89 struct rtx_def zext;
90 struct rtx_def trunc;
91
92 rtx pow2[MAX_BITS_PER_WORD];
93 rtx cint[MAX_BITS_PER_WORD];
94 };
95
96 static void
97 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
98 enum machine_mode from_mode, bool speed)
99 {
100 int to_size, from_size;
101 rtx which;
102
103 /* We're given no information about the true size of a partial integer,
104 only the size of the "full" integer it requires for storage. For
105 comparison purposes here, reduce the bit size by one in that case. */
106 to_size = (GET_MODE_BITSIZE (to_mode)
107 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
108 from_size = (GET_MODE_BITSIZE (from_mode)
109 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
110
111 /* Assume cost of zero-extend and sign-extend is the same. */
112 which = (to_size < from_size ? &all->trunc : &all->zext);
113
114 PUT_MODE (&all->reg, from_mode);
115 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
116 }
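
/* Illustrative sketch, not part of the build: the partial-int adjustment
   above matters when two modes report the same storage size.  On a target
   where PSImode is stored in 32 bits, its size is reduced to 31 for the
   comparison, so a PSImode -> SImode conversion is costed as an extension
   and SImode -> PSImode as a truncation, rather than both being costed as
   extensions.  */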
117
118 static void
119 init_expmed_one_mode (struct init_expmed_rtl *all,
120 enum machine_mode mode, int speed)
121 {
122 int m, n, mode_bitsize;
123 enum machine_mode mode_from;
124
125 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
126
127 PUT_MODE (&all->reg, mode);
128 PUT_MODE (&all->plus, mode);
129 PUT_MODE (&all->neg, mode);
130 PUT_MODE (&all->mult, mode);
131 PUT_MODE (&all->sdiv, mode);
132 PUT_MODE (&all->udiv, mode);
133 PUT_MODE (&all->sdiv_32, mode);
134 PUT_MODE (&all->smod_32, mode);
135 PUT_MODE (&all->wide_trunc, mode);
136 PUT_MODE (&all->shift, mode);
137 PUT_MODE (&all->shift_mult, mode);
138 PUT_MODE (&all->shift_add, mode);
139 PUT_MODE (&all->shift_sub0, mode);
140 PUT_MODE (&all->shift_sub1, mode);
141 PUT_MODE (&all->zext, mode);
142 PUT_MODE (&all->trunc, mode);
143
144 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
145 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
146 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
147 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
148 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
149
150 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
151 <= 2 * add_cost (speed, mode)));
152 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
153 <= 4 * add_cost (speed, mode)));
154
155 set_shift_cost (speed, mode, 0, 0);
156 {
157 int cost = add_cost (speed, mode);
158 set_shiftadd_cost (speed, mode, 0, cost);
159 set_shiftsub0_cost (speed, mode, 0, cost);
160 set_shiftsub1_cost (speed, mode, 0, cost);
161 }
162
163 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
164 for (m = 1; m < n; m++)
165 {
166 XEXP (&all->shift, 1) = all->cint[m];
167 XEXP (&all->shift_mult, 1) = all->pow2[m];
168
169 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
170 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
171 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
172 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
173 }
174
175 if (SCALAR_INT_MODE_P (mode))
176 {
177 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
178 mode_from = (enum machine_mode)(mode_from + 1))
179 init_expmed_one_conv (all, mode, mode_from, speed);
180 }
181 if (GET_MODE_CLASS (mode) == MODE_INT)
182 {
183 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
184 if (wider_mode != VOIDmode)
185 {
186 PUT_MODE (&all->zext, wider_mode);
187 PUT_MODE (&all->wide_mult, wider_mode);
188 PUT_MODE (&all->wide_lshr, wider_mode);
189 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
190
191 set_mul_widen_cost (speed, wider_mode,
192 set_src_cost (&all->wide_mult, speed));
193 set_mul_highpart_cost (speed, mode,
194 set_src_cost (&all->wide_trunc, speed));
195 }
196 }
197 }
198
199 void
200 init_expmed (void)
201 {
202 struct init_expmed_rtl all;
203 enum machine_mode mode;
204 int m, speed;
205
206 memset (&all, 0, sizeof all);
207 for (m = 1; m < MAX_BITS_PER_WORD; m++)
208 {
209 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
210 all.cint[m] = GEN_INT (m);
211 }
212
213 PUT_CODE (&all.reg, REG);
214 /* Avoid using hard regs in ways which may be unsupported. */
215 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
216
217 PUT_CODE (&all.plus, PLUS);
218 XEXP (&all.plus, 0) = &all.reg;
219 XEXP (&all.plus, 1) = &all.reg;
220
221 PUT_CODE (&all.neg, NEG);
222 XEXP (&all.neg, 0) = &all.reg;
223
224 PUT_CODE (&all.mult, MULT);
225 XEXP (&all.mult, 0) = &all.reg;
226 XEXP (&all.mult, 1) = &all.reg;
227
228 PUT_CODE (&all.sdiv, DIV);
229 XEXP (&all.sdiv, 0) = &all.reg;
230 XEXP (&all.sdiv, 1) = &all.reg;
231
232 PUT_CODE (&all.udiv, UDIV);
233 XEXP (&all.udiv, 0) = &all.reg;
234 XEXP (&all.udiv, 1) = &all.reg;
235
236 PUT_CODE (&all.sdiv_32, DIV);
237 XEXP (&all.sdiv_32, 0) = &all.reg;
238 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
239
240 PUT_CODE (&all.smod_32, MOD);
241 XEXP (&all.smod_32, 0) = &all.reg;
242 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
243
244 PUT_CODE (&all.zext, ZERO_EXTEND);
245 XEXP (&all.zext, 0) = &all.reg;
246
247 PUT_CODE (&all.wide_mult, MULT);
248 XEXP (&all.wide_mult, 0) = &all.zext;
249 XEXP (&all.wide_mult, 1) = &all.zext;
250
251 PUT_CODE (&all.wide_lshr, LSHIFTRT);
252 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
253
254 PUT_CODE (&all.wide_trunc, TRUNCATE);
255 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
256
257 PUT_CODE (&all.shift, ASHIFT);
258 XEXP (&all.shift, 0) = &all.reg;
259
260 PUT_CODE (&all.shift_mult, MULT);
261 XEXP (&all.shift_mult, 0) = &all.reg;
262
263 PUT_CODE (&all.shift_add, PLUS);
264 XEXP (&all.shift_add, 0) = &all.shift_mult;
265 XEXP (&all.shift_add, 1) = &all.reg;
266
267 PUT_CODE (&all.shift_sub0, MINUS);
268 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
269 XEXP (&all.shift_sub0, 1) = &all.reg;
270
271 PUT_CODE (&all.shift_sub1, MINUS);
272 XEXP (&all.shift_sub1, 0) = &all.reg;
273 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
274
275 PUT_CODE (&all.trunc, TRUNCATE);
276 XEXP (&all.trunc, 0) = &all.reg;
277
278 for (speed = 0; speed < 2; speed++)
279 {
280 crtl->maybe_hot_insn_p = speed;
281 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
282
283 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
284 mode = (enum machine_mode)(mode + 1))
285 init_expmed_one_mode (&all, mode, speed);
286
287 if (MIN_MODE_PARTIAL_INT != VOIDmode)
288 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
289 mode = (enum machine_mode)(mode + 1))
290 init_expmed_one_mode (&all, mode, speed);
291
292 if (MIN_MODE_VECTOR_INT != VOIDmode)
293 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
294 mode = (enum machine_mode)(mode + 1))
295 init_expmed_one_mode (&all, mode, speed);
296 }
297
298 if (alg_hash_used_p ())
299 {
300 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
301 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
302 }
303 else
304 set_alg_hash_used_p (true);
305 default_rtl_profile ();
306 }
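
/* Illustrative sketch, not part of the build: the statically allocated rtxes
   above are skeletons re-used for every mode and shift count.  For example,
   with m == 3 the shift_add skeleton is costed as
   (plus:M (mult:M (reg:M) (const_int 8)) (reg:M)), so a target whose cost
   hooks recognize shift-and-add addressing can report it as cheap.  */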
307
308 /* Return an rtx representing minus the value of X.
309 MODE is the intended mode of the result,
310 useful if X is a CONST_INT. */
311
312 rtx
313 negate_rtx (enum machine_mode mode, rtx x)
314 {
315 rtx result = simplify_unary_operation (NEG, mode, x, mode);
316
317 if (result == 0)
318 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
319
320 return result;
321 }
322
323 /* Adjust bitfield memory MEM so that it points to the first unit of mode
324 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
325 If MODE is BLKmode, return a reference to every byte in the bitfield.
326 Set *NEW_BITNUM to the bit position of the field within the new memory. */
327
328 static rtx
329 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
330 unsigned HOST_WIDE_INT bitsize,
331 unsigned HOST_WIDE_INT bitnum,
332 unsigned HOST_WIDE_INT *new_bitnum)
333 {
334 if (mode == BLKmode)
335 {
336 *new_bitnum = bitnum % BITS_PER_UNIT;
337 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
338 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
339 / BITS_PER_UNIT);
340 return adjust_bitfield_address_size (mem, mode, offset, size);
341 }
342 else
343 {
344 unsigned int unit = GET_MODE_BITSIZE (mode);
345 *new_bitnum = bitnum % unit;
346 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
347 return adjust_bitfield_address (mem, mode, offset);
348 }
349 }
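
/* Illustrative sketch, not part of the build: the non-BLKmode case above is
   plain integer arithmetic.  Assuming BITS_PER_UNIT == 8 and a 32-bit MODE,
   a field at bit 70 is re-addressed at byte offset 8 with 6 bits left over.  */
#if 0
static void
example_narrow_bit_field_arithmetic (void)
{
  unsigned HOST_WIDE_INT bitnum = 70, unit = 32;
  unsigned HOST_WIDE_INT new_bitnum = bitnum % unit;          /* 6 */
  unsigned HOST_WIDE_INT offset = (bitnum - new_bitnum) / 8;  /* 8 bytes */
  gcc_assert (new_bitnum == 6 && offset == 8);
}
#endif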
350
351 /* The caller wants to perform insertion or extraction PATTERN on a
352 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
353 BITREGION_START and BITREGION_END are as for store_bit_field
354 and FIELDMODE is the natural mode of the field.
355
356 Search for a mode that is compatible with the memory access
357 restrictions and (where applicable) with a register insertion or
358 extraction. Return the new memory on success, storing the adjusted
359 bit position in *NEW_BITNUM. Return null otherwise. */
360
361 static rtx
362 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
363 rtx op0, HOST_WIDE_INT bitsize,
364 HOST_WIDE_INT bitnum,
365 unsigned HOST_WIDE_INT bitregion_start,
366 unsigned HOST_WIDE_INT bitregion_end,
367 enum machine_mode fieldmode,
368 unsigned HOST_WIDE_INT *new_bitnum)
369 {
370 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
371 bitregion_end, MEM_ALIGN (op0),
372 MEM_VOLATILE_P (op0));
373 enum machine_mode best_mode;
374 if (iter.next_mode (&best_mode))
375 {
376 /* We can use a memory in BEST_MODE. See whether this is true for
377 any wider modes. All other things being equal, we prefer to
378 use the widest mode possible because it tends to expose more
379 CSE opportunities. */
380 if (!iter.prefer_smaller_modes ())
381 {
382 /* Limit the search to the mode required by the corresponding
383 register insertion or extraction instruction, if any. */
384 enum machine_mode limit_mode = word_mode;
385 extraction_insn insn;
386 if (get_best_reg_extraction_insn (&insn, pattern,
387 GET_MODE_BITSIZE (best_mode),
388 fieldmode))
389 limit_mode = insn.field_mode;
390
391 enum machine_mode wider_mode;
392 while (iter.next_mode (&wider_mode)
393 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
394 best_mode = wider_mode;
395 }
396 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
397 new_bitnum);
398 }
399 return NULL_RTX;
400 }
401
402 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
403 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
404 offset is then BITNUM / BITS_PER_UNIT. */
405
406 static bool
407 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
408 unsigned HOST_WIDE_INT bitsize,
409 enum machine_mode struct_mode)
410 {
411 if (BYTES_BIG_ENDIAN)
412 return (bitnum % BITS_PER_UNIT == 0
413 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
414 || (bitnum + bitsize) % BITS_PER_WORD == 0));
415 else
416 return bitnum % BITS_PER_WORD == 0;
417 }
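
/* Illustrative sketch, not part of the build: with 32-bit words and 8-bit
   units, a 16-bit field at bit 32 of a 64-bit structure is a lowpart on a
   little-endian target (32 % BITS_PER_WORD == 0), while on a big-endian
   target the lowpart is the field that ends at bit 64, i.e. the one starting
   at bit 48 (48 % 8 == 0 and 48 + 16 == 64).  */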
418
 419 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
420 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. */
421
422 static bool
423 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
424 unsigned HOST_WIDE_INT bitnum,
425 enum machine_mode fieldmode)
426 {
427 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
428
429 /* -fstrict-volatile-bitfields must be enabled and we must have a
430 volatile MEM. */
431 if (!MEM_P (op0)
432 || !MEM_VOLATILE_P (op0)
433 || flag_strict_volatile_bitfields <= 0)
434 return false;
435
436 /* Non-integral modes likely only happen with packed structures.
437 Punt. */
438 if (!SCALAR_INT_MODE_P (fieldmode))
439 return false;
440
441 /* The bit size must not be larger than the field mode, and
442 the field mode must not be larger than a word. */
443 if (bitsize > modesize || modesize > BITS_PER_WORD)
444 return false;
445
446 /* Check for cases of unaligned fields that must be split. */
447 if (bitnum % BITS_PER_UNIT + bitsize > modesize
448 || (STRICT_ALIGNMENT
449 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
450 return false;
451
452 return true;
453 }
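
/* Illustrative sketch, not part of the build, with made-up names: roughly
   the kind of access the commit subject (PR middle-end/23623) is about.
   With -fstrict-volatile-bitfields, the store below is expected to use a
   single access in the declared field mode (SImode for "unsigned int")
   rather than a narrower or wider one.  */
#if 0
struct example_device_reg { volatile unsigned int status : 7; } example_reg;

static void
example_strict_volatile_store (void)
{
  example_reg.status = 3;  /* one 32-bit read-modify-write */
}
#endif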
454
 455 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
456 bit number BITNUM can be treated as a simple value of mode MODE. */
457
458 static bool
459 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
460 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
461 {
462 return (MEM_P (op0)
463 && bitnum % BITS_PER_UNIT == 0
464 && bitsize == GET_MODE_BITSIZE (mode)
465 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
466 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
467 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
468 }
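
/* Illustrative sketch, not part of the build: assuming 8-bit units, a 32-bit
   SImode field starting at bit 64 of a MEM aligned to at least 32 bits is
   "simple" -- it is byte aligned (64 % 8 == 0), exactly fills the mode and
   meets the alignment test -- so it can be moved with a single SImode load
   or store.  A field starting at bit 65 would fail the first test.  */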
469 \f
470 /* Try to use instruction INSV to store VALUE into a field of OP0.
471 BITSIZE and BITNUM are as for store_bit_field. */
472
473 static bool
474 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
475 unsigned HOST_WIDE_INT bitsize,
476 unsigned HOST_WIDE_INT bitnum, rtx value)
477 {
478 struct expand_operand ops[4];
479 rtx value1;
480 rtx xop0 = op0;
481 rtx last = get_last_insn ();
482 bool copy_back = false;
483
484 enum machine_mode op_mode = insv->field_mode;
485 unsigned int unit = GET_MODE_BITSIZE (op_mode);
486 if (bitsize == 0 || bitsize > unit)
487 return false;
488
489 if (MEM_P (xop0))
490 /* Get a reference to the first byte of the field. */
491 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
492 &bitnum);
493 else
494 {
495 /* Convert from counting within OP0 to counting in OP_MODE. */
496 if (BYTES_BIG_ENDIAN)
497 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
498
499 /* If xop0 is a register, we need it in OP_MODE
500 to make it acceptable to the format of insv. */
501 if (GET_CODE (xop0) == SUBREG)
502 /* We can't just change the mode, because this might clobber op0,
503 and we will need the original value of op0 if insv fails. */
504 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
505 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
506 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
507 }
508
509 /* If the destination is a paradoxical subreg such that we need a
510 truncate to the inner mode, perform the insertion on a temporary and
511 truncate the result to the original destination. Note that we can't
512 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
513 X) 0)) is (reg:N X). */
514 if (GET_CODE (xop0) == SUBREG
515 && REG_P (SUBREG_REG (xop0))
516 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
517 op_mode))
518 {
519 rtx tem = gen_reg_rtx (op_mode);
520 emit_move_insn (tem, xop0);
521 xop0 = tem;
522 copy_back = true;
523 }
524
525 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
526 "backwards" from the size of the unit we are inserting into.
527 Otherwise, we count bits from the most significant on a
528 BYTES/BITS_BIG_ENDIAN machine. */
529
530 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
531 bitnum = unit - bitsize - bitnum;
532
533 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
534 value1 = value;
535 if (GET_MODE (value) != op_mode)
536 {
537 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
538 {
539 /* Optimization: Don't bother really extending VALUE
540 if it has all the bits we will actually use. However,
541 if we must narrow it, be sure we do it correctly. */
542
543 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
544 {
545 rtx tmp;
546
547 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
548 if (! tmp)
549 tmp = simplify_gen_subreg (op_mode,
550 force_reg (GET_MODE (value),
551 value1),
552 GET_MODE (value), 0);
553 value1 = tmp;
554 }
555 else
556 value1 = gen_lowpart (op_mode, value1);
557 }
558 else if (CONST_INT_P (value))
559 value1 = gen_int_mode (INTVAL (value), op_mode);
560 else
561 /* Parse phase is supposed to make VALUE's data type
562 match that of the component reference, which is a type
563 at least as wide as the field; so VALUE should have
564 a mode that corresponds to that type. */
565 gcc_assert (CONSTANT_P (value));
566 }
567
568 create_fixed_operand (&ops[0], xop0);
569 create_integer_operand (&ops[1], bitsize);
570 create_integer_operand (&ops[2], bitnum);
571 create_input_operand (&ops[3], value1, op_mode);
572 if (maybe_expand_insn (insv->icode, 4, ops))
573 {
574 if (copy_back)
575 convert_move (op0, xop0, true);
576 return true;
577 }
578 delete_insns_since (last);
579 return false;
580 }
581
582 /* A subroutine of store_bit_field, with the same arguments. Return true
583 if the operation could be implemented.
584
585 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
586 no other way of implementing the operation. If FALLBACK_P is false,
587 return false instead. */
588
589 static bool
590 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
591 unsigned HOST_WIDE_INT bitnum,
592 unsigned HOST_WIDE_INT bitregion_start,
593 unsigned HOST_WIDE_INT bitregion_end,
594 enum machine_mode fieldmode,
595 rtx value, bool fallback_p)
596 {
597 rtx op0 = str_rtx;
598 rtx orig_value;
599
600 while (GET_CODE (op0) == SUBREG)
601 {
602 /* The following line once was done only if WORDS_BIG_ENDIAN,
603 but I think that is a mistake. WORDS_BIG_ENDIAN is
604 meaningful at a much higher level; when structures are copied
605 between memory and regs, the higher-numbered regs
606 always get higher addresses. */
607 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
608 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
609 int byte_offset = 0;
610
611 /* Paradoxical subregs need special handling on big endian machines. */
612 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
613 {
614 int difference = inner_mode_size - outer_mode_size;
615
616 if (WORDS_BIG_ENDIAN)
617 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
618 if (BYTES_BIG_ENDIAN)
619 byte_offset += difference % UNITS_PER_WORD;
620 }
621 else
622 byte_offset = SUBREG_BYTE (op0);
623
624 bitnum += byte_offset * BITS_PER_UNIT;
625 op0 = SUBREG_REG (op0);
626 }
627
628 /* No action is needed if the target is a register and if the field
629 lies completely outside that register. This can occur if the source
630 code contains an out-of-bounds access to a small array. */
631 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
632 return true;
633
634 /* Use vec_set patterns for inserting parts of vectors whenever
635 available. */
636 if (VECTOR_MODE_P (GET_MODE (op0))
637 && !MEM_P (op0)
638 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
639 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
640 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
641 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
642 {
643 struct expand_operand ops[3];
644 enum machine_mode outermode = GET_MODE (op0);
645 enum machine_mode innermode = GET_MODE_INNER (outermode);
646 enum insn_code icode = optab_handler (vec_set_optab, outermode);
647 int pos = bitnum / GET_MODE_BITSIZE (innermode);
648
649 create_fixed_operand (&ops[0], op0);
650 create_input_operand (&ops[1], value, innermode);
651 create_integer_operand (&ops[2], pos);
652 if (maybe_expand_insn (icode, 3, ops))
653 return true;
654 }
655
 656   /* If the target is a register, then overwriting the entire object or storing
657 a full-word or multi-word field can be done with just a SUBREG. */
658 if (!MEM_P (op0)
659 && bitsize == GET_MODE_BITSIZE (fieldmode)
660 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
661 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
662 {
663 /* Use the subreg machinery either to narrow OP0 to the required
664 words or to cope with mode punning between equal-sized modes.
665 In the latter case, use subreg on the rhs side, not lhs. */
666 rtx sub;
667
668 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
669 {
670 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
671 if (sub)
672 {
673 emit_move_insn (op0, sub);
674 return true;
675 }
676 }
677 else
678 {
679 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
680 bitnum / BITS_PER_UNIT);
681 if (sub)
682 {
683 emit_move_insn (sub, value);
684 return true;
685 }
686 }
687 }
688
689 /* If the target is memory, storing any naturally aligned field can be
690 done with a simple store. For targets that support fast unaligned
691 memory, any naturally sized, unit aligned field can be done directly. */
692 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
693 {
694 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
695 emit_move_insn (op0, value);
696 return true;
697 }
698
699 /* Make sure we are playing with integral modes. Pun with subregs
700 if we aren't. This must come after the entire register case above,
701 since that case is valid for any mode. The following cases are only
702 valid for integral modes. */
703 {
704 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
705 if (imode != GET_MODE (op0))
706 {
707 if (MEM_P (op0))
708 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
709 else
710 {
711 gcc_assert (imode != BLKmode);
712 op0 = gen_lowpart (imode, op0);
713 }
714 }
715 }
716
717 /* Storing an lsb-aligned field in a register
718 can be done with a movstrict instruction. */
719
720 if (!MEM_P (op0)
721 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
722 && bitsize == GET_MODE_BITSIZE (fieldmode)
723 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
724 {
725 struct expand_operand ops[2];
726 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
727 rtx arg0 = op0;
728 unsigned HOST_WIDE_INT subreg_off;
729
730 if (GET_CODE (arg0) == SUBREG)
731 {
732 /* Else we've got some float mode source being extracted into
733 a different float mode destination -- this combination of
734 subregs results in Severe Tire Damage. */
735 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
736 || GET_MODE_CLASS (fieldmode) == MODE_INT
737 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
738 arg0 = SUBREG_REG (arg0);
739 }
740
741 subreg_off = bitnum / BITS_PER_UNIT;
742 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
743 {
744 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
745
746 create_fixed_operand (&ops[0], arg0);
747 /* Shrink the source operand to FIELDMODE. */
748 create_convert_operand_to (&ops[1], value, fieldmode, false);
749 if (maybe_expand_insn (icode, 2, ops))
750 return true;
751 }
752 }
753
754 /* Handle fields bigger than a word. */
755
756 if (bitsize > BITS_PER_WORD)
757 {
758 /* Here we transfer the words of the field
759 in the order least significant first.
760 This is because the most significant word is the one which may
761 be less than full.
762 However, only do that if the value is not BLKmode. */
763
764 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
765 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
766 unsigned int i;
767 rtx last;
768
769 /* This is the mode we must force value to, so that there will be enough
770 subwords to extract. Note that fieldmode will often (always?) be
771 VOIDmode, because that is what store_field uses to indicate that this
772 is a bit field, but passing VOIDmode to operand_subword_force
773 is not allowed. */
774 fieldmode = GET_MODE (value);
775 if (fieldmode == VOIDmode)
776 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
777
778 last = get_last_insn ();
779 for (i = 0; i < nwords; i++)
780 {
781 /* If I is 0, use the low-order word in both field and target;
782 if I is 1, use the next to lowest word; and so on. */
783 unsigned int wordnum = (backwards
784 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
785 - i - 1
786 : i);
787 unsigned int bit_offset = (backwards
788 ? MAX ((int) bitsize - ((int) i + 1)
789 * BITS_PER_WORD,
790 0)
791 : (int) i * BITS_PER_WORD);
792 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
793 unsigned HOST_WIDE_INT new_bitsize =
794 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
795
796 /* If the remaining chunk doesn't have full wordsize we have
797 to make sure that for big endian machines the higher order
798 bits are used. */
799 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
800 value_word = simplify_expand_binop (word_mode, lshr_optab,
801 value_word,
802 GEN_INT (BITS_PER_WORD
803 - new_bitsize),
804 NULL_RTX, true,
805 OPTAB_LIB_WIDEN);
806
807 if (!store_bit_field_1 (op0, new_bitsize,
808 bitnum + bit_offset,
809 bitregion_start, bitregion_end,
810 word_mode,
811 value_word, fallback_p))
812 {
813 delete_insns_since (last);
814 return false;
815 }
816 }
817 return true;
818 }
819
820 /* If VALUE has a floating-point or complex mode, access it as an
821 integer of the corresponding size. This can occur on a machine
822 with 64 bit registers that uses SFmode for float. It can also
823 occur for unaligned float or complex fields. */
824 orig_value = value;
825 if (GET_MODE (value) != VOIDmode
826 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
827 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
828 {
829 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
830 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
831 }
832
833 /* If OP0 is a multi-word register, narrow it to the affected word.
834 If the region spans two words, defer to store_split_bit_field. */
835 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
836 {
837 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
838 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
839 gcc_assert (op0);
840 bitnum %= BITS_PER_WORD;
841 if (bitnum + bitsize > BITS_PER_WORD)
842 {
843 if (!fallback_p)
844 return false;
845
846 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
847 bitregion_end, value);
848 return true;
849 }
850 }
851
852 /* From here on we can assume that the field to be stored in fits
853 within a word. If the destination is a register, it too fits
854 in a word. */
855
856 extraction_insn insv;
857 if (!MEM_P (op0)
858 && get_best_reg_extraction_insn (&insv, EP_insv,
859 GET_MODE_BITSIZE (GET_MODE (op0)),
860 fieldmode)
861 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
862 return true;
863
864 /* If OP0 is a memory, try copying it to a register and seeing if a
865 cheap register alternative is available. */
866 if (MEM_P (op0))
867 {
868 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
869 fieldmode)
870 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
871 return true;
872
873 rtx last = get_last_insn ();
874
875 /* Try loading part of OP0 into a register, inserting the bitfield
876 into that, and then copying the result back to OP0. */
877 unsigned HOST_WIDE_INT bitpos;
878 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
879 bitregion_start, bitregion_end,
880 fieldmode, &bitpos);
881 if (xop0)
882 {
883 rtx tempreg = copy_to_reg (xop0);
884 if (store_bit_field_1 (tempreg, bitsize, bitpos,
885 bitregion_start, bitregion_end,
886 fieldmode, orig_value, false))
887 {
888 emit_move_insn (xop0, tempreg);
889 return true;
890 }
891 delete_insns_since (last);
892 }
893 }
894
895 if (!fallback_p)
896 return false;
897
898 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
899 bitregion_end, value);
900 return true;
901 }
902
903 /* Generate code to store value from rtx VALUE
904 into a bit-field within structure STR_RTX
905 containing BITSIZE bits starting at bit BITNUM.
906
907 BITREGION_START is bitpos of the first bitfield in this region.
908 BITREGION_END is the bitpos of the ending bitfield in this region.
909 These two fields are 0, if the C++ memory model does not apply,
910 or we are not interested in keeping track of bitfield regions.
911
912 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
913
914 void
915 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
916 unsigned HOST_WIDE_INT bitnum,
917 unsigned HOST_WIDE_INT bitregion_start,
918 unsigned HOST_WIDE_INT bitregion_end,
919 enum machine_mode fieldmode,
920 rtx value)
921 {
922 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
923 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
924 {
925
926 /* Storing any naturally aligned field can be done with a simple
927 store. For targets that support fast unaligned memory, any
928 naturally sized, unit aligned field can be done directly. */
929 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
930 {
931 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
932 bitnum / BITS_PER_UNIT);
933 emit_move_insn (str_rtx, value);
934 }
935 else
936 /* Explicitly override the C/C++ memory model; ignore the
937 bit range so that we can do the access in the mode mandated
938 by -fstrict-volatile-bitfields instead. */
939 store_fixed_bit_field (str_rtx, bitsize, bitnum, 0, 0, value);
940 return;
941 }
942
943 /* Under the C++0x memory model, we must not touch bits outside the
944 bit region. Adjust the address to start at the beginning of the
945 bit region. */
946 if (MEM_P (str_rtx) && bitregion_start > 0)
947 {
948 enum machine_mode bestmode;
949 HOST_WIDE_INT offset, size;
950
951 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
952
953 offset = bitregion_start / BITS_PER_UNIT;
954 bitnum -= bitregion_start;
955 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
956 bitregion_end -= bitregion_start;
957 bitregion_start = 0;
958 bestmode = get_best_mode (bitsize, bitnum,
959 bitregion_start, bitregion_end,
960 MEM_ALIGN (str_rtx), VOIDmode,
961 MEM_VOLATILE_P (str_rtx));
962 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
963 }
964
965 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
966 bitregion_start, bitregion_end,
967 fieldmode, value, true))
968 gcc_unreachable ();
969 }
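
/* Illustrative sketch, not part of the build: a worked example of the bit
   region adjustment above, assuming BITS_PER_UNIT == 8.  For a 3-bit field
   at bit 37 whose region starts at bit 32, the MEM is advanced by
   32 / 8 == 4 bytes, BITNUM becomes 37 - 32 == 5, and SIZE is
   (5 + 3 + 8 - 1) / 8 == 1 byte, so no byte outside the region is touched.  */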
970 \f
971 /* Use shifts and boolean operations to store VALUE into a bit field of
972 width BITSIZE in OP0, starting at bit BITNUM. */
973
974 static void
975 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
976 unsigned HOST_WIDE_INT bitnum,
977 unsigned HOST_WIDE_INT bitregion_start,
978 unsigned HOST_WIDE_INT bitregion_end,
979 rtx value)
980 {
981 enum machine_mode mode;
982 rtx temp;
983 int all_zero = 0;
984 int all_one = 0;
985
986 /* There is a case not handled here:
987 a structure with a known alignment of just a halfword
988 and a field split across two aligned halfwords within the structure.
989 Or likewise a structure with a known alignment of just a byte
990 and a field split across two bytes.
991 Such cases are not supposed to be able to occur. */
992
993 if (MEM_P (op0))
994 {
995 mode = GET_MODE (op0);
996 if (GET_MODE_BITSIZE (mode) == 0
997 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
998 mode = word_mode;
999 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1000 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1001
1002 if (mode == VOIDmode)
1003 {
1004 /* The only way this should occur is if the field spans word
1005 boundaries. */
1006 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1007 bitregion_end, value);
1008 return;
1009 }
1010
1011 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1012 }
1013
1014 mode = GET_MODE (op0);
1015 gcc_assert (SCALAR_INT_MODE_P (mode));
1016
1017 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1018 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1019
1020 if (BYTES_BIG_ENDIAN)
1021 /* BITNUM is the distance between our msb
1022 and that of the containing datum.
1023 Convert it to the distance from the lsb. */
1024 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1025
1026 /* Now BITNUM is always the distance between our lsb
1027 and that of OP0. */
1028
1029 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1030 we must first convert its mode to MODE. */
1031
1032 if (CONST_INT_P (value))
1033 {
1034 HOST_WIDE_INT v = INTVAL (value);
1035
1036 if (bitsize < HOST_BITS_PER_WIDE_INT)
1037 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1038
1039 if (v == 0)
1040 all_zero = 1;
1041 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1042 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1043 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1044 all_one = 1;
1045
1046 value = lshift_value (mode, v, bitnum);
1047 }
1048 else
1049 {
1050 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1051 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1052
1053 if (GET_MODE (value) != mode)
1054 value = convert_to_mode (mode, value, 1);
1055
1056 if (must_and)
1057 value = expand_binop (mode, and_optab, value,
1058 mask_rtx (mode, 0, bitsize, 0),
1059 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1060 if (bitnum > 0)
1061 value = expand_shift (LSHIFT_EXPR, mode, value,
1062 bitnum, NULL_RTX, 1);
1063 }
1064
1065 /* Now clear the chosen bits in OP0,
1066 except that if VALUE is -1 we need not bother. */
1067 /* We keep the intermediates in registers to allow CSE to combine
1068 consecutive bitfield assignments. */
1069
1070 temp = force_reg (mode, op0);
1071
1072 if (! all_one)
1073 {
1074 temp = expand_binop (mode, and_optab, temp,
1075 mask_rtx (mode, bitnum, bitsize, 1),
1076 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1077 temp = force_reg (mode, temp);
1078 }
1079
1080 /* Now logical-or VALUE into OP0, unless it is zero. */
1081
1082 if (! all_zero)
1083 {
1084 temp = expand_binop (mode, ior_optab, temp, value,
1085 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1086 temp = force_reg (mode, temp);
1087 }
1088
1089 if (op0 != temp)
1090 {
1091 op0 = copy_rtx (op0);
1092 emit_move_insn (op0, temp);
1093 }
1094 }
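
/* Illustrative sketch, not part of the build: the clear/insert sequence
   above written out on plain integers, with little-endian bit numbering and
   assuming 0 < BITSIZE < 32 so the field fits in one 32-bit word.  */
#if 0
static unsigned int
example_store_fixed_bit_field (unsigned int word, unsigned int value,
                               unsigned int bitsize, unsigned int bitnum)
{
  unsigned int mask = ((1U << bitsize) - 1) << bitnum;  /* the chosen bits */
  word &= ~mask;                                         /* clear the field */
  word |= (value << bitnum) & mask;                      /* OR the value in */
  return word;
}
#endif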
1095 \f
1096 /* Store a bit field that is split across multiple accessible memory objects.
1097
1098 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1099 BITSIZE is the field width; BITPOS the position of its first bit
1100 (within the word).
1101 VALUE is the value to store.
1102
1103 This does not yet handle fields wider than BITS_PER_WORD. */
1104
1105 static void
1106 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1107 unsigned HOST_WIDE_INT bitpos,
1108 unsigned HOST_WIDE_INT bitregion_start,
1109 unsigned HOST_WIDE_INT bitregion_end,
1110 rtx value)
1111 {
1112 unsigned int unit;
1113 unsigned int bitsdone = 0;
1114
 1115   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1116 much at a time. */
1117 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1118 unit = BITS_PER_WORD;
1119 else
1120 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1121
1122 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1123 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1124 that VALUE might be a floating-point constant. */
1125 if (CONSTANT_P (value) && !CONST_INT_P (value))
1126 {
1127 rtx word = gen_lowpart_common (word_mode, value);
1128
1129 if (word && (value != word))
1130 value = word;
1131 else
1132 value = gen_lowpart_common (word_mode,
1133 force_reg (GET_MODE (value) != VOIDmode
1134 ? GET_MODE (value)
1135 : word_mode, value));
1136 }
1137
1138 while (bitsdone < bitsize)
1139 {
1140 unsigned HOST_WIDE_INT thissize;
1141 rtx part, word;
1142 unsigned HOST_WIDE_INT thispos;
1143 unsigned HOST_WIDE_INT offset;
1144
1145 offset = (bitpos + bitsdone) / unit;
1146 thispos = (bitpos + bitsdone) % unit;
1147
1148 /* When region of bytes we can touch is restricted, decrease
1149 UNIT close to the end of the region as needed. If op0 is a REG
1150 or SUBREG of REG, don't do this, as there can't be data races
1151 on a register and we can expand shorter code in some cases. */
1152 if (bitregion_end
1153 && unit > BITS_PER_UNIT
1154 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1155 && !REG_P (op0)
1156 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1157 {
1158 unit = unit / 2;
1159 continue;
1160 }
1161
1162 /* THISSIZE must not overrun a word boundary. Otherwise,
1163 store_fixed_bit_field will call us again, and we will mutually
1164 recurse forever. */
1165 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1166 thissize = MIN (thissize, unit - thispos);
1167
1168 if (BYTES_BIG_ENDIAN)
1169 {
1170 /* Fetch successively less significant portions. */
1171 if (CONST_INT_P (value))
1172 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1173 >> (bitsize - bitsdone - thissize))
1174 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1175 else
1176 {
1177 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1178 /* The args are chosen so that the last part includes the
1179 lsb. Give extract_bit_field the value it needs (with
1180 endianness compensation) to fetch the piece we want. */
1181 part = extract_fixed_bit_field (word_mode, value, thissize,
1182 total_bits - bitsize + bitsdone,
1183 NULL_RTX, 1);
1184 }
1185 }
1186 else
1187 {
1188 /* Fetch successively more significant portions. */
1189 if (CONST_INT_P (value))
1190 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1191 >> bitsdone)
1192 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1193 else
1194 part = extract_fixed_bit_field (word_mode, value, thissize,
1195 bitsdone, NULL_RTX, 1);
1196 }
1197
1198 /* If OP0 is a register, then handle OFFSET here.
1199
1200 When handling multiword bitfields, extract_bit_field may pass
1201 down a word_mode SUBREG of a larger REG for a bitfield that actually
1202 crosses a word boundary. Thus, for a SUBREG, we must find
1203 the current word starting from the base register. */
1204 if (GET_CODE (op0) == SUBREG)
1205 {
1206 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1207 + (offset * unit / BITS_PER_WORD);
1208 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1209 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1210 word = word_offset ? const0_rtx : op0;
1211 else
1212 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1213 GET_MODE (SUBREG_REG (op0)));
1214 offset &= BITS_PER_WORD / unit - 1;
1215 }
1216 else if (REG_P (op0))
1217 {
1218 enum machine_mode op0_mode = GET_MODE (op0);
1219 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1220 word = offset ? const0_rtx : op0;
1221 else
1222 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1223 GET_MODE (op0));
1224 offset &= BITS_PER_WORD / unit - 1;
1225 }
1226 else
1227 word = op0;
1228
1229 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1230 it is just an out-of-bounds access. Ignore it. */
1231 if (word != const0_rtx)
1232 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1233 bitregion_start, bitregion_end, part);
1234 bitsdone += thissize;
1235 }
1236 }
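
/* Illustrative sketch, not part of the build: a worked example of the
   splitting loop above.  Storing a 20-bit field at bit position 28 of a
   32-bit aligned MEM (UNIT == 32) takes two iterations: first
   THISSIZE == MIN (20, 32 - 28) == 4 bits at THISPOS 28 of word 0, then the
   remaining 16 bits at THISPOS 0 of word 1 (OFFSET == 1).  */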
1237 \f
1238 /* A subroutine of extract_bit_field_1 that converts return value X
1239 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1240 to extract_bit_field. */
1241
1242 static rtx
1243 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1244 enum machine_mode tmode, bool unsignedp)
1245 {
1246 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1247 return x;
1248
1249 /* If the x mode is not a scalar integral, first convert to the
1250 integer mode of that size and then access it as a floating-point
1251 value via a SUBREG. */
1252 if (!SCALAR_INT_MODE_P (tmode))
1253 {
1254 enum machine_mode smode;
1255
1256 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1257 x = convert_to_mode (smode, x, unsignedp);
1258 x = force_reg (smode, x);
1259 return gen_lowpart (tmode, x);
1260 }
1261
1262 return convert_to_mode (tmode, x, unsignedp);
1263 }
1264
1265 /* Try to use an ext(z)v pattern to extract a field from OP0.
1266 Return the extracted value on success, otherwise return null.
1267 EXT_MODE is the mode of the extraction and the other arguments
1268 are as for extract_bit_field. */
1269
1270 static rtx
1271 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1272 unsigned HOST_WIDE_INT bitsize,
1273 unsigned HOST_WIDE_INT bitnum,
1274 int unsignedp, rtx target,
1275 enum machine_mode mode, enum machine_mode tmode)
1276 {
1277 struct expand_operand ops[4];
1278 rtx spec_target = target;
1279 rtx spec_target_subreg = 0;
1280 enum machine_mode ext_mode = extv->field_mode;
1281 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1282
1283 if (bitsize == 0 || unit < bitsize)
1284 return NULL_RTX;
1285
1286 if (MEM_P (op0))
1287 /* Get a reference to the first byte of the field. */
1288 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1289 &bitnum);
1290 else
1291 {
1292 /* Convert from counting within OP0 to counting in EXT_MODE. */
1293 if (BYTES_BIG_ENDIAN)
1294 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1295
1296 /* If op0 is a register, we need it in EXT_MODE to make it
1297 acceptable to the format of ext(z)v. */
1298 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1299 return NULL_RTX;
1300 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1301 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1302 }
1303
1304 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1305 "backwards" from the size of the unit we are extracting from.
1306 Otherwise, we count bits from the most significant on a
1307 BYTES/BITS_BIG_ENDIAN machine. */
1308
1309 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1310 bitnum = unit - bitsize - bitnum;
1311
1312 if (target == 0)
1313 target = spec_target = gen_reg_rtx (tmode);
1314
1315 if (GET_MODE (target) != ext_mode)
1316 {
1317 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1318 between the mode of the extraction (word_mode) and the target
1319 mode. Instead, create a temporary and use convert_move to set
1320 the target. */
1321 if (REG_P (target)
1322 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1323 {
1324 target = gen_lowpart (ext_mode, target);
1325 if (GET_MODE_PRECISION (ext_mode)
1326 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1327 spec_target_subreg = target;
1328 }
1329 else
1330 target = gen_reg_rtx (ext_mode);
1331 }
1332
1333 create_output_operand (&ops[0], target, ext_mode);
1334 create_fixed_operand (&ops[1], op0);
1335 create_integer_operand (&ops[2], bitsize);
1336 create_integer_operand (&ops[3], bitnum);
1337 if (maybe_expand_insn (extv->icode, 4, ops))
1338 {
1339 target = ops[0].value;
1340 if (target == spec_target)
1341 return target;
1342 if (target == spec_target_subreg)
1343 return spec_target;
1344 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1345 }
1346 return NULL_RTX;
1347 }
1348
1349 /* A subroutine of extract_bit_field, with the same arguments.
1350 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1351 if we can find no other means of implementing the operation.
 1352    If FALLBACK_P is false, return NULL instead.  */
1353
1354 static rtx
1355 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1356 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1357 enum machine_mode mode, enum machine_mode tmode,
1358 bool fallback_p)
1359 {
1360 rtx op0 = str_rtx;
1361 enum machine_mode int_mode;
1362 enum machine_mode mode1;
1363
1364 if (tmode == VOIDmode)
1365 tmode = mode;
1366
1367 while (GET_CODE (op0) == SUBREG)
1368 {
1369 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1370 op0 = SUBREG_REG (op0);
1371 }
1372
1373 /* If we have an out-of-bounds access to a register, just return an
1374 uninitialized register of the required mode. This can occur if the
1375 source code contains an out-of-bounds access to a small array. */
1376 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1377 return gen_reg_rtx (tmode);
1378
1379 if (REG_P (op0)
1380 && mode == GET_MODE (op0)
1381 && bitnum == 0
1382 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1383 {
1384 /* We're trying to extract a full register from itself. */
1385 return op0;
1386 }
1387
1388 /* See if we can get a better vector mode before extracting. */
1389 if (VECTOR_MODE_P (GET_MODE (op0))
1390 && !MEM_P (op0)
1391 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1392 {
1393 enum machine_mode new_mode;
1394
1395 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1396 new_mode = MIN_MODE_VECTOR_FLOAT;
1397 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1398 new_mode = MIN_MODE_VECTOR_FRACT;
1399 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1400 new_mode = MIN_MODE_VECTOR_UFRACT;
1401 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1402 new_mode = MIN_MODE_VECTOR_ACCUM;
1403 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1404 new_mode = MIN_MODE_VECTOR_UACCUM;
1405 else
1406 new_mode = MIN_MODE_VECTOR_INT;
1407
1408 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1409 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1410 && targetm.vector_mode_supported_p (new_mode))
1411 break;
1412 if (new_mode != VOIDmode)
1413 op0 = gen_lowpart (new_mode, op0);
1414 }
1415
1416 /* Use vec_extract patterns for extracting parts of vectors whenever
1417 available. */
1418 if (VECTOR_MODE_P (GET_MODE (op0))
1419 && !MEM_P (op0)
1420 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1421 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1422 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1423 {
1424 struct expand_operand ops[3];
1425 enum machine_mode outermode = GET_MODE (op0);
1426 enum machine_mode innermode = GET_MODE_INNER (outermode);
1427 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1428 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1429
1430 create_output_operand (&ops[0], target, innermode);
1431 create_input_operand (&ops[1], op0, outermode);
1432 create_integer_operand (&ops[2], pos);
1433 if (maybe_expand_insn (icode, 3, ops))
1434 {
1435 target = ops[0].value;
1436 if (GET_MODE (target) != mode)
1437 return gen_lowpart (tmode, target);
1438 return target;
1439 }
1440 }
1441
1442 /* Make sure we are playing with integral modes. Pun with subregs
1443 if we aren't. */
1444 {
1445 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1446 if (imode != GET_MODE (op0))
1447 {
1448 if (MEM_P (op0))
1449 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1450 else if (imode != BLKmode)
1451 {
1452 op0 = gen_lowpart (imode, op0);
1453
1454 /* If we got a SUBREG, force it into a register since we
1455 aren't going to be able to do another SUBREG on it. */
1456 if (GET_CODE (op0) == SUBREG)
1457 op0 = force_reg (imode, op0);
1458 }
1459 else if (REG_P (op0))
1460 {
1461 rtx reg, subreg;
1462 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1463 MODE_INT);
1464 reg = gen_reg_rtx (imode);
1465 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1466 emit_move_insn (subreg, op0);
1467 op0 = reg;
1468 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1469 }
1470 else
1471 {
1472 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1473 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1474 emit_move_insn (mem, op0);
1475 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1476 }
1477 }
1478 }
1479
1480 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1481 If that's wrong, the solution is to test for it and set TARGET to 0
1482 if needed. */
1483
1484 /* Get the mode of the field to use for atomic access or subreg
1485 conversion. */
1486 mode1 = mode;
1487 if (SCALAR_INT_MODE_P (tmode))
1488 {
1489 enum machine_mode try_mode = mode_for_size (bitsize,
1490 GET_MODE_CLASS (tmode), 0);
1491 if (try_mode != BLKmode)
1492 mode1 = try_mode;
1493 }
1494 gcc_assert (mode1 != BLKmode);
1495
1496 /* Extraction of a full MODE1 value can be done with a subreg as long
1497 as the least significant bit of the value is the least significant
1498 bit of either OP0 or a word of OP0. */
1499 if (!MEM_P (op0)
1500 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1501 && bitsize == GET_MODE_BITSIZE (mode1)
1502 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1503 {
1504 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1505 bitnum / BITS_PER_UNIT);
1506 if (sub)
1507 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1508 }
1509
1510 /* Extraction of a full MODE1 value can be done with a load as long as
1511 the field is on a byte boundary and is sufficiently aligned. */
1512 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1513 {
1514 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1515 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1516 }
1517
1518 /* Handle fields bigger than a word. */
1519
1520 if (bitsize > BITS_PER_WORD)
1521 {
1522 /* Here we transfer the words of the field
1523 in the order least significant first.
1524 This is because the most significant word is the one which may
1525 be less than full. */
1526
1527 unsigned int backwards = WORDS_BIG_ENDIAN;
1528 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1529 unsigned int i;
1530 rtx last;
1531
1532 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1533 target = gen_reg_rtx (mode);
1534
1535 /* Indicate for flow that the entire target reg is being set. */
1536 emit_clobber (target);
1537
1538 last = get_last_insn ();
1539 for (i = 0; i < nwords; i++)
1540 {
1541 /* If I is 0, use the low-order word in both field and target;
1542 if I is 1, use the next to lowest word; and so on. */
1543 /* Word number in TARGET to use. */
1544 unsigned int wordnum
1545 = (backwards
1546 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1547 : i);
1548 /* Offset from start of field in OP0. */
1549 unsigned int bit_offset = (backwards
1550 ? MAX ((int) bitsize - ((int) i + 1)
1551 * BITS_PER_WORD,
1552 0)
1553 : (int) i * BITS_PER_WORD);
1554 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1555 rtx result_part
1556 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1557 bitsize - i * BITS_PER_WORD),
1558 bitnum + bit_offset, 1, target_part,
1559 mode, word_mode, fallback_p);
1560
1561 gcc_assert (target_part);
1562 if (!result_part)
1563 {
1564 delete_insns_since (last);
1565 return NULL;
1566 }
1567
1568 if (result_part != target_part)
1569 emit_move_insn (target_part, result_part);
1570 }
1571
1572 if (unsignedp)
1573 {
1574 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1575 need to be zero'd out. */
1576 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1577 {
1578 unsigned int i, total_words;
1579
1580 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1581 for (i = nwords; i < total_words; i++)
1582 emit_move_insn
1583 (operand_subword (target,
1584 backwards ? total_words - i - 1 : i,
1585 1, VOIDmode),
1586 const0_rtx);
1587 }
1588 return target;
1589 }
1590
1591 /* Signed bit field: sign-extend with two arithmetic shifts. */
1592 target = expand_shift (LSHIFT_EXPR, mode, target,
1593 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1594 return expand_shift (RSHIFT_EXPR, mode, target,
1595 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1596 }
1597
1598 /* If OP0 is a multi-word register, narrow it to the affected word.
1599 If the region spans two words, defer to extract_split_bit_field. */
1600 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1601 {
1602 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1603 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1604 bitnum %= BITS_PER_WORD;
1605 if (bitnum + bitsize > BITS_PER_WORD)
1606 {
1607 if (!fallback_p)
1608 return NULL_RTX;
1609 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1610 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1611 }
1612 }
1613
1614 /* From here on we know the desired field is smaller than a word.
1615 If OP0 is a register, it too fits within a word. */
1616 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1617 extraction_insn extv;
1618 if (!MEM_P (op0)
1619 /* ??? We could limit the structure size to the part of OP0 that
1620 contains the field, with appropriate checks for endianness
1621 and TRULY_NOOP_TRUNCATION. */
1622 && get_best_reg_extraction_insn (&extv, pattern,
1623 GET_MODE_BITSIZE (GET_MODE (op0)),
1624 tmode))
1625 {
1626 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1627 unsignedp, target, mode,
1628 tmode);
1629 if (result)
1630 return result;
1631 }
1632
1633 /* If OP0 is a memory, try copying it to a register and seeing if a
1634 cheap register alternative is available. */
1635 if (MEM_P (op0))
1636 {
1637 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1638 tmode))
1639 {
1640 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1641 bitnum, unsignedp,
1642 target, mode,
1643 tmode);
1644 if (result)
1645 return result;
1646 }
1647
1648 rtx last = get_last_insn ();
1649
1650 /* Try loading part of OP0 into a register and extracting the
1651 bitfield from that. */
1652 unsigned HOST_WIDE_INT bitpos;
1653 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1654 0, 0, tmode, &bitpos);
1655 if (xop0)
1656 {
1657 xop0 = copy_to_reg (xop0);
1658 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1659 unsignedp, target,
1660 mode, tmode, false);
1661 if (result)
1662 return result;
1663 delete_insns_since (last);
1664 }
1665 }
1666
1667 if (!fallback_p)
1668 return NULL;
1669
1670 /* Find a correspondingly-sized integer field, so we can apply
1671 shifts and masks to it. */
1672 int_mode = int_mode_for_mode (tmode);
1673 if (int_mode == BLKmode)
1674 int_mode = int_mode_for_mode (mode);
1675 /* Should probably push op0 out to memory and then do a load. */
1676 gcc_assert (int_mode != BLKmode);
1677
1678 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1679 target, unsignedp);
1680 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1681 }
1682
1683 /* Generate code to extract a bit-field from STR_RTX
1684 containing BITSIZE bits, starting at BITNUM,
1685 and put it in TARGET if possible (if TARGET is nonzero).
1686 Regardless of TARGET, we return the rtx for where the value is placed.
1687
1688 STR_RTX is the structure containing the byte (a REG or MEM).
1689 UNSIGNEDP is nonzero if this is an unsigned bit field.
1690 MODE is the natural mode of the field value once extracted.
1691 TMODE is the mode the caller would like the value to have;
1692 but the value may be returned with type MODE instead.
1693
1694 If a TARGET is specified and we can store in it at no extra cost,
1695 we do so, and return TARGET.
1696 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1697 if they are equally easy. */
1698
1699 rtx
1700 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1701 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1702 enum machine_mode mode, enum machine_mode tmode)
1703 {
1704 enum machine_mode mode1;
1705
1706 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1707 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1708 mode1 = GET_MODE (str_rtx);
1709 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1710 mode1 = GET_MODE (target);
1711 else
1712 mode1 = tmode;
1713
1714 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1715 {
1716 rtx result;
1717
1718 /* Extraction of a full MODE1 value can be done with a load as long as
1719 the field is on a byte boundary and is sufficiently aligned. */
1720 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1721 result = adjust_bitfield_address (str_rtx, mode1,
1722 bitnum / BITS_PER_UNIT);
1723 else
1724 result = extract_fixed_bit_field (mode, str_rtx, bitsize, bitnum,
1725 target, unsignedp);
1726 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1727 }
1728
1729 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1730 target, mode, tmode, true);
1731 }
1732 \f
1733 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1734 from bit BITNUM of OP0.
1735
1736 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1737 If TARGET is nonzero, attempts to store the value there
1738 and return TARGET, but this is not guaranteed.
1739 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1740
1741 static rtx
1742 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1743 unsigned HOST_WIDE_INT bitsize,
1744 unsigned HOST_WIDE_INT bitnum, rtx target,
1745 int unsignedp)
1746 {
1747 enum machine_mode mode;
1748
1749 if (MEM_P (op0))
1750 {
1751 /* Get the proper mode to use for this field. We want a mode that
1752 includes the entire field. If such a mode would be larger than
1753 a word, we won't be doing the extraction the normal way. */
1754
1755 mode = GET_MODE (op0);
1756 if (GET_MODE_BITSIZE (mode) == 0
1757 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1758 mode = word_mode;
1759 mode = get_best_mode (bitsize, bitnum, 0, 0,
1760 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1761
1762 if (mode == VOIDmode)
1763 /* The only way this should occur is if the field spans word
1764 boundaries. */
1765 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1766
1767 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1768 }
1769
1770 mode = GET_MODE (op0);
1771 gcc_assert (SCALAR_INT_MODE_P (mode));
1772
1773 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1774 for invalid input, such as extract equivalent of f5 from
1775 gcc.dg/pr48335-2.c. */
1776
1777 if (BYTES_BIG_ENDIAN)
1778 /* BITNUM is the distance between our msb and that of OP0.
1779 Convert it to the distance from the lsb. */
1780 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1781
1782 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1783 We have reduced the big-endian case to the little-endian case. */
1784
1785 if (unsignedp)
1786 {
1787 if (bitnum)
1788 {
1789 /* If the field does not already start at the lsb,
1790 shift it so it does. */
1791 /* Maybe propagate the target for the shift. */
1792 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1793 if (tmode != mode)
1794 subtarget = 0;
1795 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1796 }
1797 /* Convert the value to the desired mode. */
1798 if (mode != tmode)
1799 op0 = convert_to_mode (tmode, op0, 1);
1800
1801 /* Unless the msb of the field used to be the msb when we shifted,
1802 mask out the upper bits. */
1803
1804 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1805 return expand_binop (GET_MODE (op0), and_optab, op0,
1806 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1807 target, 1, OPTAB_LIB_WIDEN);
1808 return op0;
1809 }
1810
1811 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1812 then arithmetic-shift its lsb to the lsb of the word. */
1813 op0 = force_reg (mode, op0);
1814
1815 /* Find the narrowest integer mode that contains the field. */
1816
1817 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1818 mode = GET_MODE_WIDER_MODE (mode))
1819 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1820 {
1821 op0 = convert_to_mode (mode, op0, 0);
1822 break;
1823 }
1824
1825 if (mode != tmode)
1826 target = 0;
1827
1828 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1829 {
1830 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1831 /* Maybe propagate the target for the shift. */
1832 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1833 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1834 }
1835
1836 return expand_shift (RSHIFT_EXPR, mode, op0,
1837 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1838 }
1839 \f
1840 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1841 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1842 complement of that if COMPLEMENT. The mask is truncated if
1843 necessary to the width of mode MODE. The mask is zero-extended if
1844 BITSIZE+BITPOS is too small for MODE. */
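 /* For example, mask_rtx (SImode, 3, 4, 0) yields the SImode constant
 0x78 (binary 1111000), and mask_rtx (SImode, 3, 4, 1) yields its
 complement truncated to SImode, 0xffffff87. */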
1845
1846 static rtx
1847 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1848 {
1849 double_int mask;
1850
1851 mask = double_int::mask (bitsize);
1852 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1853
1854 if (complement)
1855 mask = ~mask;
1856
1857 return immed_double_int_const (mask, mode);
1858 }
1859
1860 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1861 VALUE << BITPOS. */
1862
1863 static rtx
1864 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1865 int bitpos)
1866 {
1867 double_int val;
1868
1869 val = double_int::from_uhwi (value);
1870 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1871
1872 return immed_double_int_const (val, mode);
1873 }
1874 \f
1875 /* Extract a bit field that is split across two words
1876 and return an RTX for the result.
1877
1878 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1879 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1880 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
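 /* For example, on a little-endian 32-bit-word target a 16-bit field
 starting at bit 24 of the first word is assembled from two pieces:
 bits 24..31 of word 0 and bits 0..7 of word 1, each fetched with
 extract_fixed_bit_field and combined below with a shift and an IOR. */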
1881
1882 static rtx
1883 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1884 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1885 {
1886 unsigned int unit;
1887 unsigned int bitsdone = 0;
1888 rtx result = NULL_RTX;
1889 int first = 1;
1890
1891 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1892 much at a time. */
1893 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1894 unit = BITS_PER_WORD;
1895 else
1896 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1897
1898 while (bitsdone < bitsize)
1899 {
1900 unsigned HOST_WIDE_INT thissize;
1901 rtx part, word;
1902 unsigned HOST_WIDE_INT thispos;
1903 unsigned HOST_WIDE_INT offset;
1904
1905 offset = (bitpos + bitsdone) / unit;
1906 thispos = (bitpos + bitsdone) % unit;
1907
1908 /* THISSIZE must not overrun a word boundary. Otherwise,
1909 extract_fixed_bit_field will call us again, and we will mutually
1910 recurse forever. */
1911 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1912 thissize = MIN (thissize, unit - thispos);
1913
1914 /* If OP0 is a register, then handle OFFSET here.
1915
1916 When handling multiword bitfields, extract_bit_field may pass
1917 down a word_mode SUBREG of a larger REG for a bitfield that actually
1918 crosses a word boundary. Thus, for a SUBREG, we must find
1919 the current word starting from the base register. */
1920 if (GET_CODE (op0) == SUBREG)
1921 {
1922 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1923 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1924 GET_MODE (SUBREG_REG (op0)));
1925 offset = 0;
1926 }
1927 else if (REG_P (op0))
1928 {
1929 word = operand_subword_force (op0, offset, GET_MODE (op0));
1930 offset = 0;
1931 }
1932 else
1933 word = op0;
1934
1935 /* Extract the parts in bit-counting order,
1936 whose meaning is determined by BYTES_PER_UNIT.
1937 OFFSET is in UNITs, and UNIT is in bits. */
1938 part = extract_fixed_bit_field (word_mode, word, thissize,
1939 offset * unit + thispos, 0, 1);
1940 bitsdone += thissize;
1941
1942 /* Shift this part into place for the result. */
1943 if (BYTES_BIG_ENDIAN)
1944 {
1945 if (bitsize != bitsdone)
1946 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1947 bitsize - bitsdone, 0, 1);
1948 }
1949 else
1950 {
1951 if (bitsdone != thissize)
1952 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1953 bitsdone - thissize, 0, 1);
1954 }
1955
1956 if (first)
1957 result = part;
1958 else
1959 /* Combine the parts with bitwise or. This works
1960 because we extracted each part as an unsigned bit field. */
1961 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1962 OPTAB_LIB_WIDEN);
1963
1964 first = 0;
1965 }
1966
1967 /* Unsigned bit field: we are done. */
1968 if (unsignedp)
1969 return result;
1970 /* Signed bit field: sign-extend with two arithmetic shifts. */
1971 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1972 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1973 return expand_shift (RSHIFT_EXPR, word_mode, result,
1974 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1975 }
1976 \f
1977 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1978 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1979 MODE, fill the upper bits with zeros. Fail if the layout of either
1980 mode is unknown (as for CC modes) or if the extraction would involve
1981 unprofitable mode punning. Return the value on success, otherwise
1982 return null.
1983
1984 This is different from gen_lowpart* in these respects:
1985
1986 - the returned value must always be considered an rvalue
1987
1988 - when MODE is wider than SRC_MODE, the extraction involves
1989 a zero extension
1990
1991 - when MODE is smaller than SRC_MODE, the extraction involves
1992 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1993
1994 In other words, this routine performs a computation, whereas the
1995 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1996 operations. */
1997
1998 rtx
1999 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2000 {
2001 enum machine_mode int_mode, src_int_mode;
2002
2003 if (mode == src_mode)
2004 return src;
2005
2006 if (CONSTANT_P (src))
2007 {
2008 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2009 fails, it will happily create (subreg (symbol_ref)) or similar
2010 invalid SUBREGs. */
2011 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2012 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2013 if (ret)
2014 return ret;
2015
2016 if (GET_MODE (src) == VOIDmode
2017 || !validate_subreg (mode, src_mode, src, byte))
2018 return NULL_RTX;
2019
2020 src = force_reg (GET_MODE (src), src);
2021 return gen_rtx_SUBREG (mode, src, byte);
2022 }
2023
2024 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2025 return NULL_RTX;
2026
2027 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2028 && MODES_TIEABLE_P (mode, src_mode))
2029 {
2030 rtx x = gen_lowpart_common (mode, src);
2031 if (x)
2032 return x;
2033 }
2034
2035 src_int_mode = int_mode_for_mode (src_mode);
2036 int_mode = int_mode_for_mode (mode);
2037 if (src_int_mode == BLKmode || int_mode == BLKmode)
2038 return NULL_RTX;
2039
2040 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2041 return NULL_RTX;
2042 if (!MODES_TIEABLE_P (int_mode, mode))
2043 return NULL_RTX;
2044
2045 src = gen_lowpart (src_int_mode, src);
2046 src = convert_modes (int_mode, src_int_mode, src, true);
2047 src = gen_lowpart (mode, src);
2048 return src;
2049 }
2050 \f
2051 /* Add INC into TARGET. */
2052
2053 void
2054 expand_inc (rtx target, rtx inc)
2055 {
2056 rtx value = expand_binop (GET_MODE (target), add_optab,
2057 target, inc,
2058 target, 0, OPTAB_LIB_WIDEN);
2059 if (value != target)
2060 emit_move_insn (target, value);
2061 }
2062
2063 /* Subtract DEC from TARGET. */
2064
2065 void
2066 expand_dec (rtx target, rtx dec)
2067 {
2068 rtx value = expand_binop (GET_MODE (target), sub_optab,
2069 target, dec,
2070 target, 0, OPTAB_LIB_WIDEN);
2071 if (value != target)
2072 emit_move_insn (target, value);
2073 }
2074 \f
2075 /* Output a shift instruction for expression code CODE,
2076 with SHIFTED being the rtx for the value to shift,
2077 and AMOUNT the rtx for the amount to shift by.
2078 Store the result in the rtx TARGET, if that is convenient.
2079 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2080 Return the rtx for where the value is. */
2081
2082 static rtx
2083 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2084 rtx amount, rtx target, int unsignedp)
2085 {
2086 rtx op1, temp = 0;
2087 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2088 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2089 optab lshift_optab = ashl_optab;
2090 optab rshift_arith_optab = ashr_optab;
2091 optab rshift_uns_optab = lshr_optab;
2092 optab lrotate_optab = rotl_optab;
2093 optab rrotate_optab = rotr_optab;
2094 enum machine_mode op1_mode;
2095 int attempt;
2096 bool speed = optimize_insn_for_speed_p ();
2097
2098 op1 = amount;
2099 op1_mode = GET_MODE (op1);
2100
2101 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2102 shift amount is a vector, use the vector/vector shift patterns. */
2103 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2104 {
2105 lshift_optab = vashl_optab;
2106 rshift_arith_optab = vashr_optab;
2107 rshift_uns_optab = vlshr_optab;
2108 lrotate_optab = vrotl_optab;
2109 rrotate_optab = vrotr_optab;
2110 }
2111
2112 /* We previously detected shift counts computed by NEGATE_EXPR
2113 and shifted in the other direction; but that does not work
2114 on all machines. */
2115
2116 if (SHIFT_COUNT_TRUNCATED)
2117 {
2118 if (CONST_INT_P (op1)
2119 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2120 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2121 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2122 % GET_MODE_BITSIZE (mode));
2123 else if (GET_CODE (op1) == SUBREG
2124 && subreg_lowpart_p (op1)
2125 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2126 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2127 op1 = SUBREG_REG (op1);
2128 }
2129
2130 /* Canonicalize rotates by a constant amount. If op1 is bitsize / 2,
2131 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2132 bitsize - 1, use the other direction of rotate with a 1 .. bitsize / 2 - 1
2133 amount instead. */
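 /* For example, in SImode a left rotate by 30 is rewritten as a right
 rotate by 2, and a right rotate by 17 as a left rotate by 15. */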
2134 if (rotate
2135 && CONST_INT_P (op1)
2136 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2137 GET_MODE_BITSIZE (mode) - 1))
2138 {
2139 op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2140 left = !left;
2141 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2142 }
2143
2144 if (op1 == const0_rtx)
2145 return shifted;
2146
2147 /* Check whether it's cheaper to implement a left shift by a constant
2148 bit count by a sequence of additions. */
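 /* For example, on a target where shifts are expensive, x << 2 can be
 emitted as two doublings: t = x + x; t = t + t. */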
2149 if (code == LSHIFT_EXPR
2150 && CONST_INT_P (op1)
2151 && INTVAL (op1) > 0
2152 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2153 && INTVAL (op1) < MAX_BITS_PER_WORD
2154 && (shift_cost (speed, mode, INTVAL (op1))
2155 > INTVAL (op1) * add_cost (speed, mode))
2156 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2157 {
2158 int i;
2159 for (i = 0; i < INTVAL (op1); i++)
2160 {
2161 temp = force_reg (mode, shifted);
2162 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2163 unsignedp, OPTAB_LIB_WIDEN);
2164 }
2165 return shifted;
2166 }
2167
2168 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2169 {
2170 enum optab_methods methods;
2171
2172 if (attempt == 0)
2173 methods = OPTAB_DIRECT;
2174 else if (attempt == 1)
2175 methods = OPTAB_WIDEN;
2176 else
2177 methods = OPTAB_LIB_WIDEN;
2178
2179 if (rotate)
2180 {
2181 /* Widening does not work for rotation. */
2182 if (methods == OPTAB_WIDEN)
2183 continue;
2184 else if (methods == OPTAB_LIB_WIDEN)
2185 {
2186 /* If we have been unable to open-code this by a rotation,
2187 do it as the IOR of two shifts. I.e., to rotate A
2188 by N bits, compute
2189 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2190 where C is the bitsize of A.
2191
2192 It is theoretically possible that the target machine might
2193 not be able to perform either shift and hence we would
2194 be making two libcalls rather than just the one for the
2195 shift (similarly if IOR could not be done). We will allow
2196 this extremely unlikely lossage to avoid complicating the
2197 code below. */
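 /* For example, in SImode with a variable shift count N this emits
 (A << N) | ((unsigned) A >> ((-N) & 31)); masking the negated
 count keeps the right-shift amount in the range 0..31 even when
 N is zero. */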
2198
2199 rtx subtarget = target == shifted ? 0 : target;
2200 rtx new_amount, other_amount;
2201 rtx temp1;
2202
2203 new_amount = op1;
2204 if (op1 == const0_rtx)
2205 return shifted;
2206 else if (CONST_INT_P (op1))
2207 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2208 - INTVAL (op1));
2209 else
2210 {
2211 other_amount
2212 = simplify_gen_unary (NEG, GET_MODE (op1),
2213 op1, GET_MODE (op1));
2214 HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2215 other_amount
2216 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2217 gen_int_mode (mask, GET_MODE (op1)));
2218 }
2219
2220 shifted = force_reg (mode, shifted);
2221
2222 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2223 mode, shifted, new_amount, 0, 1);
2224 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2225 mode, shifted, other_amount,
2226 subtarget, 1);
2227 return expand_binop (mode, ior_optab, temp, temp1, target,
2228 unsignedp, methods);
2229 }
2230
2231 temp = expand_binop (mode,
2232 left ? lrotate_optab : rrotate_optab,
2233 shifted, op1, target, unsignedp, methods);
2234 }
2235 else if (unsignedp)
2236 temp = expand_binop (mode,
2237 left ? lshift_optab : rshift_uns_optab,
2238 shifted, op1, target, unsignedp, methods);
2239
2240 /* Do arithmetic shifts.
2241 Also, if we are going to widen the operand, we can just as well
2242 use an arithmetic right-shift instead of a logical one. */
2243 if (temp == 0 && ! rotate
2244 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2245 {
2246 enum optab_methods methods1 = methods;
2247
2248 /* If trying to widen a log shift to an arithmetic shift,
2249 don't accept an arithmetic shift of the same size. */
2250 if (unsignedp)
2251 methods1 = OPTAB_MUST_WIDEN;
2252
2253 /* Arithmetic shift */
2254
2255 temp = expand_binop (mode,
2256 left ? lshift_optab : rshift_arith_optab,
2257 shifted, op1, target, unsignedp, methods1);
2258 }
2259
2260 /* We used to try extzv here for logical right shifts, but that was
2261 only useful for one machine, the VAX, and caused poor code
2262 generation there for lshrdi3, so the code was deleted and a
2263 define_expand for lshrsi3 was added to vax.md. */
2264 }
2265
2266 gcc_assert (temp);
2267 return temp;
2268 }
2269
2270 /* Output a shift instruction for expression code CODE,
2271 with SHIFTED being the rtx for the value to shift,
2272 and AMOUNT the amount to shift by.
2273 Store the result in the rtx TARGET, if that is convenient.
2274 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2275 Return the rtx for where the value is. */
2276
2277 rtx
2278 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2279 int amount, rtx target, int unsignedp)
2280 {
2281 return expand_shift_1 (code, mode,
2282 shifted, GEN_INT (amount), target, unsignedp);
2283 }
2284
2285 /* Output a shift instruction for expression code CODE,
2286 with SHIFTED being the rtx for the value to shift,
2287 and AMOUNT the tree for the amount to shift by.
2288 Store the result in the rtx TARGET, if that is convenient.
2289 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2290 Return the rtx for where the value is. */
2291
2292 rtx
2293 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2294 tree amount, rtx target, int unsignedp)
2295 {
2296 return expand_shift_1 (code, mode,
2297 shifted, expand_normal (amount), target, unsignedp);
2298 }
2299
2300 \f
2301 /* Indicates the type of fixup needed after a constant multiplication.
2302 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2303 the result should be negated, and ADD_VARIANT means that the
2304 multiplicand should be added to the result. */
2305 enum mult_variant {basic_variant, negate_variant, add_variant};
2306
2307 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2308 const struct mult_cost *, enum machine_mode mode);
2309 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2310 struct algorithm *, enum mult_variant *, int);
2311 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2312 const struct algorithm *, enum mult_variant);
2313 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2314 static rtx extract_high_half (enum machine_mode, rtx);
2315 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2316 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2317 int, int);
2318 /* Compute and return the best algorithm for multiplying by T.
2319 The algorithm must cost less than COST_LIMIT.
2320 If retval.cost >= COST_LIMIT, no algorithm was found and all
2321 other fields of the returned struct are undefined.
2322 MODE is the machine mode of the multiplication. */
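 /* For example, the multiplier 7 may be synthesized as (x << 3) - x and
 the multiplier 10 as ((x << 2) + x) << 1, when such sequences are
 cheaper than the target's multiply instruction. */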
2323
2324 static void
2325 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2326 const struct mult_cost *cost_limit, enum machine_mode mode)
2327 {
2328 int m;
2329 struct algorithm *alg_in, *best_alg;
2330 struct mult_cost best_cost;
2331 struct mult_cost new_limit;
2332 int op_cost, op_latency;
2333 unsigned HOST_WIDE_INT orig_t = t;
2334 unsigned HOST_WIDE_INT q;
2335 int maxm, hash_index;
2336 bool cache_hit = false;
2337 enum alg_code cache_alg = alg_zero;
2338 bool speed = optimize_insn_for_speed_p ();
2339 enum machine_mode imode;
2340 struct alg_hash_entry *entry_ptr;
2341
2342 /* Indicate that no algorithm is yet found. If no algorithm
2343 is found, this value will be returned and indicate failure. */
2344 alg_out->cost.cost = cost_limit->cost + 1;
2345 alg_out->cost.latency = cost_limit->latency + 1;
2346
2347 if (cost_limit->cost < 0
2348 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2349 return;
2350
2351 /* Be prepared for vector modes. */
2352 imode = GET_MODE_INNER (mode);
2353 if (imode == VOIDmode)
2354 imode = mode;
2355
2356 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2357
2358 /* Restrict the bits of "t" to the multiplication's mode. */
2359 t &= GET_MODE_MASK (imode);
2360
2361 /* t == 1 can be done at zero cost. */
2362 if (t == 1)
2363 {
2364 alg_out->ops = 1;
2365 alg_out->cost.cost = 0;
2366 alg_out->cost.latency = 0;
2367 alg_out->op[0] = alg_m;
2368 return;
2369 }
2370
2371 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2372 fail now. */
2373 if (t == 0)
2374 {
2375 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2376 return;
2377 else
2378 {
2379 alg_out->ops = 1;
2380 alg_out->cost.cost = zero_cost (speed);
2381 alg_out->cost.latency = zero_cost (speed);
2382 alg_out->op[0] = alg_zero;
2383 return;
2384 }
2385 }
2386
2387 /* We'll be needing a couple extra algorithm structures now. */
2388
2389 alg_in = XALLOCA (struct algorithm);
2390 best_alg = XALLOCA (struct algorithm);
2391 best_cost = *cost_limit;
2392
2393 /* Compute the hash index. */
2394 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2395
2396 /* See if we already know what to do for T. */
2397 entry_ptr = alg_hash_entry_ptr (hash_index);
2398 if (entry_ptr->t == t
2399 && entry_ptr->mode == mode
2401 && entry_ptr->speed == speed
2402 && entry_ptr->alg != alg_unknown)
2403 {
2404 cache_alg = entry_ptr->alg;
2405
2406 if (cache_alg == alg_impossible)
2407 {
2408 /* The cache tells us that it's impossible to synthesize
2409 multiplication by T within entry_ptr->cost. */
2410 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2411 /* COST_LIMIT is at least as restrictive as the one
2412 recorded in the hash table, in which case we have no
2413 hope of synthesizing a multiplication. Just
2414 return. */
2415 return;
2416
2417 /* If we get here, COST_LIMIT is less restrictive than the
2418 one recorded in the hash table, so we may be able to
2419 synthesize a multiplication. Proceed as if we didn't
2420 have the cache entry. */
2421 }
2422 else
2423 {
2424 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2425 /* The cached algorithm shows that this multiplication
2426 requires more cost than COST_LIMIT. Just return. This
2427 way, we don't clobber this cache entry with
2428 alg_impossible but retain useful information. */
2429 return;
2430
2431 cache_hit = true;
2432
2433 switch (cache_alg)
2434 {
2435 case alg_shift:
2436 goto do_alg_shift;
2437
2438 case alg_add_t_m2:
2439 case alg_sub_t_m2:
2440 goto do_alg_addsub_t_m2;
2441
2442 case alg_add_factor:
2443 case alg_sub_factor:
2444 goto do_alg_addsub_factor;
2445
2446 case alg_add_t2_m:
2447 goto do_alg_add_t2_m;
2448
2449 case alg_sub_t2_m:
2450 goto do_alg_sub_t2_m;
2451
2452 default:
2453 gcc_unreachable ();
2454 }
2455 }
2456 }
2457
2458 /* If we have a group of zero bits at the low-order part of T, try
2459 multiplying by the remaining bits and then doing a shift. */
2460
2461 if ((t & 1) == 0)
2462 {
2463 do_alg_shift:
2464 m = floor_log2 (t & -t); /* m = number of low zero bits */
2465 if (m < maxm)
2466 {
2467 q = t >> m;
2468 /* The function expand_shift will choose between a shift and
2469 a sequence of additions, so the observed cost is given as
2470 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2471 op_cost = m * add_cost (speed, mode);
2472 if (shift_cost (speed, mode, m) < op_cost)
2473 op_cost = shift_cost (speed, mode, m);
2474 new_limit.cost = best_cost.cost - op_cost;
2475 new_limit.latency = best_cost.latency - op_cost;
2476 synth_mult (alg_in, q, &new_limit, mode);
2477
2478 alg_in->cost.cost += op_cost;
2479 alg_in->cost.latency += op_cost;
2480 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2481 {
2482 struct algorithm *x;
2483 best_cost = alg_in->cost;
2484 x = alg_in, alg_in = best_alg, best_alg = x;
2485 best_alg->log[best_alg->ops] = m;
2486 best_alg->op[best_alg->ops] = alg_shift;
2487 }
2488
2489 /* See if treating ORIG_T as a signed number yields a better
2490 sequence. Try this sequence only for a negative ORIG_T
2491 as it would be useless for a non-negative ORIG_T. */
2492 if ((HOST_WIDE_INT) orig_t < 0)
2493 {
2494 /* Shift ORIG_T as follows because a right shift of a
2495 negative-valued signed type is implementation
2496 defined. */
2497 q = ~(~orig_t >> m);
2498 /* The function expand_shift will choose between a shift
2499 and a sequence of additions, so the observed cost is
2500 given as MIN (m * add_cost(speed, mode),
2501 shift_cost(speed, mode, m)). */
2502 op_cost = m * add_cost (speed, mode);
2503 if (shift_cost (speed, mode, m) < op_cost)
2504 op_cost = shift_cost (speed, mode, m);
2505 new_limit.cost = best_cost.cost - op_cost;
2506 new_limit.latency = best_cost.latency - op_cost;
2507 synth_mult (alg_in, q, &new_limit, mode);
2508
2509 alg_in->cost.cost += op_cost;
2510 alg_in->cost.latency += op_cost;
2511 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2512 {
2513 struct algorithm *x;
2514 best_cost = alg_in->cost;
2515 x = alg_in, alg_in = best_alg, best_alg = x;
2516 best_alg->log[best_alg->ops] = m;
2517 best_alg->op[best_alg->ops] = alg_shift;
2518 }
2519 }
2520 }
2521 if (cache_hit)
2522 goto done;
2523 }
2524
2525 /* If we have an odd number, add or subtract one. */
2526 if ((t & 1) != 0)
2527 {
2528 unsigned HOST_WIDE_INT w;
2529
2530 do_alg_addsub_t_m2:
2531 for (w = 1; (w & t) != 0; w <<= 1)
2532 ;
2533 /* If T was -1, then W will be zero after the loop. This is another
2534 case where T ends with ...111. Handling this by multiplying by (T + 1)
2535 and subtracting 1 produces slightly better code and results in much
2536 faster algorithm selection than treating it like the ...0111 case
2537 below. */
2538 if (w == 0
2539 || (w > 2
2540 /* Reject the case where t is 3.
2541 Thus we prefer addition in that case. */
2542 && t != 3))
2543 {
2544 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2545
2546 op_cost = add_cost (speed, mode);
2547 new_limit.cost = best_cost.cost - op_cost;
2548 new_limit.latency = best_cost.latency - op_cost;
2549 synth_mult (alg_in, t + 1, &new_limit, mode);
2550
2551 alg_in->cost.cost += op_cost;
2552 alg_in->cost.latency += op_cost;
2553 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2554 {
2555 struct algorithm *x;
2556 best_cost = alg_in->cost;
2557 x = alg_in, alg_in = best_alg, best_alg = x;
2558 best_alg->log[best_alg->ops] = 0;
2559 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2560 }
2561 }
2562 else
2563 {
2564 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2565
2566 op_cost = add_cost (speed, mode);
2567 new_limit.cost = best_cost.cost - op_cost;
2568 new_limit.latency = best_cost.latency - op_cost;
2569 synth_mult (alg_in, t - 1, &new_limit, mode);
2570
2571 alg_in->cost.cost += op_cost;
2572 alg_in->cost.latency += op_cost;
2573 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2574 {
2575 struct algorithm *x;
2576 best_cost = alg_in->cost;
2577 x = alg_in, alg_in = best_alg, best_alg = x;
2578 best_alg->log[best_alg->ops] = 0;
2579 best_alg->op[best_alg->ops] = alg_add_t_m2;
2580 }
2581 }
2582
2583 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2584 quickly with a - a * n for some appropriate constant n. */
2585 m = exact_log2 (-orig_t + 1);
2586 if (m >= 0 && m < maxm)
2587 {
2588 op_cost = shiftsub1_cost (speed, mode, m);
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2592 &new_limit, mode);
2593
2594 alg_in->cost.cost += op_cost;
2595 alg_in->cost.latency += op_cost;
2596 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2597 {
2598 struct algorithm *x;
2599 best_cost = alg_in->cost;
2600 x = alg_in, alg_in = best_alg, best_alg = x;
2601 best_alg->log[best_alg->ops] = m;
2602 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2603 }
2604 }
2605
2606 if (cache_hit)
2607 goto done;
2608 }
2609
2610 /* Look for factors of t of the form
2611 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2612 If we find such a factor, we can multiply by t using an algorithm that
2613 multiplies by q, shift the result by m and add/subtract it to itself.
2614
2615 We search for large factors first and loop down, even if large factors
2616 are less probable than small; if we find a large factor we will find a
2617 good sequence quickly, and therefore be able to prune (by decreasing
2618 COST_LIMIT) the search. */
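 /* For example, t = 45 factors as (2**3 + 1) * (2**2 + 1), so x * 45
 can be computed as t1 = (x << 2) + x; result = (t1 << 3) + t1. */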
2619
2620 do_alg_addsub_factor:
2621 for (m = floor_log2 (t - 1); m >= 2; m--)
2622 {
2623 unsigned HOST_WIDE_INT d;
2624
2625 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2626 if (t % d == 0 && t > d && m < maxm
2627 && (!cache_hit || cache_alg == alg_add_factor))
2628 {
2629 /* If the target has a cheap shift-and-add instruction use
2630 that in preference to a shift insn followed by an add insn.
2631 Assume that the shift-and-add is "atomic" with a latency
2632 equal to its cost, otherwise assume that on superscalar
2633 hardware the shift may be executed concurrently with the
2634 earlier steps in the algorithm. */
2635 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2636 if (shiftadd_cost (speed, mode, m) < op_cost)
2637 {
2638 op_cost = shiftadd_cost (speed, mode, m);
2639 op_latency = op_cost;
2640 }
2641 else
2642 op_latency = add_cost (speed, mode);
2643
2644 new_limit.cost = best_cost.cost - op_cost;
2645 new_limit.latency = best_cost.latency - op_latency;
2646 synth_mult (alg_in, t / d, &new_limit, mode);
2647
2648 alg_in->cost.cost += op_cost;
2649 alg_in->cost.latency += op_latency;
2650 if (alg_in->cost.latency < op_cost)
2651 alg_in->cost.latency = op_cost;
2652 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2653 {
2654 struct algorithm *x;
2655 best_cost = alg_in->cost;
2656 x = alg_in, alg_in = best_alg, best_alg = x;
2657 best_alg->log[best_alg->ops] = m;
2658 best_alg->op[best_alg->ops] = alg_add_factor;
2659 }
2660 /* Other factors will have been taken care of in the recursion. */
2661 break;
2662 }
2663
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_sub_factor))
2667 {
2668 /* If the target has a cheap shift-and-subtract insn use
2669 that in preference to a shift insn followed by a sub insn.
2670 Assume that the shift-and-sub is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2675 if (shiftsub0_cost (speed, mode, m) < op_cost)
2676 {
2677 op_cost = shiftsub0_cost (speed, mode, m);
2678 op_latency = op_cost;
2679 }
2680 else
2681 op_latency = add_cost (speed, mode);
2682
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2686
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692 {
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_sub_factor;
2698 }
2699 break;
2700 }
2701 }
2702 if (cache_hit)
2703 goto done;
2704
2705 /* Try shift-and-add (load effective address) instructions,
2706 i.e. do a*3, a*5, a*9. */
2707 if ((t & 1) != 0)
2708 {
2709 do_alg_add_t2_m:
2710 q = t - 1;
2711 q = q & -q;
2712 m = exact_log2 (q);
2713 if (m >= 0 && m < maxm)
2714 {
2715 op_cost = shiftadd_cost (speed, mode, m);
2716 new_limit.cost = best_cost.cost - op_cost;
2717 new_limit.latency = best_cost.latency - op_cost;
2718 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2719
2720 alg_in->cost.cost += op_cost;
2721 alg_in->cost.latency += op_cost;
2722 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2723 {
2724 struct algorithm *x;
2725 best_cost = alg_in->cost;
2726 x = alg_in, alg_in = best_alg, best_alg = x;
2727 best_alg->log[best_alg->ops] = m;
2728 best_alg->op[best_alg->ops] = alg_add_t2_m;
2729 }
2730 }
2731 if (cache_hit)
2732 goto done;
2733
2734 do_alg_sub_t2_m:
2735 q = t + 1;
2736 q = q & -q;
2737 m = exact_log2 (q);
2738 if (m >= 0 && m < maxm)
2739 {
2740 op_cost = shiftsub0_cost (speed, mode, m);
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2744
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2748 {
2749 struct algorithm *x;
2750 best_cost = alg_in->cost;
2751 x = alg_in, alg_in = best_alg, best_alg = x;
2752 best_alg->log[best_alg->ops] = m;
2753 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2754 }
2755 }
2756 if (cache_hit)
2757 goto done;
2758 }
2759
2760 done:
2761 /* If best_cost has not decreased, we have not found any algorithm. */
2762 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2763 {
2764 /* We failed to find an algorithm. Record alg_impossible for
2765 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2766 we are asked to find an algorithm for T within the same or
2767 lower COST_LIMIT, we can immediately return to the
2768 caller. */
2769 entry_ptr->t = t;
2770 entry_ptr->mode = mode;
2771 entry_ptr->speed = speed;
2772 entry_ptr->alg = alg_impossible;
2773 entry_ptr->cost = *cost_limit;
2774 return;
2775 }
2776
2777 /* Cache the result. */
2778 if (!cache_hit)
2779 {
2780 entry_ptr->t = t;
2781 entry_ptr->mode = mode;
2782 entry_ptr->speed = speed;
2783 entry_ptr->alg = best_alg->op[best_alg->ops];
2784 entry_ptr->cost.cost = best_cost.cost;
2785 entry_ptr->cost.latency = best_cost.latency;
2786 }
2787
2788 /* If we are getting too long a sequence for `struct algorithm'
2789 to record, make this search fail. */
2790 if (best_alg->ops == MAX_BITS_PER_WORD)
2791 return;
2792
2793 /* Copy the algorithm from temporary space to the space at alg_out.
2794 We avoid using structure assignment because the majority of
2795 best_alg is normally undefined, and this is a critical function. */
2796 alg_out->ops = best_alg->ops + 1;
2797 alg_out->cost = best_cost;
2798 memcpy (alg_out->op, best_alg->op,
2799 alg_out->ops * sizeof *alg_out->op);
2800 memcpy (alg_out->log, best_alg->log,
2801 alg_out->ops * sizeof *alg_out->log);
2802 }
2803 \f
2804 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2805 Try three variations:
2806
2807 - a shift/add sequence based on VAL itself
2808 - a shift/add sequence based on -VAL, followed by a negation
2809 - a shift/add sequence based on VAL - 1, followed by an addition.
2810
2811 Return true if the cheapest of these cost less than MULT_COST,
2812 describing the algorithm in *ALG and final fixup in *VARIANT. */
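 /* For example, a multiplication by -7 may be cheapest as the negate
 variant: compute x * 7 as (x << 3) - x and then negate the result. */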
2813
2814 static bool
2815 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2816 struct algorithm *alg, enum mult_variant *variant,
2817 int mult_cost)
2818 {
2819 struct algorithm alg2;
2820 struct mult_cost limit;
2821 int op_cost;
2822 bool speed = optimize_insn_for_speed_p ();
2823
2824 /* Fail quickly for impossible bounds. */
2825 if (mult_cost < 0)
2826 return false;
2827
2828 /* Ensure that mult_cost provides a reasonable upper bound.
2829 Any constant multiplication can be performed with less
2830 than 2 * bits additions. */
2831 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2832 if (mult_cost > op_cost)
2833 mult_cost = op_cost;
2834
2835 *variant = basic_variant;
2836 limit.cost = mult_cost;
2837 limit.latency = mult_cost;
2838 synth_mult (alg, val, &limit, mode);
2839
2840 /* This works only if the inverted value actually fits in an
2841 `unsigned int'. */
2842 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2843 {
2844 op_cost = neg_cost (speed, mode);
2845 if (MULT_COST_LESS (&alg->cost, mult_cost))
2846 {
2847 limit.cost = alg->cost.cost - op_cost;
2848 limit.latency = alg->cost.latency - op_cost;
2849 }
2850 else
2851 {
2852 limit.cost = mult_cost - op_cost;
2853 limit.latency = mult_cost - op_cost;
2854 }
2855
2856 synth_mult (&alg2, -val, &limit, mode);
2857 alg2.cost.cost += op_cost;
2858 alg2.cost.latency += op_cost;
2859 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2860 *alg = alg2, *variant = negate_variant;
2861 }
2862
2863 /* This proves very useful for division-by-constant. */
2864 op_cost = add_cost (speed, mode);
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2866 {
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2869 }
2870 else
2871 {
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2874 }
2875
2876 synth_mult (&alg2, val - 1, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = add_variant;
2881
2882 return MULT_COST_LESS (&alg->cost, mult_cost);
2883 }
2884
2885 /* A subroutine of expand_mult, used for constant multiplications.
2886 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2887 convenient. Use the shift/add sequence described by ALG and apply
2888 the final fixup specified by VARIANT. */
2889
2890 static rtx
2891 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2892 rtx target, const struct algorithm *alg,
2893 enum mult_variant variant)
2894 {
2895 HOST_WIDE_INT val_so_far;
2896 rtx insn, accum, tem;
2897 int opno;
2898 enum machine_mode nmode;
2899
2900 /* Avoid referencing memory over and over and invalid sharing
2901 on SUBREGs. */
2902 op0 = force_reg (mode, op0);
2903
2904 /* ACCUM starts out either as OP0 or as a zero, depending on
2905 the first operation. */
2906
2907 if (alg->op[0] == alg_zero)
2908 {
2909 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2910 val_so_far = 0;
2911 }
2912 else if (alg->op[0] == alg_m)
2913 {
2914 accum = copy_to_mode_reg (mode, op0);
2915 val_so_far = 1;
2916 }
2917 else
2918 gcc_unreachable ();
2919
2920 for (opno = 1; opno < alg->ops; opno++)
2921 {
2922 int log = alg->log[opno];
2923 rtx shift_subtarget = optimize ? 0 : accum;
2924 rtx add_target
2925 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2926 && !optimize)
2927 ? target : 0;
2928 rtx accum_target = optimize ? 0 : accum;
2929 rtx accum_inner;
2930
2931 switch (alg->op[opno])
2932 {
2933 case alg_shift:
2934 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2935 /* REG_EQUAL note will be attached to the following insn. */
2936 emit_move_insn (accum, tem);
2937 val_so_far <<= log;
2938 break;
2939
2940 case alg_add_t_m2:
2941 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2942 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2943 add_target ? add_target : accum_target);
2944 val_so_far += (HOST_WIDE_INT) 1 << log;
2945 break;
2946
2947 case alg_sub_t_m2:
2948 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2949 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2950 add_target ? add_target : accum_target);
2951 val_so_far -= (HOST_WIDE_INT) 1 << log;
2952 break;
2953
2954 case alg_add_t2_m:
2955 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2956 log, shift_subtarget, 0);
2957 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2958 add_target ? add_target : accum_target);
2959 val_so_far = (val_so_far << log) + 1;
2960 break;
2961
2962 case alg_sub_t2_m:
2963 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2964 log, shift_subtarget, 0);
2965 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2966 add_target ? add_target : accum_target);
2967 val_so_far = (val_so_far << log) - 1;
2968 break;
2969
2970 case alg_add_factor:
2971 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2972 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2973 add_target ? add_target : accum_target);
2974 val_so_far += val_so_far << log;
2975 break;
2976
2977 case alg_sub_factor:
2978 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2979 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2980 (add_target
2981 ? add_target : (optimize ? 0 : tem)));
2982 val_so_far = (val_so_far << log) - val_so_far;
2983 break;
2984
2985 default:
2986 gcc_unreachable ();
2987 }
2988
2989 if (SCALAR_INT_MODE_P (mode))
2990 {
2991 /* Write a REG_EQUAL note on the last insn so that we can cse
2992 multiplication sequences. Note that if ACCUM is a SUBREG,
2993 we've set the inner register and must properly indicate that. */
2994 tem = op0, nmode = mode;
2995 accum_inner = accum;
2996 if (GET_CODE (accum) == SUBREG)
2997 {
2998 accum_inner = SUBREG_REG (accum);
2999 nmode = GET_MODE (accum_inner);
3000 tem = gen_lowpart (nmode, op0);
3001 }
3002
3003 insn = get_last_insn ();
3004 set_dst_reg_note (insn, REG_EQUAL,
3005 gen_rtx_MULT (nmode, tem,
3006 gen_int_mode (val_so_far, nmode)),
3007 accum_inner);
3008 }
3009 }
3010
3011 if (variant == negate_variant)
3012 {
3013 val_so_far = -val_so_far;
3014 accum = expand_unop (mode, neg_optab, accum, target, 0);
3015 }
3016 else if (variant == add_variant)
3017 {
3018 val_so_far = val_so_far + 1;
3019 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3020 }
3021
3022 /* Compare only the bits of val and val_so_far that are significant
3023 in the result mode, to avoid sign-/zero-extension confusion. */
3024 nmode = GET_MODE_INNER (mode);
3025 if (nmode == VOIDmode)
3026 nmode = mode;
3027 val &= GET_MODE_MASK (nmode);
3028 val_so_far &= GET_MODE_MASK (nmode);
3029 gcc_assert (val == val_so_far);
3030
3031 return accum;
3032 }
3033
3034 /* Perform a multiplication and return an rtx for the result.
3035 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3036 TARGET is a suggestion for where to store the result (an rtx).
3037
3038 We check specially for a constant integer as OP1.
3039 If you want this check for OP0 as well, then before calling
3040 you should swap the two operands if OP0 would be constant. */
3041
3042 rtx
3043 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3044 int unsignedp)
3045 {
3046 enum mult_variant variant;
3047 struct algorithm algorithm;
3048 rtx scalar_op1;
3049 int max_cost;
3050 bool speed = optimize_insn_for_speed_p ();
3051 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3052
3053 if (CONSTANT_P (op0))
3054 {
3055 rtx temp = op0;
3056 op0 = op1;
3057 op1 = temp;
3058 }
3059
3060 /* For vectors, there are several simplifications that can be made if
3061 all elements of the vector constant are identical. */
3062 scalar_op1 = op1;
3063 if (GET_CODE (op1) == CONST_VECTOR)
3064 {
3065 int i, n = CONST_VECTOR_NUNITS (op1);
3066 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3067 for (i = 1; i < n; ++i)
3068 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3069 goto skip_scalar;
3070 }
3071
3072 if (INTEGRAL_MODE_P (mode))
3073 {
3074 rtx fake_reg;
3075 HOST_WIDE_INT coeff;
3076 bool is_neg;
3077 int mode_bitsize;
3078
3079 if (op1 == CONST0_RTX (mode))
3080 return op1;
3081 if (op1 == CONST1_RTX (mode))
3082 return op0;
3083 if (op1 == CONSTM1_RTX (mode))
3084 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3085 op0, target, 0);
3086
3087 if (do_trapv)
3088 goto skip_synth;
3089
3090 /* These are the operations that are potentially turned into
3091 a sequence of shifts and additions. */
3092 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3093
3094 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3095 less than or equal in size to `unsigned int' this doesn't matter.
3096 If the mode is larger than `unsigned int', then synth_mult works
3097 only if the constant value exactly fits in an `unsigned int' without
3098 any truncation. This means that multiplying by negative values does
3099 not work; results are off by 2^32 on a 32 bit machine. */
3100
3101 if (CONST_INT_P (scalar_op1))
3102 {
3103 coeff = INTVAL (scalar_op1);
3104 is_neg = coeff < 0;
3105 }
3106 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3107 {
3108 /* If we are multiplying in DImode, it may still be a win
3109 to try to work with shifts and adds. */
3110 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3111 && (CONST_DOUBLE_LOW (scalar_op1) > 0
3112 || (CONST_DOUBLE_LOW (scalar_op1) < 0
3113 && EXACT_POWER_OF_2_OR_ZERO_P
3114 (CONST_DOUBLE_LOW (scalar_op1)))))
3115 {
3116 coeff = CONST_DOUBLE_LOW (scalar_op1);
3117 is_neg = false;
3118 }
3119 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3120 {
3121 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3122 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3123 {
3124 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3125 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3126 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3127 return expand_shift (LSHIFT_EXPR, mode, op0,
3128 shift, target, unsignedp);
3129 }
3130 goto skip_synth;
3131 }
3132 else
3133 goto skip_synth;
3134 }
3135 else
3136 goto skip_synth;
3137
3138 /* We used to test optimize here, on the grounds that it's better to
3139 produce a smaller program when -O is not used. But this causes
3140 such a terrible slowdown sometimes that it seems better to always
3141 use synth_mult. */
3142
3143 /* Special case powers of two. */
3144 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3145 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3146 return expand_shift (LSHIFT_EXPR, mode, op0,
3147 floor_log2 (coeff), target, unsignedp);
3148
3149 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3150
3151 /* Attempt to handle multiplication of DImode values by negative
3152 coefficients, by performing the multiplication by a positive
3153 multiplier and then negating the result. */
3154 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3155 {
3156 /* It's safe to use -coeff even for INT_MIN, as the
3157 result is interpreted as an unsigned coefficient.
3158 Exclude cost of op0 from max_cost to match the cost
3159 calculation of the synth_mult. */
3160 coeff = -(unsigned HOST_WIDE_INT) coeff;
3161 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3162 - neg_cost (speed, mode));
3163 if (max_cost <= 0)
3164 goto skip_synth;
3165
3166 /* Special case powers of two. */
3167 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3168 {
3169 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3170 floor_log2 (coeff), target, unsignedp);
3171 return expand_unop (mode, neg_optab, temp, target, 0);
3172 }
3173
3174 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3175 max_cost))
3176 {
3177 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3178 &algorithm, variant);
3179 return expand_unop (mode, neg_optab, temp, target, 0);
3180 }
3181 goto skip_synth;
3182 }
3183
3184 /* Exclude cost of op0 from max_cost to match the cost
3185 calculation of the synth_mult. */
3186 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3187 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3188 return expand_mult_const (mode, op0, coeff, target,
3189 &algorithm, variant);
3190 }
3191 skip_synth:
3192
3193 /* Expand x*2.0 as x+x. */
3194 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3195 {
3196 REAL_VALUE_TYPE d;
3197 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3198
3199 if (REAL_VALUES_EQUAL (d, dconst2))
3200 {
3201 op0 = force_reg (GET_MODE (op0), op0);
3202 return expand_binop (mode, add_optab, op0, op0,
3203 target, unsignedp, OPTAB_LIB_WIDEN);
3204 }
3205 }
3206 skip_scalar:
3207
3208 /* This used to use umul_optab if unsigned, but for non-widening multiply
3209 there is no difference between signed and unsigned. */
3210 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3211 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3212 gcc_assert (op0);
3213 return op0;
3214 }
3215
3216 /* Return a cost estimate for multiplying a register by the given
3217 COEFFicient in the given MODE and SPEED. */
3218
3219 int
3220 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3221 {
3222 int max_cost;
3223 struct algorithm algorithm;
3224 enum mult_variant variant;
3225
3226 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3227 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3228 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3229 return algorithm.cost.cost;
3230 else
3231 return max_cost;
3232 }
3233
3234 /* Perform a widening multiplication and return an rtx for the result.
3235 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3236 TARGET is a suggestion for where to store the result (an rtx).
3237 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3238 or smul_widen_optab.
3239
3240 We check specially for a constant integer as OP1, comparing the
3241 cost of a widening multiply against the cost of a sequence of shifts
3242 and adds. */
3243
3244 rtx
3245 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3246 int unsignedp, optab this_optab)
3247 {
3248 bool speed = optimize_insn_for_speed_p ();
3249 rtx cop1;
3250
3251 if (CONST_INT_P (op1)
3252 && GET_MODE (op0) != VOIDmode
3253 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3254 this_optab == umul_widen_optab))
3255 && CONST_INT_P (cop1)
3256 && (INTVAL (cop1) >= 0
3257 || HWI_COMPUTABLE_MODE_P (mode)))
3258 {
3259 HOST_WIDE_INT coeff = INTVAL (cop1);
3260 int max_cost;
3261 enum mult_variant variant;
3262 struct algorithm algorithm;
3263
3264 /* Special case powers of two. */
3265 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3266 {
3267 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3268 return expand_shift (LSHIFT_EXPR, mode, op0,
3269 floor_log2 (coeff), target, unsignedp);
3270 }
3271
3272 /* Exclude cost of op0 from max_cost to match the cost
3273 calculation of the synth_mult. */
3274 max_cost = mul_widen_cost (speed, mode);
3275 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3276 max_cost))
3277 {
3278 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3279 return expand_mult_const (mode, op0, coeff, target,
3280 &algorithm, variant);
3281 }
3282 }
3283 return expand_binop (mode, this_optab, op0, op1, target,
3284 unsignedp, OPTAB_LIB_WIDEN);
3285 }
3286 \f
3287 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3288 replace division by D, and put the least significant N bits of the result
3289 in *MULTIPLIER_PTR and return the most significant bit.
3290
3291 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3292 needed precision is in PRECISION (should be <= N).
3293
3294 PRECISION should be as small as possible so this function can choose
3295 multiplier more freely.
3296
3297 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3298 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3299
3300 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3301 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
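 /* For example, with D = 5 and N = PRECISION = 32 this chooses the
 multiplier 0xCCCCCCCD with a post shift of 2, so for a 32-bit
 unsigned x (with the product formed in a wide enough mode)
 x / 5 == (x * 0xCCCCCCCD) >> 34. */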
3302
3303 unsigned HOST_WIDE_INT
3304 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3305 unsigned HOST_WIDE_INT *multiplier_ptr,
3306 int *post_shift_ptr, int *lgup_ptr)
3307 {
3308 double_int mhigh, mlow;
3309 int lgup, post_shift;
3310 int pow, pow2;
3311
3312 /* lgup = ceil(log2(divisor)); */
3313 lgup = ceil_log2 (d);
3314
3315 gcc_assert (lgup <= n);
3316
3317 pow = n + lgup;
3318 pow2 = n + lgup - precision;
3319
3320 /* We could handle this with some effort, but this case is much
3321 better handled directly with a scc insn, so rely on caller using
3322 that. */
3323 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3324
3325 /* mlow = 2^(N + lgup)/d */
3326 double_int val = double_int_zero.set_bit (pow);
3327 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3328
3329 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3330 val |= double_int_zero.set_bit (pow2);
3331 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3332
3333 gcc_assert (!mhigh.high || val.high - d < d);
3334 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3335 /* Assert that mlow < mhigh. */
3336 gcc_assert (mlow.ult (mhigh));
3337
3338 /* If precision == N, then mlow, mhigh exceed 2^N
3339 (but they do not exceed 2^(N+1)). */
3340
3341 /* Reduce to lowest terms. */
3342 for (post_shift = lgup; post_shift > 0; post_shift--)
3343 {
3344 int shft = HOST_BITS_PER_WIDE_INT - 1;
3345 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3346 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3347 if (ml_lo >= mh_lo)
3348 break;
3349
3350 mlow = double_int::from_uhwi (ml_lo);
3351 mhigh = double_int::from_uhwi (mh_lo);
3352 }
3353
3354 *post_shift_ptr = post_shift;
3355 *lgup_ptr = lgup;
3356 if (n < HOST_BITS_PER_WIDE_INT)
3357 {
3358 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3359 *multiplier_ptr = mhigh.low & mask;
3360 return mhigh.low >= mask;
3361 }
3362 else
3363 {
3364 *multiplier_ptr = mhigh.low;
3365 return mhigh.high;
3366 }
3367 }
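/* Illustrative sketch (editorial addition, not part of this file): for
   32-bit unsigned division by 3 the routine above effectively yields the
   multiplier 0xAAAAAAAB with a post-shift of 1, so x / 3 becomes the high
   half of a 32x32->64 multiply followed by one more right shift.  The
   helper names udiv3_by_mult and check_udiv3 are hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
udiv3_by_mult (uint32_t x)
{
  /* 0xAAAAAAAB == ceil (2^33 / 3); take the high 32 bits of the product
     and shift right once more (post-shift of 1).  */
  uint64_t prod = (uint64_t) x * 0xAAAAAAABu;
  return (uint32_t) (prod >> 33);
}

static void
check_udiv3 (void)
{
  for (uint32_t x = 0; x < 1000000; x++)
    assert (udiv3_by_mult (x) == x / 3);
}
#endif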
3368
3369 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3370 congruent to 1 (mod 2**N). */
3371
3372 static unsigned HOST_WIDE_INT
3373 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3374 {
3375 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3376
3377 /* The algorithm notes that the choice y = x satisfies
3378 x*y == 1 mod 2^3, since x is assumed odd.
3379 Each iteration doubles the number of bits of significance in y. */
3380
3381 unsigned HOST_WIDE_INT mask;
3382 unsigned HOST_WIDE_INT y = x;
3383 int nbit = 3;
3384
3385 mask = (n == HOST_BITS_PER_WIDE_INT
3386 ? ~(unsigned HOST_WIDE_INT) 0
3387 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3388
3389 while (nbit < n)
3390 {
3391 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3392 nbit *= 2;
3393 }
3394 return y;
3395 }
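/* Illustrative sketch (editorial addition, not part of this file): a
   stand-alone version of the Newton iteration above, using uint64_t in
   place of unsigned HOST_WIDE_INT, plus a spot check that the inverse of
   3 mod 2^32 is 0xAAAAAAAB (since 3 * 0xAAAAAAAB == 2^33 + 1).  The names
   invert_mod2n_sketch and check_invert are hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t
invert_mod2n_sketch (uint64_t x, int n)
{
  uint64_t mask = (n == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << n) - 1);
  uint64_t y = x;		/* Already correct to 3 bits: x*x == 1 mod 8.  */
  int nbit = 3;

  while (nbit < n)
    {
      y = y * (2 - x * y) & mask;	/* Doubles the bits of significance.  */
      nbit *= 2;
    }
  return y;
}

static void
check_invert (void)
{
  assert (invert_mod2n_sketch (3, 32) == 0xAAAAAAABu);
  for (uint64_t x = 1; x < 1001; x += 2)
    assert (((x * invert_mod2n_sketch (x, 32)) & 0xFFFFFFFFu) == 1);
}
#endif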
3396
3397 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3398    done with the wrong signedness flavor.  ADJ_OPERAND is already the high
3399    half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3400    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3401    product to become signed.
3402
3403 The result is put in TARGET if that is convenient.
3404
3405 MODE is the mode of operation. */
3406
3407 rtx
3408 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3409 rtx op1, rtx target, int unsignedp)
3410 {
3411 rtx tem;
3412 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3413
3414 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3415 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3416 tem = expand_and (mode, tem, op1, NULL_RTX);
3417 adj_operand
3418 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3419 adj_operand);
3420
3421 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3422 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3423 tem = expand_and (mode, tem, op0, NULL_RTX);
3424 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3425 target);
3426
3427 return target;
3428 }
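/* Illustrative note (editorial addition, not part of this file): the
   adjustment above relies on the identity

     high_signed (a, b) = high_unsigned (a, b) - (a < 0 ? b : 0)
                                               - (b < 0 ? a : 0)   (mod 2^N)

   (and the same with additions in the other direction), which follows
   from a_unsigned = a + (a < 0) * 2^N.  A stand-alone 32-bit check, with
   hypothetical helpers high_s, high_u and check_adjust, assuming the
   usual two's complement arithmetic right shift:  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
high_s (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * b) >> 32);	/* signed high half */
}

static uint32_t
high_u (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);	/* unsigned high half */
}

static void
check_adjust (int32_t a, int32_t b)
{
  /* (a >> 31) & b mirrors the shift-and-AND sequence emitted above.  */
  uint32_t adj = high_u ((uint32_t) a, (uint32_t) b)
		 - (uint32_t) ((a >> 31) & b)
		 - (uint32_t) ((b >> 31) & a);
  assert ((int32_t) adj == high_s (a, b));
}
#endif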
3429
3430 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3431
3432 static rtx
3433 extract_high_half (enum machine_mode mode, rtx op)
3434 {
3435 enum machine_mode wider_mode;
3436
3437 if (mode == word_mode)
3438 return gen_highpart (mode, op);
3439
3440 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3441
3442 wider_mode = GET_MODE_WIDER_MODE (mode);
3443 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3444 GET_MODE_BITSIZE (mode), 0, 1);
3445 return convert_modes (mode, wider_mode, op, 0);
3446 }
3447
3448 /* Like expmed_mult_highpart, but only consider using a multiplication
3449 optab. OP1 is an rtx for the constant operand. */
3450
3451 static rtx
3452 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3453 rtx target, int unsignedp, int max_cost)
3454 {
3455 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3456 enum machine_mode wider_mode;
3457 optab moptab;
3458 rtx tem;
3459 int size;
3460 bool speed = optimize_insn_for_speed_p ();
3461
3462 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3463
3464 wider_mode = GET_MODE_WIDER_MODE (mode);
3465 size = GET_MODE_BITSIZE (mode);
3466
3467 /* Firstly, try using a multiplication insn that only generates the needed
3468 high part of the product, and in the sign flavor of unsignedp. */
3469 if (mul_highpart_cost (speed, mode) < max_cost)
3470 {
3471 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3472 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3473 unsignedp, OPTAB_DIRECT);
3474 if (tem)
3475 return tem;
3476 }
3477
3478 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3479 Need to adjust the result after the multiplication. */
3480 if (size - 1 < BITS_PER_WORD
3481 && (mul_highpart_cost (speed, mode)
3482 + 2 * shift_cost (speed, mode, size-1)
3483 + 4 * add_cost (speed, mode) < max_cost))
3484 {
3485 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3486 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3487 unsignedp, OPTAB_DIRECT);
3488 if (tem)
3489 /* We used the wrong signedness. Adjust the result. */
3490 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3491 tem, unsignedp);
3492 }
3493
3494 /* Try widening multiplication. */
3495 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3496 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3497 && mul_widen_cost (speed, wider_mode) < max_cost)
3498 {
3499 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3500 unsignedp, OPTAB_WIDEN);
3501 if (tem)
3502 return extract_high_half (mode, tem);
3503 }
3504
3505   /* Try widening the mode and performing a non-widening multiplication.  */
3506 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3507 && size - 1 < BITS_PER_WORD
3508 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3509 < max_cost))
3510 {
3511 rtx insns, wop0, wop1;
3512
3513 /* We need to widen the operands, for example to ensure the
3514 constant multiplier is correctly sign or zero extended.
3515 	 Use a sequence to clean up any instructions emitted by
3516 the conversions if things don't work out. */
3517 start_sequence ();
3518 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3519 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3520 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3521 unsignedp, OPTAB_WIDEN);
3522 insns = get_insns ();
3523 end_sequence ();
3524
3525 if (tem)
3526 {
3527 emit_insn (insns);
3528 return extract_high_half (mode, tem);
3529 }
3530 }
3531
3532 /* Try widening multiplication of opposite signedness, and adjust. */
3533 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3534 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3535 && size - 1 < BITS_PER_WORD
3536 && (mul_widen_cost (speed, wider_mode)
3537 + 2 * shift_cost (speed, mode, size-1)
3538 + 4 * add_cost (speed, mode) < max_cost))
3539 {
3540 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3541 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3542 if (tem != 0)
3543 {
3544 tem = extract_high_half (mode, tem);
3545 /* We used the wrong signedness. Adjust the result. */
3546 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3547 target, unsignedp);
3548 }
3549 }
3550
3551 return 0;
3552 }
3553
3554 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3555 putting the high half of the result in TARGET if that is convenient,
3556    and return where the result is.  If the operation cannot be performed,
3557 0 is returned.
3558
3559 MODE is the mode of operation and result.
3560
3561 UNSIGNEDP nonzero means unsigned multiply.
3562
3563 MAX_COST is the total allowed cost for the expanded RTL. */
3564
3565 static rtx
3566 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3567 rtx target, int unsignedp, int max_cost)
3568 {
3569 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3570 unsigned HOST_WIDE_INT cnst1;
3571 int extra_cost;
3572 bool sign_adjust = false;
3573 enum mult_variant variant;
3574 struct algorithm alg;
3575 rtx tem;
3576 bool speed = optimize_insn_for_speed_p ();
3577
3578 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3579   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3580 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3581
3582 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3583
3584 /* We can't optimize modes wider than BITS_PER_WORD.
3585 ??? We might be able to perform double-word arithmetic if
3586      mode == word_mode; however, all the cost calculations in
3587 synth_mult etc. assume single-word operations. */
3588 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3589 return expmed_mult_highpart_optab (mode, op0, op1, target,
3590 unsignedp, max_cost);
3591
3592 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3593
3594 /* Check whether we try to multiply by a negative constant. */
3595 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3596 {
3597 sign_adjust = true;
3598 extra_cost += add_cost (speed, mode);
3599 }
3600
3601 /* See whether shift/add multiplication is cheap enough. */
3602 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3603 max_cost - extra_cost))
3604 {
3605 /* See whether the specialized multiplication optabs are
3606 cheaper than the shift/add version. */
3607 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3608 alg.cost.cost + extra_cost);
3609 if (tem)
3610 return tem;
3611
3612 tem = convert_to_mode (wider_mode, op0, unsignedp);
3613 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3614 tem = extract_high_half (mode, tem);
3615
3616 /* Adjust result for signedness. */
3617 if (sign_adjust)
3618 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3619
3620 return tem;
3621 }
3622 return expmed_mult_highpart_optab (mode, op0, op1, target,
3623 unsignedp, max_cost);
3624 }
3625
3626
3627 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3628
3629 static rtx
3630 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3631 {
3632 unsigned HOST_WIDE_INT masklow, maskhigh;
3633 rtx result, temp, shift, label;
3634 int logd;
3635
3636 logd = floor_log2 (d);
3637 result = gen_reg_rtx (mode);
3638
3639 /* Avoid conditional branches when they're expensive. */
3640 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3641 && optimize_insn_for_speed_p ())
3642 {
3643 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3644 mode, 0, -1);
3645 if (signmask)
3646 {
3647 signmask = force_reg (mode, signmask);
3648 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3649 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3650
3651 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3652 which instruction sequence to use. If logical right shifts
3653 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3654 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3655
3656 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3657 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3658 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3659 > COSTS_N_INSNS (2)))
3660 {
3661 temp = expand_binop (mode, xor_optab, op0, signmask,
3662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663 temp = expand_binop (mode, sub_optab, temp, signmask,
3664 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 temp = expand_binop (mode, and_optab, temp,
3666 gen_int_mode (masklow, mode),
3667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 temp = expand_binop (mode, xor_optab, temp, signmask,
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 }
3673 else
3674 {
3675 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3676 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677 signmask = force_reg (mode, signmask);
3678
3679 temp = expand_binop (mode, add_optab, op0, signmask,
3680 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3681 temp = expand_binop (mode, and_optab, temp,
3682 gen_int_mode (masklow, mode),
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, sub_optab, temp, signmask,
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 }
3687 return temp;
3688 }
3689 }
3690
3691 /* Mask contains the mode's signbit and the significant bits of the
3692 modulus. By including the signbit in the operation, many targets
3693 can avoid an explicit compare operation in the following comparison
3694 against zero. */
3695
3696 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3697 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3698 {
3699 masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3700 maskhigh = -1;
3701 }
3702 else
3703 maskhigh = HOST_WIDE_INT_M1U
3704 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3705
3706 temp = expand_binop (mode, and_optab, op0,
3707 immed_double_const (masklow, maskhigh, mode),
3708 result, 1, OPTAB_LIB_WIDEN);
3709 if (temp != result)
3710 emit_move_insn (result, temp);
3711
3712 label = gen_label_rtx ();
3713 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3714
3715 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3716 0, OPTAB_LIB_WIDEN);
3717 masklow = HOST_WIDE_INT_M1U << logd;
3718 maskhigh = -1;
3719 temp = expand_binop (mode, ior_optab, temp,
3720 immed_double_const (masklow, maskhigh, mode),
3721 result, 1, OPTAB_LIB_WIDEN);
3722 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3723 0, OPTAB_LIB_WIDEN);
3724 if (temp != result)
3725 emit_move_insn (result, temp);
3726 emit_label (label);
3727 return result;
3728 }
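/* Illustrative sketch (editorial addition, not part of this file): the
   shift-based branch-free variant above, specialized to x % 4 on a
   32-bit two's complement int.  The names smod4_sketch and check_smod4
   are hypothetical.  */
#if 0
#include <assert.h>

static int
smod4_sketch (int x)
{
  int signmask = -(x < 0);			     /* 0 or -1 */
  unsigned bias = (unsigned) signmask >> (32 - 2);   /* 0 or 3 (== d - 1) */

  /* Add the bias, mask to the low bits, then remove the bias again,
     mirroring the add/and/sub sequence above.  */
  return ((x + (int) bias) & 3) - (int) bias;
}

static void
check_smod4 (void)
{
  for (int x = -1000; x <= 1000; x++)
    assert (smod4_sketch (x) == x % 4);
}
#endif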
3729
3730 /* Expand signed division of OP0 by a power of two D in mode MODE.
3731 This routine is only called for positive values of D. */
3732
3733 static rtx
3734 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3735 {
3736 rtx temp, label;
3737 int logd;
3738
3739 logd = floor_log2 (d);
3740
3741 if (d == 2
3742 && BRANCH_COST (optimize_insn_for_speed_p (),
3743 false) >= 1)
3744 {
3745 temp = gen_reg_rtx (mode);
3746 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3747 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3748 0, OPTAB_LIB_WIDEN);
3749 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3750 }
3751
3752 #ifdef HAVE_conditional_move
3753 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3754 >= 2)
3755 {
3756 rtx temp2;
3757
3758 start_sequence ();
3759 temp2 = copy_to_mode_reg (mode, op0);
3760 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3761 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3762 temp = force_reg (mode, temp);
3763
3764 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3765 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3766 mode, temp, temp2, mode, 0);
3767 if (temp2)
3768 {
3769 rtx seq = get_insns ();
3770 end_sequence ();
3771 emit_insn (seq);
3772 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3773 }
3774 end_sequence ();
3775 }
3776 #endif
3777
3778 if (BRANCH_COST (optimize_insn_for_speed_p (),
3779 false) >= 2)
3780 {
3781 int ushift = GET_MODE_BITSIZE (mode) - logd;
3782
3783 temp = gen_reg_rtx (mode);
3784 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3785 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3786 > COSTS_N_INSNS (1))
3787 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3788 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3789 else
3790 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3791 ushift, NULL_RTX, 1);
3792 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793 0, OPTAB_LIB_WIDEN);
3794 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 }
3796
3797 label = gen_label_rtx ();
3798 temp = copy_to_mode_reg (mode, op0);
3799 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3800 expand_inc (temp, gen_int_mode (d - 1, mode));
3801 emit_label (label);
3802 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3803 }
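/* Illustrative sketch (editorial addition, not part of this file): the
   branch-free store-flag variant above, specialized to x / 8 on a 32-bit
   two's complement int with an arithmetic right shift.  The names
   sdiv8_sketch and check_sdiv8 are hypothetical.  */
#if 0
#include <assert.h>

static int
sdiv8_sketch (int x)
{
  /* 7 if x < 0, else 0: the sign mask shifted logically by size - logd.  */
  unsigned bias = (unsigned) (x >> 31) >> (32 - 3);

  /* Adding d - 1 to negative operands before the arithmetic shift turns
     floor rounding into the truncation that signed division requires.  */
  return (x + (int) bias) >> 3;
}

static void
check_sdiv8 (void)
{
  for (int x = -1000; x <= 1000; x++)
    assert (sdiv8_sketch (x) == x / 8);
}
#endif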
3804 \f
3805 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3806 if that is convenient, and returning where the result is.
3807 You may request either the quotient or the remainder as the result;
3808 specify REM_FLAG nonzero to get the remainder.
3809
3810 CODE is the expression code for which kind of division this is;
3811 it controls how rounding is done. MODE is the machine mode to use.
3812 UNSIGNEDP nonzero means do unsigned division. */
3813
3814 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3815 and then correct it by or'ing in missing high bits
3816 if result of ANDI is nonzero.
3817 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3818 This could optimize to a bfexts instruction.
3819 But C doesn't use these operations, so their optimizations are
3820 left for later. */
3821 /* ??? For modulo, we don't actually need the highpart of the first product,
3822 the low part will do nicely. And for small divisors, the second multiply
3823 can also be a low-part only multiply or even be completely left out.
3824 E.g. to calculate the remainder of a division by 3 with a 32 bit
3825 multiply, multiply with 0x55555556 and extract the upper two bits;
3826 the result is exact for inputs up to 0x1fffffff.
3827 The input range can be reduced by using cross-sum rules.
3828 For odd divisors >= 3, the following table gives right shift counts
3829 so that if a number is shifted by an integer multiple of the given
3830 amount, the remainder stays the same:
3831 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3832 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3833 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3834 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3835 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3836
3837 Cross-sum rules for even numbers can be derived by leaving as many bits
3838 to the right alone as the divisor has zeros to the right.
3839 E.g. if x is an unsigned 32 bit number:
3840 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3841 */
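/* Illustrative note (editorial addition, not part of this file): the
   nonzero entries in the table above appear to be the multiplicative
   order of 2 modulo the odd divisor, i.e. the smallest k with
   2^k == 1 (mod d); a zero seems to mark divisors with no usefully small
   k.  Because 2^k == 1 (mod d), shifting left by a multiple of k does not
   change the residue, and x can be reduced by summing its base-2^k digits
   (a cross-sum).  A small spot check, with the hypothetical helper
   check_shift_table:  */
#if 0
#include <assert.h>

static void
check_shift_table (void)
{
  assert ((1u << 2) % 3 == 1);		/* d = 3,  k = 2  */
  assert ((1u << 4) % 5 == 1);		/* d = 5,  k = 4  */
  assert ((1u << 3) % 7 == 1);		/* d = 7,  k = 3  */

  /* Cross-sum reduction for d = 3, k = 2: sum the base-4 digits.  */
  unsigned x = 123456u;
  unsigned s = 0;
  for (unsigned t = x; t != 0; t >>= 2)
    s += t & 3;
  assert (s % 3 == x % 3);
}
#endif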
3842
3843 rtx
3844 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3845 rtx op0, rtx op1, rtx target, int unsignedp)
3846 {
3847 enum machine_mode compute_mode;
3848 rtx tquotient;
3849 rtx quotient = 0, remainder = 0;
3850 rtx last;
3851 int size;
3852 rtx insn;
3853 optab optab1, optab2;
3854 int op1_is_constant, op1_is_pow2 = 0;
3855 int max_cost, extra_cost;
3856 static HOST_WIDE_INT last_div_const = 0;
3857 bool speed = optimize_insn_for_speed_p ();
3858
3859 op1_is_constant = CONST_INT_P (op1);
3860 if (op1_is_constant)
3861 {
3862 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3863 if (unsignedp)
3864 ext_op1 &= GET_MODE_MASK (mode);
3865 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3866 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3867 }
3868
3869 /*
3870 This is the structure of expand_divmod:
3871
3872 First comes code to fix up the operands so we can perform the operations
3873 correctly and efficiently.
3874
3875 Second comes a switch statement with code specific for each rounding mode.
3876 For some special operands this code emits all RTL for the desired
3877 operation, for other cases, it generates only a quotient and stores it in
3878 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3879 to indicate that it has not done anything.
3880
3881 Last comes code that finishes the operation. If QUOTIENT is set and
3882 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3883 QUOTIENT is not set, it is computed using trunc rounding.
3884
3885 We try to generate special code for division and remainder when OP1 is a
3886 constant. If |OP1| = 2**n we can use shifts and some other fast
3887 operations. For other values of OP1, we compute a carefully selected
3888 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3889 by m.
3890
3891 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3892 half of the product. Different strategies for generating the product are
3893 implemented in expmed_mult_highpart.
3894
3895 If what we actually want is the remainder, we generate that by another
3896 by-constant multiplication and a subtraction. */
3897
3898 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3899 code below will malfunction if we are, so check here and handle
3900 the special case if so. */
3901 if (op1 == const1_rtx)
3902 return rem_flag ? const0_rtx : op0;
3903
3904 /* When dividing by -1, we could get an overflow.
3905 negv_optab can handle overflows. */
3906 if (! unsignedp && op1 == constm1_rtx)
3907 {
3908 if (rem_flag)
3909 return const0_rtx;
3910 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3911 ? negv_optab : neg_optab, op0, target, 0);
3912 }
3913
3914 if (target
3915 /* Don't use the function value register as a target
3916 since we have to read it as well as write it,
3917 and function-inlining gets confused by this. */
3918 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3919 /* Don't clobber an operand while doing a multi-step calculation. */
3920 || ((rem_flag || op1_is_constant)
3921 && (reg_mentioned_p (target, op0)
3922 || (MEM_P (op0) && MEM_P (target))))
3923 || reg_mentioned_p (target, op1)
3924 || (MEM_P (op1) && MEM_P (target))))
3925 target = 0;
3926
3927 /* Get the mode in which to perform this computation. Normally it will
3928 be MODE, but sometimes we can't do the desired operation in MODE.
3929 If so, pick a wider mode in which we can do the operation. Convert
3930 to that mode at the start to avoid repeated conversions.
3931
3932 First see what operations we need. These depend on the expression
3933 we are evaluating. (We assume that divxx3 insns exist under the
3934    same conditions as modxx3 insns do and that these insns don't normally
3935 fail. If these assumptions are not correct, we may generate less
3936 efficient code in some cases.)
3937
3938 Then see if we find a mode in which we can open-code that operation
3939 (either a division, modulus, or shift). Finally, check for the smallest
3940 mode for which we can do the operation with a library call. */
3941
3942 /* We might want to refine this now that we have division-by-constant
3943 optimization. Since expmed_mult_highpart tries so many variants, it is
3944 not straightforward to generalize this. Maybe we should make an array
3945 of possible modes in init_expmed? Save this for GCC 2.7. */
3946
3947 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3948 ? (unsignedp ? lshr_optab : ashr_optab)
3949 : (unsignedp ? udiv_optab : sdiv_optab));
3950 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3951 ? optab1
3952 : (unsignedp ? udivmod_optab : sdivmod_optab));
3953
3954 for (compute_mode = mode; compute_mode != VOIDmode;
3955 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3956 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3957 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3958 break;
3959
3960 if (compute_mode == VOIDmode)
3961 for (compute_mode = mode; compute_mode != VOIDmode;
3962 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3963 if (optab_libfunc (optab1, compute_mode)
3964 || optab_libfunc (optab2, compute_mode))
3965 break;
3966
3967 /* If we still couldn't find a mode, use MODE, but expand_binop will
3968 probably die. */
3969 if (compute_mode == VOIDmode)
3970 compute_mode = mode;
3971
3972 if (target && GET_MODE (target) == compute_mode)
3973 tquotient = target;
3974 else
3975 tquotient = gen_reg_rtx (compute_mode);
3976
3977 size = GET_MODE_BITSIZE (compute_mode);
3978 #if 0
3979 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3980 (mode), and thereby get better code when OP1 is a constant. Do that
3981 later. It will require going over all usages of SIZE below. */
3982 size = GET_MODE_BITSIZE (mode);
3983 #endif
3984
3985 /* Only deduct something for a REM if the last divide done was
3986 for a different constant. Then set the constant of the last
3987 divide. */
3988 max_cost = (unsignedp
3989 ? udiv_cost (speed, compute_mode)
3990 : sdiv_cost (speed, compute_mode));
3991 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3992 && INTVAL (op1) == last_div_const))
3993 max_cost -= (mul_cost (speed, compute_mode)
3994 + add_cost (speed, compute_mode));
3995
3996 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3997
3998 /* Now convert to the best mode to use. */
3999 if (compute_mode != mode)
4000 {
4001 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4002 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4003
4004 /* convert_modes may have placed op1 into a register, so we
4005 must recompute the following. */
4006 op1_is_constant = CONST_INT_P (op1);
4007 op1_is_pow2 = (op1_is_constant
4008 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4009 || (! unsignedp
4010 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4011 }
4012
4013 /* If one of the operands is a volatile MEM, copy it into a register. */
4014
4015 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4016 op0 = force_reg (compute_mode, op0);
4017 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4018 op1 = force_reg (compute_mode, op1);
4019
4020 /* If we need the remainder or if OP1 is constant, we need to
4021 put OP0 in a register in case it has any queued subexpressions. */
4022 if (rem_flag || op1_is_constant)
4023 op0 = force_reg (compute_mode, op0);
4024
4025 last = get_last_insn ();
4026
4027 /* Promote floor rounding to trunc rounding for unsigned operations. */
4028 if (unsignedp)
4029 {
4030 if (code == FLOOR_DIV_EXPR)
4031 code = TRUNC_DIV_EXPR;
4032 if (code == FLOOR_MOD_EXPR)
4033 code = TRUNC_MOD_EXPR;
4034 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4035 code = TRUNC_DIV_EXPR;
4036 }
4037
4038 if (op1 != const0_rtx)
4039 switch (code)
4040 {
4041 case TRUNC_MOD_EXPR:
4042 case TRUNC_DIV_EXPR:
4043 if (op1_is_constant)
4044 {
4045 if (unsignedp)
4046 {
4047 unsigned HOST_WIDE_INT mh, ml;
4048 int pre_shift, post_shift;
4049 int dummy;
4050 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4051 & GET_MODE_MASK (compute_mode));
4052
4053 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4054 {
4055 pre_shift = floor_log2 (d);
4056 if (rem_flag)
4057 {
4058 unsigned HOST_WIDE_INT mask
4059 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4060 remainder
4061 = expand_binop (compute_mode, and_optab, op0,
4062 gen_int_mode (mask, compute_mode),
4063 remainder, 1,
4064 OPTAB_LIB_WIDEN);
4065 if (remainder)
4066 return gen_lowpart (mode, remainder);
4067 }
4068 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4069 pre_shift, tquotient, 1);
4070 }
4071 else if (size <= HOST_BITS_PER_WIDE_INT)
4072 {
4073 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4074 {
4075 /* Most significant bit of divisor is set; emit an scc
4076 insn. */
4077 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4078 compute_mode, 1, 1);
4079 }
4080 else
4081 {
4082 /* Find a suitable multiplier and right shift count
4083 instead of multiplying with D. */
4084
4085 mh = choose_multiplier (d, size, size,
4086 &ml, &post_shift, &dummy);
4087
4088 /* If the suggested multiplier is more than SIZE bits,
4089 we can do better for even divisors, using an
4090 initial right shift. */
4091 if (mh != 0 && (d & 1) == 0)
4092 {
4093 pre_shift = floor_log2 (d & -d);
4094 mh = choose_multiplier (d >> pre_shift, size,
4095 size - pre_shift,
4096 &ml, &post_shift, &dummy);
4097 gcc_assert (!mh);
4098 }
4099 else
4100 pre_shift = 0;
4101
4102 if (mh != 0)
4103 {
4104 rtx t1, t2, t3, t4;
4105
4106 if (post_shift - 1 >= BITS_PER_WORD)
4107 goto fail1;
4108
4109 extra_cost
4110 = (shift_cost (speed, compute_mode, post_shift - 1)
4111 + shift_cost (speed, compute_mode, 1)
4112 + 2 * add_cost (speed, compute_mode));
4113 t1 = expmed_mult_highpart
4114 (compute_mode, op0,
4115 gen_int_mode (ml, compute_mode),
4116 NULL_RTX, 1, max_cost - extra_cost);
4117 if (t1 == 0)
4118 goto fail1;
4119 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4120 op0, t1),
4121 NULL_RTX);
4122 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4123 t2, 1, NULL_RTX, 1);
4124 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4125 t1, t3),
4126 NULL_RTX);
4127 quotient = expand_shift
4128 (RSHIFT_EXPR, compute_mode, t4,
4129 post_shift - 1, tquotient, 1);
4130 }
4131 else
4132 {
4133 rtx t1, t2;
4134
4135 if (pre_shift >= BITS_PER_WORD
4136 || post_shift >= BITS_PER_WORD)
4137 goto fail1;
4138
4139 t1 = expand_shift
4140 (RSHIFT_EXPR, compute_mode, op0,
4141 pre_shift, NULL_RTX, 1);
4142 extra_cost
4143 = (shift_cost (speed, compute_mode, pre_shift)
4144 + shift_cost (speed, compute_mode, post_shift));
4145 t2 = expmed_mult_highpart
4146 (compute_mode, t1,
4147 gen_int_mode (ml, compute_mode),
4148 NULL_RTX, 1, max_cost - extra_cost);
4149 if (t2 == 0)
4150 goto fail1;
4151 quotient = expand_shift
4152 (RSHIFT_EXPR, compute_mode, t2,
4153 post_shift, tquotient, 1);
4154 }
4155 }
4156 }
4157 	    else /* Mode too wide to use the tricky code */
4158 break;
4159
4160 insn = get_last_insn ();
4161 if (insn != last)
4162 set_dst_reg_note (insn, REG_EQUAL,
4163 gen_rtx_UDIV (compute_mode, op0, op1),
4164 quotient);
4165 }
4166 else /* TRUNC_DIV, signed */
4167 {
4168 unsigned HOST_WIDE_INT ml;
4169 int lgup, post_shift;
4170 rtx mlr;
4171 HOST_WIDE_INT d = INTVAL (op1);
4172 unsigned HOST_WIDE_INT abs_d;
4173
4174 /* Since d might be INT_MIN, we have to cast to
4175 unsigned HOST_WIDE_INT before negating to avoid
4176 undefined signed overflow. */
4177 abs_d = (d >= 0
4178 ? (unsigned HOST_WIDE_INT) d
4179 : - (unsigned HOST_WIDE_INT) d);
4180
4181 /* n rem d = n rem -d */
4182 if (rem_flag && d < 0)
4183 {
4184 d = abs_d;
4185 op1 = gen_int_mode (abs_d, compute_mode);
4186 }
4187
4188 if (d == 1)
4189 quotient = op0;
4190 else if (d == -1)
4191 quotient = expand_unop (compute_mode, neg_optab, op0,
4192 tquotient, 0);
4193 else if (HOST_BITS_PER_WIDE_INT >= size
4194 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4195 {
4196 /* This case is not handled correctly below. */
4197 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4198 compute_mode, 1, 1);
4199 if (quotient == 0)
4200 goto fail1;
4201 }
4202 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4203 && (rem_flag
4204 ? smod_pow2_cheap (speed, compute_mode)
4205 : sdiv_pow2_cheap (speed, compute_mode))
4206 		   /* We assume that the cheap metric is true if the
4207 optab has an expander for this mode. */
4208 && ((optab_handler ((rem_flag ? smod_optab
4209 : sdiv_optab),
4210 compute_mode)
4211 != CODE_FOR_nothing)
4212 || (optab_handler (sdivmod_optab,
4213 compute_mode)
4214 != CODE_FOR_nothing)))
4215 ;
4216 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4217 {
4218 if (rem_flag)
4219 {
4220 remainder = expand_smod_pow2 (compute_mode, op0, d);
4221 if (remainder)
4222 return gen_lowpart (mode, remainder);
4223 }
4224
4225 if (sdiv_pow2_cheap (speed, compute_mode)
4226 && ((optab_handler (sdiv_optab, compute_mode)
4227 != CODE_FOR_nothing)
4228 || (optab_handler (sdivmod_optab, compute_mode)
4229 != CODE_FOR_nothing)))
4230 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4231 compute_mode, op0,
4232 gen_int_mode (abs_d,
4233 compute_mode),
4234 NULL_RTX, 0);
4235 else
4236 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4237
4238 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4239 negate the quotient. */
4240 if (d < 0)
4241 {
4242 insn = get_last_insn ();
4243 if (insn != last
4244 && abs_d < ((unsigned HOST_WIDE_INT) 1
4245 << (HOST_BITS_PER_WIDE_INT - 1)))
4246 set_dst_reg_note (insn, REG_EQUAL,
4247 gen_rtx_DIV (compute_mode, op0,
4248 gen_int_mode
4249 (abs_d,
4250 compute_mode)),
4251 quotient);
4252
4253 quotient = expand_unop (compute_mode, neg_optab,
4254 quotient, quotient, 0);
4255 }
4256 }
4257 else if (size <= HOST_BITS_PER_WIDE_INT)
4258 {
4259 choose_multiplier (abs_d, size, size - 1,
4260 &ml, &post_shift, &lgup);
4261 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4262 {
4263 rtx t1, t2, t3;
4264
4265 if (post_shift >= BITS_PER_WORD
4266 || size - 1 >= BITS_PER_WORD)
4267 goto fail1;
4268
4269 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4270 + shift_cost (speed, compute_mode, size - 1)
4271 + add_cost (speed, compute_mode));
4272 t1 = expmed_mult_highpart
4273 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4274 NULL_RTX, 0, max_cost - extra_cost);
4275 if (t1 == 0)
4276 goto fail1;
4277 t2 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t1,
4279 post_shift, NULL_RTX, 0);
4280 t3 = expand_shift
4281 (RSHIFT_EXPR, compute_mode, op0,
4282 size - 1, NULL_RTX, 0);
4283 if (d < 0)
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t3, t2),
4287 tquotient);
4288 else
4289 quotient
4290 = force_operand (gen_rtx_MINUS (compute_mode,
4291 t2, t3),
4292 tquotient);
4293 }
4294 else
4295 {
4296 rtx t1, t2, t3, t4;
4297
4298 if (post_shift >= BITS_PER_WORD
4299 || size - 1 >= BITS_PER_WORD)
4300 goto fail1;
4301
4302 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4303 mlr = gen_int_mode (ml, compute_mode);
4304 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4305 + shift_cost (speed, compute_mode, size - 1)
4306 + 2 * add_cost (speed, compute_mode));
4307 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4308 NULL_RTX, 0,
4309 max_cost - extra_cost);
4310 if (t1 == 0)
4311 goto fail1;
4312 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4313 t1, op0),
4314 NULL_RTX);
4315 t3 = expand_shift
4316 (RSHIFT_EXPR, compute_mode, t2,
4317 post_shift, NULL_RTX, 0);
4318 t4 = expand_shift
4319 (RSHIFT_EXPR, compute_mode, op0,
4320 size - 1, NULL_RTX, 0);
4321 if (d < 0)
4322 quotient
4323 = force_operand (gen_rtx_MINUS (compute_mode,
4324 t4, t3),
4325 tquotient);
4326 else
4327 quotient
4328 = force_operand (gen_rtx_MINUS (compute_mode,
4329 t3, t4),
4330 tquotient);
4331 }
4332 }
4333 else /* Too wide mode to use tricky code */
4334 	    else /* Mode too wide to use the tricky code */
4335
4336 insn = get_last_insn ();
4337 if (insn != last)
4338 set_dst_reg_note (insn, REG_EQUAL,
4339 gen_rtx_DIV (compute_mode, op0, op1),
4340 quotient);
4341 }
4342 break;
4343 }
4344 fail1:
4345 delete_insns_since (last);
4346 break;
4347
4348 case FLOOR_DIV_EXPR:
4349 case FLOOR_MOD_EXPR:
4350 /* We will come here only for signed operations. */
4351 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4352 {
4353 unsigned HOST_WIDE_INT mh, ml;
4354 int pre_shift, lgup, post_shift;
4355 HOST_WIDE_INT d = INTVAL (op1);
4356
4357 if (d > 0)
4358 {
4359 /* We could just as easily deal with negative constants here,
4360 but it does not seem worth the trouble for GCC 2.6. */
4361 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4362 {
4363 pre_shift = floor_log2 (d);
4364 if (rem_flag)
4365 {
4366 unsigned HOST_WIDE_INT mask
4367 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4368 remainder = expand_binop
4369 (compute_mode, and_optab, op0,
4370 gen_int_mode (mask, compute_mode),
4371 remainder, 0, OPTAB_LIB_WIDEN);
4372 if (remainder)
4373 return gen_lowpart (mode, remainder);
4374 }
4375 quotient = expand_shift
4376 (RSHIFT_EXPR, compute_mode, op0,
4377 pre_shift, tquotient, 0);
4378 }
4379 else
4380 {
4381 rtx t1, t2, t3, t4;
4382
4383 mh = choose_multiplier (d, size, size - 1,
4384 &ml, &post_shift, &lgup);
4385 gcc_assert (!mh);
4386
4387 if (post_shift < BITS_PER_WORD
4388 && size - 1 < BITS_PER_WORD)
4389 {
4390 t1 = expand_shift
4391 (RSHIFT_EXPR, compute_mode, op0,
4392 size - 1, NULL_RTX, 0);
4393 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4394 NULL_RTX, 0, OPTAB_WIDEN);
4395 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4396 + shift_cost (speed, compute_mode, size - 1)
4397 + 2 * add_cost (speed, compute_mode));
4398 t3 = expmed_mult_highpart
4399 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4400 NULL_RTX, 1, max_cost - extra_cost);
4401 if (t3 != 0)
4402 {
4403 t4 = expand_shift
4404 (RSHIFT_EXPR, compute_mode, t3,
4405 post_shift, NULL_RTX, 1);
4406 quotient = expand_binop (compute_mode, xor_optab,
4407 t4, t1, tquotient, 0,
4408 OPTAB_WIDEN);
4409 }
4410 }
4411 }
4412 }
4413 else
4414 {
4415 rtx nsign, t1, t2, t3, t4;
4416 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4417 op0, constm1_rtx), NULL_RTX);
4418 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4419 0, OPTAB_WIDEN);
4420 nsign = expand_shift
4421 (RSHIFT_EXPR, compute_mode, t2,
4422 size - 1, NULL_RTX, 0);
4423 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4424 NULL_RTX);
4425 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4426 NULL_RTX, 0);
4427 if (t4)
4428 {
4429 rtx t5;
4430 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4431 NULL_RTX, 0);
4432 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4433 t4, t5),
4434 tquotient);
4435 }
4436 }
4437 }
4438
4439 if (quotient != 0)
4440 break;
4441 delete_insns_since (last);
4442
4443 /* Try using an instruction that produces both the quotient and
4444 remainder, using truncation. We can easily compensate the quotient
4445 or remainder to get floor rounding, once we have the remainder.
4446 	 Notice that we also compute the final remainder value here,
4447 and return the result right away. */
4448 if (target == 0 || GET_MODE (target) != compute_mode)
4449 target = gen_reg_rtx (compute_mode);
4450
4451 if (rem_flag)
4452 {
4453 remainder
4454 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4455 quotient = gen_reg_rtx (compute_mode);
4456 }
4457 else
4458 {
4459 quotient
4460 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4461 remainder = gen_reg_rtx (compute_mode);
4462 }
4463
4464 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4465 quotient, remainder, 0))
4466 {
4467 /* This could be computed with a branch-less sequence.
4468 Save that for later. */
4469 rtx tem;
4470 rtx label = gen_label_rtx ();
4471 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4472 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4473 NULL_RTX, 0, OPTAB_WIDEN);
4474 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4475 expand_dec (quotient, const1_rtx);
4476 expand_inc (remainder, op1);
4477 emit_label (label);
4478 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4479 }
4480
4481 /* No luck with division elimination or divmod. Have to do it
4482 by conditionally adjusting op0 *and* the result. */
4483 {
4484 rtx label1, label2, label3, label4, label5;
4485 rtx adjusted_op0;
4486 rtx tem;
4487
4488 quotient = gen_reg_rtx (compute_mode);
4489 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4490 label1 = gen_label_rtx ();
4491 label2 = gen_label_rtx ();
4492 label3 = gen_label_rtx ();
4493 label4 = gen_label_rtx ();
4494 label5 = gen_label_rtx ();
4495 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4496 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4497 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4498 quotient, 0, OPTAB_LIB_WIDEN);
4499 if (tem != quotient)
4500 emit_move_insn (quotient, tem);
4501 emit_jump_insn (gen_jump (label5));
4502 emit_barrier ();
4503 emit_label (label1);
4504 expand_inc (adjusted_op0, const1_rtx);
4505 emit_jump_insn (gen_jump (label4));
4506 emit_barrier ();
4507 emit_label (label2);
4508 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4509 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4510 quotient, 0, OPTAB_LIB_WIDEN);
4511 if (tem != quotient)
4512 emit_move_insn (quotient, tem);
4513 emit_jump_insn (gen_jump (label5));
4514 emit_barrier ();
4515 emit_label (label3);
4516 expand_dec (adjusted_op0, const1_rtx);
4517 emit_label (label4);
4518 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4519 quotient, 0, OPTAB_LIB_WIDEN);
4520 if (tem != quotient)
4521 emit_move_insn (quotient, tem);
4522 expand_dec (quotient, const1_rtx);
4523 emit_label (label5);
4524 }
4525 break;
4526
4527 case CEIL_DIV_EXPR:
4528 case CEIL_MOD_EXPR:
4529 if (unsignedp)
4530 {
4531 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4532 {
4533 rtx t1, t2, t3;
4534 unsigned HOST_WIDE_INT d = INTVAL (op1);
4535 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4536 floor_log2 (d), tquotient, 1);
4537 t2 = expand_binop (compute_mode, and_optab, op0,
4538 gen_int_mode (d - 1, compute_mode),
4539 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4540 t3 = gen_reg_rtx (compute_mode);
4541 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4542 compute_mode, 1, 1);
4543 if (t3 == 0)
4544 {
4545 rtx lab;
4546 lab = gen_label_rtx ();
4547 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4548 expand_inc (t1, const1_rtx);
4549 emit_label (lab);
4550 quotient = t1;
4551 }
4552 else
4553 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4554 t1, t3),
4555 tquotient);
4556 break;
4557 }
4558
4559 /* Try using an instruction that produces both the quotient and
4560 remainder, using truncation. We can easily compensate the
4561 quotient or remainder to get ceiling rounding, once we have the
4562 	     remainder.  Notice that we also compute the final remainder
4563 value here, and return the result right away. */
4564 if (target == 0 || GET_MODE (target) != compute_mode)
4565 target = gen_reg_rtx (compute_mode);
4566
4567 if (rem_flag)
4568 {
4569 remainder = (REG_P (target)
4570 ? target : gen_reg_rtx (compute_mode));
4571 quotient = gen_reg_rtx (compute_mode);
4572 }
4573 else
4574 {
4575 quotient = (REG_P (target)
4576 ? target : gen_reg_rtx (compute_mode));
4577 remainder = gen_reg_rtx (compute_mode);
4578 }
4579
4580 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4581 remainder, 1))
4582 {
4583 /* This could be computed with a branch-less sequence.
4584 Save that for later. */
4585 rtx label = gen_label_rtx ();
4586 do_cmp_and_jump (remainder, const0_rtx, EQ,
4587 compute_mode, label);
4588 expand_inc (quotient, const1_rtx);
4589 expand_dec (remainder, op1);
4590 emit_label (label);
4591 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4592 }
4593
4594 /* No luck with division elimination or divmod. Have to do it
4595 by conditionally adjusting op0 *and* the result. */
4596 {
4597 rtx label1, label2;
4598 rtx adjusted_op0, tem;
4599
4600 quotient = gen_reg_rtx (compute_mode);
4601 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4602 label1 = gen_label_rtx ();
4603 label2 = gen_label_rtx ();
4604 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4605 compute_mode, label1);
4606 emit_move_insn (quotient, const0_rtx);
4607 emit_jump_insn (gen_jump (label2));
4608 emit_barrier ();
4609 emit_label (label1);
4610 expand_dec (adjusted_op0, const1_rtx);
4611 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4612 quotient, 1, OPTAB_LIB_WIDEN);
4613 if (tem != quotient)
4614 emit_move_insn (quotient, tem);
4615 expand_inc (quotient, const1_rtx);
4616 emit_label (label2);
4617 }
4618 }
4619 else /* signed */
4620 {
4621 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4622 && INTVAL (op1) >= 0)
4623 {
4624 /* This is extremely similar to the code for the unsigned case
4625 above. For 2.7 we should merge these variants, but for
4626 2.6.1 I don't want to touch the code for unsigned since that
4627 get used in C. The signed case will only be used by other
4628 		 gets used in C.  The signed case will only be used by other
4629
4630 rtx t1, t2, t3;
4631 unsigned HOST_WIDE_INT d = INTVAL (op1);
4632 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4633 floor_log2 (d), tquotient, 0);
4634 t2 = expand_binop (compute_mode, and_optab, op0,
4635 gen_int_mode (d - 1, compute_mode),
4636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4637 t3 = gen_reg_rtx (compute_mode);
4638 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4639 compute_mode, 1, 1);
4640 if (t3 == 0)
4641 {
4642 rtx lab;
4643 lab = gen_label_rtx ();
4644 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4645 expand_inc (t1, const1_rtx);
4646 emit_label (lab);
4647 quotient = t1;
4648 }
4649 else
4650 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4651 t1, t3),
4652 tquotient);
4653 break;
4654 }
4655
4656 /* Try using an instruction that produces both the quotient and
4657 remainder, using truncation. We can easily compensate the
4658 quotient or remainder to get ceiling rounding, once we have the
4659 	     remainder.  Notice that we also compute the final remainder
4660 value here, and return the result right away. */
4661 if (target == 0 || GET_MODE (target) != compute_mode)
4662 target = gen_reg_rtx (compute_mode);
4663 if (rem_flag)
4664 {
4665 	      remainder = (REG_P (target)
4666 ? target : gen_reg_rtx (compute_mode));
4667 quotient = gen_reg_rtx (compute_mode);
4668 }
4669 else
4670 {
4671 quotient = (REG_P (target)
4672 ? target : gen_reg_rtx (compute_mode));
4673 remainder = gen_reg_rtx (compute_mode);
4674 }
4675
4676 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4677 remainder, 0))
4678 {
4679 /* This could be computed with a branch-less sequence.
4680 Save that for later. */
4681 rtx tem;
4682 rtx label = gen_label_rtx ();
4683 do_cmp_and_jump (remainder, const0_rtx, EQ,
4684 compute_mode, label);
4685 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4686 NULL_RTX, 0, OPTAB_WIDEN);
4687 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4688 expand_inc (quotient, const1_rtx);
4689 expand_dec (remainder, op1);
4690 emit_label (label);
4691 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4692 }
4693
4694 /* No luck with division elimination or divmod. Have to do it
4695 by conditionally adjusting op0 *and* the result. */
4696 {
4697 rtx label1, label2, label3, label4, label5;
4698 rtx adjusted_op0;
4699 rtx tem;
4700
4701 quotient = gen_reg_rtx (compute_mode);
4702 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4703 label1 = gen_label_rtx ();
4704 label2 = gen_label_rtx ();
4705 label3 = gen_label_rtx ();
4706 label4 = gen_label_rtx ();
4707 label5 = gen_label_rtx ();
4708 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4709 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4710 compute_mode, label1);
4711 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4712 quotient, 0, OPTAB_LIB_WIDEN);
4713 if (tem != quotient)
4714 emit_move_insn (quotient, tem);
4715 emit_jump_insn (gen_jump (label5));
4716 emit_barrier ();
4717 emit_label (label1);
4718 expand_dec (adjusted_op0, const1_rtx);
4719 emit_jump_insn (gen_jump (label4));
4720 emit_barrier ();
4721 emit_label (label2);
4722 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4723 compute_mode, label3);
4724 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4725 quotient, 0, OPTAB_LIB_WIDEN);
4726 if (tem != quotient)
4727 emit_move_insn (quotient, tem);
4728 emit_jump_insn (gen_jump (label5));
4729 emit_barrier ();
4730 emit_label (label3);
4731 expand_inc (adjusted_op0, const1_rtx);
4732 emit_label (label4);
4733 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4734 quotient, 0, OPTAB_LIB_WIDEN);
4735 if (tem != quotient)
4736 emit_move_insn (quotient, tem);
4737 expand_inc (quotient, const1_rtx);
4738 emit_label (label5);
4739 }
4740 }
4741 break;
4742
4743 case EXACT_DIV_EXPR:
4744 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4745 {
4746 HOST_WIDE_INT d = INTVAL (op1);
4747 unsigned HOST_WIDE_INT ml;
4748 int pre_shift;
4749 rtx t1;
4750
4751 pre_shift = floor_log2 (d & -d);
4752 ml = invert_mod2n (d >> pre_shift, size);
4753 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4754 pre_shift, NULL_RTX, unsignedp);
4755 quotient = expand_mult (compute_mode, t1,
4756 gen_int_mode (ml, compute_mode),
4757 NULL_RTX, 1);
4758
4759 insn = get_last_insn ();
4760 set_dst_reg_note (insn, REG_EQUAL,
4761 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4762 compute_mode, op0, op1),
4763 quotient);
4764 }
4765 break;
4766
4767 case ROUND_DIV_EXPR:
4768 case ROUND_MOD_EXPR:
4769 if (unsignedp)
4770 {
4771 rtx tem;
4772 rtx label;
4773 label = gen_label_rtx ();
4774 quotient = gen_reg_rtx (compute_mode);
4775 remainder = gen_reg_rtx (compute_mode);
4776 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4777 {
4778 rtx tem;
4779 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4780 quotient, 1, OPTAB_LIB_WIDEN);
4781 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4782 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4783 remainder, 1, OPTAB_LIB_WIDEN);
4784 }
4785 tem = plus_constant (compute_mode, op1, -1);
4786 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4787 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4788 expand_inc (quotient, const1_rtx);
4789 expand_dec (remainder, op1);
4790 emit_label (label);
4791 }
4792 else
4793 {
4794 rtx abs_rem, abs_op1, tem, mask;
4795 rtx label;
4796 label = gen_label_rtx ();
4797 quotient = gen_reg_rtx (compute_mode);
4798 remainder = gen_reg_rtx (compute_mode);
4799 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4800 {
4801 rtx tem;
4802 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4803 quotient, 0, OPTAB_LIB_WIDEN);
4804 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4805 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4806 remainder, 0, OPTAB_LIB_WIDEN);
4807 }
4808 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4809 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4810 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4811 1, NULL_RTX, 1);
4812 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4813 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4814 NULL_RTX, 0, OPTAB_WIDEN);
4815 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4816 size - 1, NULL_RTX, 0);
4817 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4818 NULL_RTX, 0, OPTAB_WIDEN);
4819 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4820 NULL_RTX, 0, OPTAB_WIDEN);
4821 expand_inc (quotient, tem);
4822 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4823 NULL_RTX, 0, OPTAB_WIDEN);
4824 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4825 NULL_RTX, 0, OPTAB_WIDEN);
4826 expand_dec (remainder, tem);
4827 emit_label (label);
4828 }
4829 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4830
4831 default:
4832 gcc_unreachable ();
4833 }
4834
4835 if (quotient == 0)
4836 {
4837 if (target && GET_MODE (target) != compute_mode)
4838 target = 0;
4839
4840 if (rem_flag)
4841 {
4842 /* Try to produce the remainder without producing the quotient.
4843 If we seem to have a divmod pattern that does not require widening,
4844 don't try widening here. We should really have a WIDEN argument
4845 to expand_twoval_binop, since what we'd really like to do here is
4846 1) try a mod insn in compute_mode
4847 2) try a divmod insn in compute_mode
4848 3) try a div insn in compute_mode and multiply-subtract to get
4849 remainder
4850 4) try the same things with widening allowed. */
4851 remainder
4852 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4853 op0, op1, target,
4854 unsignedp,
4855 ((optab_handler (optab2, compute_mode)
4856 != CODE_FOR_nothing)
4857 ? OPTAB_DIRECT : OPTAB_WIDEN));
4858 if (remainder == 0)
4859 {
4860 /* No luck there. Can we do remainder and divide at once
4861 without a library call? */
4862 remainder = gen_reg_rtx (compute_mode);
4863 if (! expand_twoval_binop ((unsignedp
4864 ? udivmod_optab
4865 : sdivmod_optab),
4866 op0, op1,
4867 NULL_RTX, remainder, unsignedp))
4868 remainder = 0;
4869 }
4870
4871 if (remainder)
4872 return gen_lowpart (mode, remainder);
4873 }
4874
4875 /* Produce the quotient. Try a quotient insn, but not a library call.
4876 If we have a divmod in this mode, use it in preference to widening
4877 the div (for this test we assume it will not fail). Note that optab2
4878 	 is set to whichever of the two optabs the call below will use.  */
4879 quotient
4880 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4881 op0, op1, rem_flag ? NULL_RTX : target,
4882 unsignedp,
4883 ((optab_handler (optab2, compute_mode)
4884 != CODE_FOR_nothing)
4885 ? OPTAB_DIRECT : OPTAB_WIDEN));
4886
4887 if (quotient == 0)
4888 {
4889 /* No luck there. Try a quotient-and-remainder insn,
4890 keeping the quotient alone. */
4891 quotient = gen_reg_rtx (compute_mode);
4892 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4893 op0, op1,
4894 quotient, NULL_RTX, unsignedp))
4895 {
4896 quotient = 0;
4897 if (! rem_flag)
4898 /* Still no luck. If we are not computing the remainder,
4899 use a library call for the quotient. */
4900 quotient = sign_expand_binop (compute_mode,
4901 udiv_optab, sdiv_optab,
4902 op0, op1, target,
4903 unsignedp, OPTAB_LIB_WIDEN);
4904 }
4905 }
4906 }
4907
4908 if (rem_flag)
4909 {
4910 if (target && GET_MODE (target) != compute_mode)
4911 target = 0;
4912
4913 if (quotient == 0)
4914 {
4915 /* No divide instruction either. Use library for remainder. */
4916 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4917 op0, op1, target,
4918 unsignedp, OPTAB_LIB_WIDEN);
4919 /* No remainder function. Try a quotient-and-remainder
4920 function, keeping the remainder. */
4921 if (!remainder)
4922 {
4923 remainder = gen_reg_rtx (compute_mode);
4924 if (!expand_twoval_binop_libfunc
4925 (unsignedp ? udivmod_optab : sdivmod_optab,
4926 op0, op1,
4927 NULL_RTX, remainder,
4928 unsignedp ? UMOD : MOD))
4929 remainder = NULL_RTX;
4930 }
4931 }
4932 else
4933 {
4934 /* We divided. Now finish doing X - Y * (X / Y). */
4935 remainder = expand_mult (compute_mode, quotient, op1,
4936 NULL_RTX, unsignedp);
4937 remainder = expand_binop (compute_mode, sub_optab, op0,
4938 remainder, target, unsignedp,
4939 OPTAB_LIB_WIDEN);
4940 }
4941 }
4942
4943 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4944 }
4945 \f
4946 /* Return a tree node with data type TYPE, describing the value of X.
4947    Usually this is a VAR_DECL, if there is no obvious better choice.
4948    X may be an expression; however, we only support those expressions
4949 generated by loop.c. */
4950
4951 tree
4952 make_tree (tree type, rtx x)
4953 {
4954 tree t;
4955
4956 switch (GET_CODE (x))
4957 {
4958 case CONST_INT:
4959 {
4960 HOST_WIDE_INT hi = 0;
4961
4962 if (INTVAL (x) < 0
4963 && !(TYPE_UNSIGNED (type)
4964 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4965 < HOST_BITS_PER_WIDE_INT)))
4966 hi = -1;
4967
4968 t = build_int_cst_wide (type, INTVAL (x), hi);
4969
4970 return t;
4971 }
4972
4973 case CONST_DOUBLE:
4974 if (GET_MODE (x) == VOIDmode)
4975 t = build_int_cst_wide (type,
4976 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4977 else
4978 {
4979 REAL_VALUE_TYPE d;
4980
4981 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4982 t = build_real (type, d);
4983 }
4984
4985 return t;
4986
4987 case CONST_VECTOR:
4988 {
4989 int units = CONST_VECTOR_NUNITS (x);
4990 tree itype = TREE_TYPE (type);
4991 tree *elts;
4992 int i;
4993
4994 /* Build a tree with vector elements. */
4995 elts = XALLOCAVEC (tree, units);
4996 for (i = units - 1; i >= 0; --i)
4997 {
4998 rtx elt = CONST_VECTOR_ELT (x, i);
4999 elts[i] = make_tree (itype, elt);
5000 }
5001
5002 return build_vector (type, elts);
5003 }
5004
5005 case PLUS:
5006 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5007 make_tree (type, XEXP (x, 1)));
5008
5009 case MINUS:
5010 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1)));
5012
5013 case NEG:
5014 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5015
5016 case MULT:
5017 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5018 make_tree (type, XEXP (x, 1)));
5019
5020 case ASHIFT:
5021 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5022 make_tree (type, XEXP (x, 1)));
5023
5024 case LSHIFTRT:
5025 t = unsigned_type_for (type);
5026 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5027 make_tree (t, XEXP (x, 0)),
5028 make_tree (type, XEXP (x, 1))));
5029
5030 case ASHIFTRT:
5031 t = signed_type_for (type);
5032 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5033 make_tree (t, XEXP (x, 0)),
5034 make_tree (type, XEXP (x, 1))));
5035
5036 case DIV:
5037 if (TREE_CODE (type) != REAL_TYPE)
5038 t = signed_type_for (type);
5039 else
5040 t = type;
5041
5042 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5043 make_tree (t, XEXP (x, 0)),
5044 make_tree (t, XEXP (x, 1))));
5045 case UDIV:
5046 t = unsigned_type_for (type);
5047 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5048 make_tree (t, XEXP (x, 0)),
5049 make_tree (t, XEXP (x, 1))));
5050
5051 case SIGN_EXTEND:
5052 case ZERO_EXTEND:
5053 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5054 GET_CODE (x) == ZERO_EXTEND);
5055 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5056
5057 case CONST:
5058 return make_tree (type, XEXP (x, 0));
5059
5060 case SYMBOL_REF:
5061 t = SYMBOL_REF_DECL (x);
5062 if (t)
5063 return fold_convert (type, build_fold_addr_expr (t));
5064 /* else fall through. */
5065
5066 default:
5067 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5068
5069 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5070 address mode to pointer mode. */
5071 if (POINTER_TYPE_P (type))
5072 x = convert_memory_address_addr_space
5073 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5074
5075 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5076 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5077 t->decl_with_rtl.rtl = x;
5078
5079 return t;
5080 }
5081 }
5082 \f
5083 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5084 and returning TARGET.
5085
5086 If TARGET is 0, a pseudo-register or constant is returned. */
5087
5088 rtx
5089 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5090 {
5091 rtx tem = 0;
5092
5093 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5094 tem = simplify_binary_operation (AND, mode, op0, op1);
5095 if (tem == 0)
5096 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5097
5098 if (target == 0)
5099 target = tem;
5100 else if (tem != target)
5101 emit_move_insn (target, tem);
5102 return target;
5103 }
5104
5105 /* Helper function for emit_store_flag. */
5106 static rtx
5107 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5108 enum machine_mode mode, enum machine_mode compare_mode,
5109 int unsignedp, rtx x, rtx y, int normalizep,
5110 enum machine_mode target_mode)
5111 {
5112 struct expand_operand ops[4];
5113 rtx op0, last, comparison, subtarget;
5114 enum machine_mode result_mode = targetm.cstore_mode (icode);
5115
5116 last = get_last_insn ();
5117 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5118 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5119 if (!x || !y)
5120 {
5121 delete_insns_since (last);
5122 return NULL_RTX;
5123 }
5124
5125 if (target_mode == VOIDmode)
5126 target_mode = result_mode;
5127 if (!target)
5128 target = gen_reg_rtx (target_mode);
5129
5130 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5131
5132 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5133 create_fixed_operand (&ops[1], comparison);
5134 create_fixed_operand (&ops[2], x);
5135 create_fixed_operand (&ops[3], y);
5136 if (!maybe_expand_insn (icode, 4, ops))
5137 {
5138 delete_insns_since (last);
5139 return NULL_RTX;
5140 }
5141 subtarget = ops[0].value;
5142
5143 /* If we are converting to a wider mode, first convert to
5144 TARGET_MODE, then normalize. This produces better combining
5145 opportunities on machines that have a SIGN_EXTRACT when we are
5146 testing a single bit. This mostly benefits the 68k.
5147
5148 If STORE_FLAG_VALUE does not have the sign bit set when
5149 interpreted in MODE, we can do this conversion as unsigned, which
5150 is usually more efficient. */
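/* For instance, if STORE_FLAG_VALUE is 1, a narrow (say QImode) result of
   1 keeps its value under zero-extension to a wider mode such as SImode,
   so the unsigned conversion is safe and usually cheaper. If instead
   STORE_FLAG_VALUE had its sign bit set, a sign-extension would be needed
   so that the widened value still equals STORE_FLAG_VALUE.
   (QImode/SImode above are illustrative only.) */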
5151 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5152 {
5153 convert_move (target, subtarget,
5154 val_signbit_known_clear_p (result_mode,
5155 STORE_FLAG_VALUE));
5156 op0 = target;
5157 result_mode = target_mode;
5158 }
5159 else
5160 op0 = subtarget;
5161
5162 /* If we want to keep subexpressions around, don't reuse our last
5163 target. */
5164 if (optimize)
5165 subtarget = 0;
5166
5167 /* Now normalize to the proper value in MODE. Sometimes we don't
5168 have to do anything. */
5169 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5170 ;
5171 /* STORE_FLAG_VALUE might be the most negative number, so write
5172 the comparison this way to avoid a compile-time warning. */
5173 else if (- normalizep == STORE_FLAG_VALUE)
5174 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5175
5176 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5177 it hard to use a value of just the sign bit due to ANSI integer
5178 constant typing rules. */
5179 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5180 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5181 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5182 normalizep == 1);
5183 else
5184 {
5185 gcc_assert (STORE_FLAG_VALUE & 1);
5186
5187 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5188 if (normalizep == -1)
5189 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5190 }
5191
5192 /* If we were converting to a smaller mode, do the conversion now. */
5193 if (target_mode != result_mode)
5194 {
5195 convert_move (target, op0, 0);
5196 return target;
5197 }
5198 else
5199 return op0;
5200 }
5201
5202
5203 /* A subroutine of emit_store_flag only including "tricks" that do not
5204 need a recursive call. These are kept separate to avoid infinite
5205 loops. */
5206
5207 static rtx
5208 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5209 enum machine_mode mode, int unsignedp, int normalizep,
5210 enum machine_mode target_mode)
5211 {
5212 rtx subtarget;
5213 enum insn_code icode;
5214 enum machine_mode compare_mode;
5215 enum mode_class mclass;
5216 enum rtx_code scode;
5217 rtx tem;
5218
5219 if (unsignedp)
5220 code = unsigned_condition (code);
5221 scode = swap_condition (code);
5222
5223 /* If one operand is constant, make it the second one. Only do this
5224 if the other operand is not constant as well. */
5225
5226 if (swap_commutative_operands_p (op0, op1))
5227 {
5228 tem = op0;
5229 op0 = op1;
5230 op1 = tem;
5231 code = swap_condition (code);
5232 }
5233
5234 if (mode == VOIDmode)
5235 mode = GET_MODE (op0);
5236
5237 /* For some comparisons with 1 and -1, we can convert this to
5238 comparisons with zero. This will often produce more opportunities for
5239 store-flag insns. */
5240
5241 switch (code)
5242 {
5243 case LT:
5244 if (op1 == const1_rtx)
5245 op1 = const0_rtx, code = LE;
5246 break;
5247 case LE:
5248 if (op1 == constm1_rtx)
5249 op1 = const0_rtx, code = LT;
5250 break;
5251 case GE:
5252 if (op1 == const1_rtx)
5253 op1 = const0_rtx, code = GT;
5254 break;
5255 case GT:
5256 if (op1 == constm1_rtx)
5257 op1 = const0_rtx, code = GE;
5258 break;
5259 case GEU:
5260 if (op1 == const1_rtx)
5261 op1 = const0_rtx, code = NE;
5262 break;
5263 case LTU:
5264 if (op1 == const1_rtx)
5265 op1 = const0_rtx, code = EQ;
5266 break;
5267 default:
5268 break;
5269 }
5270
5271 /* If we are comparing a double-word integer with zero or -1, we can
5272 convert the comparison into one involving a single word. */
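/* For example, with 32-bit words and a 64-bit operand X split into word
   halves LO and HI: X == 0 exactly when (LO | HI) == 0, X == -1 exactly
   when (LO & HI) == -1, and X < 0 exactly when HI < 0, so a single-word
   comparison suffices in each case. (Word width is illustrative.) */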
5273 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5274 && GET_MODE_CLASS (mode) == MODE_INT
5275 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5276 {
5277 if ((code == EQ || code == NE)
5278 && (op1 == const0_rtx || op1 == constm1_rtx))
5279 {
5280 rtx op00, op01;
5281
5282 /* Do a logical OR or AND of the two words and compare the
5283 result. */
5284 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5285 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5286 tem = expand_binop (word_mode,
5287 op1 == const0_rtx ? ior_optab : and_optab,
5288 op00, op01, NULL_RTX, unsignedp,
5289 OPTAB_DIRECT);
5290
5291 if (tem != 0)
5292 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5293 unsignedp, normalizep);
5294 }
5295 else if ((code == LT || code == GE) && op1 == const0_rtx)
5296 {
5297 rtx op0h;
5298
5299 /* If testing the sign bit, we can just test the high word. */
5300 op0h = simplify_gen_subreg (word_mode, op0, mode,
5301 subreg_highpart_offset (word_mode,
5302 mode));
5303 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5304 unsignedp, normalizep);
5305 }
5306 else
5307 tem = NULL_RTX;
5308
5309 if (tem)
5310 {
5311 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5312 return tem;
5313 if (!target)
5314 target = gen_reg_rtx (target_mode);
5315
5316 convert_move (target, tem,
5317 !val_signbit_known_set_p (word_mode,
5318 (normalizep ? normalizep
5319 : STORE_FLAG_VALUE)));
5320 return target;
5321 }
5322 }
5323
5324 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5325 complement of A (for GE) and shifting the sign bit to the low bit. */
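/* For instance, with a 32-bit A: "A < 0" is ((unsigned) A) >> 31, i.e. 1
   exactly when the sign bit is set, and "A >= 0" is ((unsigned) ~A) >> 31.
   Using an arithmetic rather than a logical shift gives -1/0 instead of
   1/0, which is what the normalizep == -1 case below wants. (The 32-bit
   width is illustrative.) */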
5326 if (op1 == const0_rtx && (code == LT || code == GE)
5327 && GET_MODE_CLASS (mode) == MODE_INT
5328 && (normalizep || STORE_FLAG_VALUE == 1
5329 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5330 {
5331 subtarget = target;
5332
5333 if (!target)
5334 target_mode = mode;
5335
5336 /* If the result is to be wider than OP0, it is best to convert it
5337 first. If it is to be narrower, it is *incorrect* to convert it
5338 first. */
5339 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5340 {
5341 op0 = convert_modes (target_mode, mode, op0, 0);
5342 mode = target_mode;
5343 }
5344
5345 if (target_mode != mode)
5346 subtarget = 0;
5347
5348 if (code == GE)
5349 op0 = expand_unop (mode, one_cmpl_optab, op0,
5350 ((STORE_FLAG_VALUE == 1 || normalizep)
5351 ? 0 : subtarget), 0);
5352
5353 if (STORE_FLAG_VALUE == 1 || normalizep)
5354 /* If we are supposed to produce a 0/1 value, we want to do
5355 a logical shift from the sign bit to the low-order bit; for
5356 a -1/0 value, we do an arithmetic shift. */
5357 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5358 GET_MODE_BITSIZE (mode) - 1,
5359 subtarget, normalizep != -1);
5360
5361 if (mode != target_mode)
5362 op0 = convert_modes (target_mode, mode, op0, 0);
5363
5364 return op0;
5365 }
5366
5367 mclass = GET_MODE_CLASS (mode);
5368 for (compare_mode = mode; compare_mode != VOIDmode;
5369 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5370 {
5371 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5372 icode = optab_handler (cstore_optab, optab_mode);
5373 if (icode != CODE_FOR_nothing)
5374 {
5375 do_pending_stack_adjust ();
5376 tem = emit_cstore (target, icode, code, mode, compare_mode,
5377 unsignedp, op0, op1, normalizep, target_mode);
5378 if (tem)
5379 return tem;
5380
5381 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5382 {
5383 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5384 unsignedp, op1, op0, normalizep, target_mode);
5385 if (tem)
5386 return tem;
5387 }
5388 break;
5389 }
5390 }
5391
5392 return 0;
5393 }
5394
5395 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5396 and storing in TARGET. Normally return TARGET.
5397 Return 0 if that cannot be done.
5398
5399 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5400 it is VOIDmode, they cannot both be CONST_INT.
5401
5402 UNSIGNEDP is for the case where we have to widen the operands
5403 to perform the operation. It says to use zero-extension.
5404
5405 NORMALIZEP is 1 if we should convert the result to be either zero
5406 or one. NORMALIZEP is -1 if we should convert the result to be
5407 either zero or -1. If NORMALIZEP is zero, the result will be left
5408 "raw" out of the scc insn. */
5409
5410 rtx
5411 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5412 enum machine_mode mode, int unsignedp, int normalizep)
5413 {
5414 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5415 enum rtx_code rcode;
5416 rtx subtarget;
5417 rtx tem, last, trueval;
5418
5419 /* If we compare constants, we shouldn't use a store-flag operation,
5420 but a constant load. We can get there via the vanilla route that
5421 usually generates a compare-branch sequence, but will in this case
5422 fold the comparison to a constant, and thus elide the branch. */
5423 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5424 return NULL_RTX;
5425
5426 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5427 target_mode);
5428 if (tem)
5429 return tem;
5430
5431 /* If we reached here, we can't do this with a scc insn, however there
5432 are some comparisons that can be done in other ways. Don't do any
5433 of these cases if branches are very cheap. */
5434 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5435 return 0;
5436
5437 /* See what we need to return. We can only return a 1, -1, or the
5438 sign bit. */
5439
5440 if (normalizep == 0)
5441 {
5442 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5443 normalizep = STORE_FLAG_VALUE;
5444
5445 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5446 ;
5447 else
5448 return 0;
5449 }
5450
5451 last = get_last_insn ();
5452
5453 /* If optimizing, use different pseudo registers for each insn, instead
5454 of reusing the same pseudo. This leads to better CSE, but slows
5455 down the compiler, since there are more pseudos. */
5456 subtarget = (!optimize
5457 && (target_mode == mode)) ? target : NULL_RTX;
5458 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5459
5460 /* For floating-point comparisons, try the reverse comparison or try
5461 changing the "orderedness" of the comparison. */
5462 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5463 {
5464 enum rtx_code first_code;
5465 bool and_them;
5466
5467 rcode = reverse_condition_maybe_unordered (code);
5468 if (can_compare_p (rcode, mode, ccp_store_flag)
5469 && (code == ORDERED || code == UNORDERED
5470 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5471 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5472 {
5473 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5474 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5475
5476 /* For the reverse comparison, use either an addition or an XOR. */
5477 if (want_add
5478 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5479 optimize_insn_for_speed_p ()) == 0)
5480 {
5481 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5482 STORE_FLAG_VALUE, target_mode);
5483 if (tem)
5484 return expand_binop (target_mode, add_optab, tem,
5485 gen_int_mode (normalizep, target_mode),
5486 target, 0, OPTAB_WIDEN);
5487 }
5488 else if (!want_add
5489 && rtx_cost (trueval, XOR, 1,
5490 optimize_insn_for_speed_p ()) == 0)
5491 {
5492 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5493 normalizep, target_mode);
5494 if (tem)
5495 return expand_binop (target_mode, xor_optab, tem, trueval,
5496 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5497 }
5498 }
5499
5500 delete_insns_since (last);
5501
5502 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5503 if (code == ORDERED || code == UNORDERED)
5504 return 0;
5505
5506 and_them = split_comparison (code, mode, &first_code, &code);
5507
5508 /* If there are no NaNs, the first comparison should always fall through.
5509 Effectively change the comparison to the other one. */
5510 if (!HONOR_NANS (mode))
5511 {
5512 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5513 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5514 target_mode);
5515 }
5516
5517 #ifdef HAVE_conditional_move
5518 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5519 conditional move. */
5520 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5521 normalizep, target_mode);
5522 if (tem == 0)
5523 return 0;
5524
5525 if (and_them)
5526 tem = emit_conditional_move (target, code, op0, op1, mode,
5527 tem, const0_rtx, GET_MODE (tem), 0);
5528 else
5529 tem = emit_conditional_move (target, code, op0, op1, mode,
5530 trueval, tem, GET_MODE (tem), 0);
5531
5532 if (tem == 0)
5533 delete_insns_since (last);
5534 return tem;
5535 #else
5536 return 0;
5537 #endif
5538 }
5539
5540 /* The remaining tricks only apply to integer comparisons. */
5541
5542 if (GET_MODE_CLASS (mode) != MODE_INT)
5543 return 0;
5544
5545 /* If this is an equality comparison of integers, we can try to exclusive-or
5546 (or subtract) the two operands and use a recursive call to try the
5547 comparison with zero. Don't do any of these cases if branches are
5548 very cheap. */
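/* For example, A == B exactly when (A ^ B) == 0, and likewise exactly when
   (A - B) == 0, so either operation reduces the test to a comparison
   against zero. */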
5549
5550 if ((code == EQ || code == NE) && op1 != const0_rtx)
5551 {
5552 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5553 OPTAB_WIDEN);
5554
5555 if (tem == 0)
5556 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5557 OPTAB_WIDEN);
5558 if (tem != 0)
5559 tem = emit_store_flag (target, code, tem, const0_rtx,
5560 mode, unsignedp, normalizep);
5561 if (tem != 0)
5562 return tem;
5563
5564 delete_insns_since (last);
5565 }
5566
5567 /* For integer comparisons, try the reverse comparison. However, for
5568 small X, and if we would have to extend it anyway, implementing "X != 0"
5569 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5570 rcode = reverse_condition (code);
5571 if (can_compare_p (rcode, mode, ccp_store_flag)
5572 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5573 && code == NE
5574 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5575 && op1 == const0_rtx))
5576 {
5577 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5578 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5579
5580 /* Again, for the reverse comparison, use either an addition or an XOR. */
5581 if (want_add
5582 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5583 optimize_insn_for_speed_p ()) == 0)
5584 {
5585 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5586 STORE_FLAG_VALUE, target_mode);
5587 if (tem != 0)
5588 tem = expand_binop (target_mode, add_optab, tem,
5589 gen_int_mode (normalizep, target_mode),
5590 target, 0, OPTAB_WIDEN);
5591 }
5592 else if (!want_add
5593 && rtx_cost (trueval, XOR, 1,
5594 optimize_insn_for_speed_p ()) == 0)
5595 {
5596 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5597 normalizep, target_mode);
5598 if (tem != 0)
5599 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5600 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5601 }
5602
5603 if (tem != 0)
5604 return tem;
5605 delete_insns_since (last);
5606 }
5607
5608 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5609 the constant zero. Reject all other comparisons at this point. Only
5610 do LE and GT if branches are expensive since they are expensive on
5611 2-operand machines. */
5612
5613 if (op1 != const0_rtx
5614 || (code != EQ && code != NE
5615 && (BRANCH_COST (optimize_insn_for_speed_p (),
5616 false) <= 1 || (code != LE && code != GT))))
5617 return 0;
5618
5619 /* Try to put the result of the comparison in the sign bit. Assume we can't
5620 do the necessary operation below. */
5621
5622 tem = 0;
5623
5624 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5625 the sign bit set. */
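/* For example, with 8-bit values: A = 5 gives 0x05 | 0x04 = 0x05 (sign bit
   clear), A = 0 gives 0x00 | 0xff = 0xff, and A = -3 gives 0xfd | 0xfc =
   0xfd (sign bit set). If A > 0 both A and A - 1 are non-negative, while
   A <= 0 makes at least one of them negative. (The 8-bit width is
   illustrative.) */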
5626
5627 if (code == LE)
5628 {
5629 /* This is destructive, so SUBTARGET can't be OP0. */
5630 if (rtx_equal_p (subtarget, op0))
5631 subtarget = 0;
5632
5633 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5634 OPTAB_WIDEN);
5635 if (tem)
5636 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5637 OPTAB_WIDEN);
5638 }
5639
5640 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5641 number of bits in the mode of OP0, minus one. */
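/* For example, with 32-bit A and BITS = 31: the arithmetic shift yields 0
   for A >= 0 and -1 for A < 0. Subtracting A then gives -A (negative) when
   A > 0, 0 when A == 0, and -1 - A (non-negative) when A < 0, so the sign
   bit of the result is set exactly when A > 0. (The 32-bit width is
   illustrative.) */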
5642
5643 if (code == GT)
5644 {
5645 if (rtx_equal_p (subtarget, op0))
5646 subtarget = 0;
5647
5648 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5649 GET_MODE_BITSIZE (mode) - 1,
5650 subtarget, 0);
5651 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5652 OPTAB_WIDEN);
5653 }
5654
5655 if (code == EQ || code == NE)
5656 {
5657 /* For EQ or NE, one way to do the comparison is to apply an operation
5658 that converts the operand into a positive number if it is nonzero
5659 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5660 for NE we negate. This puts the result in the sign bit. Then we
5661 normalize with a shift, if needed.
5662
5663 Two operations that can do the above actions are ABS and FFS, so try
5664 them. If that doesn't work, and MODE is smaller than a full word,
5665 we can use zero-extension to the wider mode (an unsigned conversion)
5666 as the operation. */
5667
5668 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5669 that is compensated by the subsequent overflow when subtracting
5670 one / negating. */
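/* For example, using ABS: for any A other than the most negative value,
   |A| is zero only when A is zero, so |A| - 1 has the sign bit set exactly
   when A == 0 (the EQ case) and -|A| has it set exactly when A != 0 (the
   NE case); the final shift below then extracts that bit. For the most
   negative A, the overflow noted above still yields the correct sign
   bit. */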
5671
5672 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5673 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5674 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5675 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5676 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5677 {
5678 tem = convert_modes (word_mode, mode, op0, 1);
5679 mode = word_mode;
5680 }
5681
5682 if (tem != 0)
5683 {
5684 if (code == EQ)
5685 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5686 0, OPTAB_WIDEN);
5687 else
5688 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5689 }
5690
5691 /* If we couldn't do it that way, for NE we can "or" the two's complement
5692 of the value with itself. For EQ, we take the one's complement of
5693 that "or", which is an extra insn, so we only handle EQ if branches
5694 are expensive. */
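/* For example, with 8-bit values: A = 3 gives -3 | 3 = 0xfd | 0x03 = 0xff
   (sign bit set), while A = 0 gives 0 | 0 = 0. If A is nonzero, at least
   one of A and -A is negative, so the sign bit of (-A | A) is set exactly
   when A != 0; the one's complement flips that for EQ. (The 8-bit width is
   illustrative.) */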
5695
5696 if (tem == 0
5697 && (code == NE
5698 || BRANCH_COST (optimize_insn_for_speed_p (),
5699 false) > 1))
5700 {
5701 if (rtx_equal_p (subtarget, op0))
5702 subtarget = 0;
5703
5704 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5705 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5706 OPTAB_WIDEN);
5707
5708 if (tem && code == EQ)
5709 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5710 }
5711 }
5712
5713 if (tem && normalizep)
5714 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5715 GET_MODE_BITSIZE (mode) - 1,
5716 subtarget, normalizep == 1);
5717
5718 if (tem)
5719 {
5720 if (!target)
5721 ;
5722 else if (GET_MODE (tem) != target_mode)
5723 {
5724 convert_move (target, tem, 0);
5725 tem = target;
5726 }
5727 else if (!subtarget)
5728 {
5729 emit_move_insn (target, tem);
5730 tem = target;
5731 }
5732 }
5733 else
5734 delete_insns_since (last);
5735
5736 return tem;
5737 }
5738
5739 /* Like emit_store_flag, but always succeeds. */
5740
5741 rtx
5742 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5743 enum machine_mode mode, int unsignedp, int normalizep)
5744 {
5745 rtx tem, label;
5746 rtx trueval, falseval;
5747
5748 /* First see if emit_store_flag can do the job. */
5749 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5750 if (tem != 0)
5751 return tem;
5752
5753 if (!target)
5754 target = gen_reg_rtx (word_mode);
5755
5756 /* If this failed, we have to do this with set/compare/jump/set code.
5757 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
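/* Roughly, the special case below emits
     if (target == 0) goto label;
     target = trueval;
   label:
   i.e. TARGET already holds the correct (zero) result when the comparison
   is false, so only the nonzero case needs a store. */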
5758 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5759 if (code == NE
5760 && GET_MODE_CLASS (mode) == MODE_INT
5761 && REG_P (target)
5762 && op0 == target
5763 && op1 == const0_rtx)
5764 {
5765 label = gen_label_rtx ();
5766 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5767 mode, NULL_RTX, NULL_RTX, label, -1);
5768 emit_move_insn (target, trueval);
5769 emit_label (label);
5770 return target;
5771 }
5772
5773 if (!REG_P (target)
5774 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5775 target = gen_reg_rtx (GET_MODE (target));
5776
5777 /* Jump in the right direction if the target cannot implement CODE
5778 but can jump on its reverse condition. */
5779 falseval = const0_rtx;
5780 if (! can_compare_p (code, mode, ccp_jump)
5781 && (! FLOAT_MODE_P (mode)
5782 || code == ORDERED || code == UNORDERED
5783 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5784 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5785 {
5786 enum rtx_code rcode;
5787 if (FLOAT_MODE_P (mode))
5788 rcode = reverse_condition_maybe_unordered (code);
5789 else
5790 rcode = reverse_condition (code);
5791
5792 /* Canonicalize to UNORDERED for the libcall. */
5793 if (can_compare_p (rcode, mode, ccp_jump)
5794 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5795 {
5796 falseval = trueval;
5797 trueval = const0_rtx;
5798 code = rcode;
5799 }
5800 }
5801
5802 emit_move_insn (target, trueval);
5803 label = gen_label_rtx ();
5804 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5805 NULL_RTX, label, -1);
5806
5807 emit_move_insn (target, falseval);
5808 emit_label (label);
5809
5810 return target;
5811 }
5812 \f
5813 /* Perform possibly multi-word comparison and conditional jump to LABEL
5814 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5815 now a thin wrapper around do_compare_rtx_and_jump. */
5816
5817 static void
5818 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5819 rtx label)
5820 {
5821 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5822 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5823 NULL_RTX, NULL_RTX, label, -1);
5824 }