1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
 69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
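/* A power of two has exactly one bit set, so clearing its lowest set bit
   with x & (x - 1) yields zero; e.g. 8 & 7 == 0 and 0 & -1 == 0, while
   6 & 5 == 4 shows a non-power-of-two failing the test.  */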
71
72 struct init_expmed_rtl
73 {
74 struct rtx_def reg; rtunion reg_fld[2];
75 struct rtx_def plus; rtunion plus_fld1;
76 struct rtx_def neg;
77 struct rtx_def mult; rtunion mult_fld1;
78 struct rtx_def sdiv; rtunion sdiv_fld1;
79 struct rtx_def udiv; rtunion udiv_fld1;
80 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
81 struct rtx_def smod_32; rtunion smod_32_fld1;
82 struct rtx_def wide_mult; rtunion wide_mult_fld1;
83 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
84 struct rtx_def wide_trunc;
85 struct rtx_def shift; rtunion shift_fld1;
86 struct rtx_def shift_mult; rtunion shift_mult_fld1;
87 struct rtx_def shift_add; rtunion shift_add_fld1;
88 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
89 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
90 struct rtx_def zext;
91 struct rtx_def trunc;
92
93 rtx pow2[MAX_BITS_PER_WORD];
94 rtx cint[MAX_BITS_PER_WORD];
95 };
96
97 static void
98 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
99 enum machine_mode from_mode, bool speed)
100 {
101 int to_size, from_size;
102 rtx which;
103
104 /* We're given no information about the true size of a partial integer,
105 only the size of the "full" integer it requires for storage. For
106 comparison purposes here, reduce the bit size by one in that case. */
107 to_size = (GET_MODE_BITSIZE (to_mode)
108 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
109 from_size = (GET_MODE_BITSIZE (from_mode)
110 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
111
112 /* Assume cost of zero-extend and sign-extend is the same. */
113 which = (to_size < from_size ? &all->trunc : &all->zext);
114
115 PUT_MODE (&all->reg, from_mode);
116 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
117 }
118
119 static void
120 init_expmed_one_mode (struct init_expmed_rtl *all,
121 enum machine_mode mode, int speed)
122 {
123 int m, n, mode_bitsize;
124 enum machine_mode mode_from;
125
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
127
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142 PUT_MODE (&all->zext, mode);
143 PUT_MODE (&all->trunc, mode);
144
145 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
146 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
147 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
148 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
149 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
150
151 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
152 <= 2 * add_cost (speed, mode)));
153 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
154 <= 4 * add_cost (speed, mode)));
155
156 set_shift_cost (speed, mode, 0, 0);
157 {
158 int cost = add_cost (speed, mode);
159 set_shiftadd_cost (speed, mode, 0, cost);
160 set_shiftsub0_cost (speed, mode, 0, cost);
161 set_shiftsub1_cost (speed, mode, 0, cost);
162 }
163
164 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
165 for (m = 1; m < n; m++)
166 {
167 XEXP (&all->shift, 1) = all->cint[m];
168 XEXP (&all->shift_mult, 1) = all->pow2[m];
169
170 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
171 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
172 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
173 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
174 }
175
176 if (SCALAR_INT_MODE_P (mode))
177 {
178 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
179 mode_from = (enum machine_mode)(mode_from + 1))
180 init_expmed_one_conv (all, mode, mode_from, speed);
181 }
182 if (GET_MODE_CLASS (mode) == MODE_INT)
183 {
184 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
185 if (wider_mode != VOIDmode)
186 {
187 PUT_MODE (&all->zext, wider_mode);
188 PUT_MODE (&all->wide_mult, wider_mode);
189 PUT_MODE (&all->wide_lshr, wider_mode);
190 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
191
192 set_mul_widen_cost (speed, wider_mode,
193 set_src_cost (&all->wide_mult, speed));
194 set_mul_highpart_cost (speed, mode,
195 set_src_cost (&all->wide_trunc, speed));
196 }
197 }
198 }
199
200 void
201 init_expmed (void)
202 {
203 struct init_expmed_rtl all;
204 enum machine_mode mode;
205 int m, speed;
206
207 memset (&all, 0, sizeof all);
208 for (m = 1; m < MAX_BITS_PER_WORD; m++)
209 {
210 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
211 all.cint[m] = GEN_INT (m);
212 }
213
214 PUT_CODE (&all.reg, REG);
215 /* Avoid using hard regs in ways which may be unsupported. */
216 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
217
218 PUT_CODE (&all.plus, PLUS);
219 XEXP (&all.plus, 0) = &all.reg;
220 XEXP (&all.plus, 1) = &all.reg;
221
222 PUT_CODE (&all.neg, NEG);
223 XEXP (&all.neg, 0) = &all.reg;
224
225 PUT_CODE (&all.mult, MULT);
226 XEXP (&all.mult, 0) = &all.reg;
227 XEXP (&all.mult, 1) = &all.reg;
228
229 PUT_CODE (&all.sdiv, DIV);
230 XEXP (&all.sdiv, 0) = &all.reg;
231 XEXP (&all.sdiv, 1) = &all.reg;
232
233 PUT_CODE (&all.udiv, UDIV);
234 XEXP (&all.udiv, 0) = &all.reg;
235 XEXP (&all.udiv, 1) = &all.reg;
236
237 PUT_CODE (&all.sdiv_32, DIV);
238 XEXP (&all.sdiv_32, 0) = &all.reg;
239 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
240
241 PUT_CODE (&all.smod_32, MOD);
242 XEXP (&all.smod_32, 0) = &all.reg;
243 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
244
245 PUT_CODE (&all.zext, ZERO_EXTEND);
246 XEXP (&all.zext, 0) = &all.reg;
247
248 PUT_CODE (&all.wide_mult, MULT);
249 XEXP (&all.wide_mult, 0) = &all.zext;
250 XEXP (&all.wide_mult, 1) = &all.zext;
251
252 PUT_CODE (&all.wide_lshr, LSHIFTRT);
253 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
254
255 PUT_CODE (&all.wide_trunc, TRUNCATE);
256 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
257
258 PUT_CODE (&all.shift, ASHIFT);
259 XEXP (&all.shift, 0) = &all.reg;
260
261 PUT_CODE (&all.shift_mult, MULT);
262 XEXP (&all.shift_mult, 0) = &all.reg;
263
264 PUT_CODE (&all.shift_add, PLUS);
265 XEXP (&all.shift_add, 0) = &all.shift_mult;
266 XEXP (&all.shift_add, 1) = &all.reg;
267
268 PUT_CODE (&all.shift_sub0, MINUS);
269 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
270 XEXP (&all.shift_sub0, 1) = &all.reg;
271
272 PUT_CODE (&all.shift_sub1, MINUS);
273 XEXP (&all.shift_sub1, 0) = &all.reg;
274 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
275
276 PUT_CODE (&all.trunc, TRUNCATE);
277 XEXP (&all.trunc, 0) = &all.reg;
278
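/* At this point ALL holds mode-less skeleton rtxes; for example the addition
   skeleton is effectively (plus (reg N) (reg N)) with N = LAST_VIRTUAL_REGISTER
   + 1.  init_expmed_one_mode below only has to stamp a mode on them with
   PUT_MODE (giving, say, (plus:SI (reg:SI N) (reg:SI N))) before asking for
   their rtx cost via set_src_cost.  */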
279 for (speed = 0; speed < 2; speed++)
280 {
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
283
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (enum machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
287
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
292
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
297 }
298
299 if (alg_hash_used_p ())
300 {
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
303 }
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
307 }
308
309 /* Return an rtx representing minus the value of X.
310 MODE is the intended mode of the result,
311 useful if X is a CONST_INT. */
312
313 rtx
314 negate_rtx (enum machine_mode mode, rtx x)
315 {
316 rtx result = simplify_unary_operation (NEG, mode, x, mode);
317
318 if (result == 0)
319 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
320
321 return result;
322 }
323
324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
325 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
326 If MODE is BLKmode, return a reference to every byte in the bitfield.
327 Set *NEW_BITNUM to the bit position of the field within the new memory. */
328
329 static rtx
330 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
331 unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum,
333 unsigned HOST_WIDE_INT *new_bitnum)
334 {
335 if (mode == BLKmode)
336 {
337 *new_bitnum = bitnum % BITS_PER_UNIT;
338 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
339 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
340 / BITS_PER_UNIT);
341 return adjust_bitfield_address_size (mem, mode, offset, size);
342 }
343 else
344 {
345 unsigned int unit = GET_MODE_BITSIZE (mode);
346 *new_bitnum = bitnum % unit;
347 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
348 return adjust_bitfield_address (mem, mode, offset);
349 }
350 }
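/* For example, narrowing to SImode (32 bits) with BITNUM == 70 yields
   *NEW_BITNUM == 70 % 32 == 6 and a byte offset of (70 - 6) / 8 == 8,
   i.e. the MEM is advanced to the 32-bit unit containing the field.  */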
351
352 /* The caller wants to perform insertion or extraction PATTERN on a
353 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
354 BITREGION_START and BITREGION_END are as for store_bit_field
355 and FIELDMODE is the natural mode of the field.
356
357 Search for a mode that is compatible with the memory access
358 restrictions and (where applicable) with a register insertion or
359 extraction. Return the new memory on success, storing the adjusted
360 bit position in *NEW_BITNUM. Return null otherwise. */
361
362 static rtx
363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
364 rtx op0, HOST_WIDE_INT bitsize,
365 HOST_WIDE_INT bitnum,
366 unsigned HOST_WIDE_INT bitregion_start,
367 unsigned HOST_WIDE_INT bitregion_end,
368 enum machine_mode fieldmode,
369 unsigned HOST_WIDE_INT *new_bitnum)
370 {
371 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
372 bitregion_end, MEM_ALIGN (op0),
373 MEM_VOLATILE_P (op0));
374 enum machine_mode best_mode;
375 if (iter.next_mode (&best_mode))
376 {
377 /* We can use a memory in BEST_MODE. See whether this is true for
378 any wider modes. All other things being equal, we prefer to
379 use the widest mode possible because it tends to expose more
380 CSE opportunities. */
381 if (!iter.prefer_smaller_modes ())
382 {
383 /* Limit the search to the mode required by the corresponding
384 register insertion or extraction instruction, if any. */
385 enum machine_mode limit_mode = word_mode;
386 extraction_insn insn;
387 if (get_best_reg_extraction_insn (&insn, pattern,
388 GET_MODE_BITSIZE (best_mode),
389 fieldmode))
390 limit_mode = insn.field_mode;
391
392 enum machine_mode wider_mode;
393 while (iter.next_mode (&wider_mode)
394 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
395 best_mode = wider_mode;
396 }
397 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
398 new_bitnum);
399 }
400 return NULL_RTX;
401 }
402
403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
404 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
405 offset is then BITNUM / BITS_PER_UNIT. */
406
407 static bool
408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
409 unsigned HOST_WIDE_INT bitsize,
410 enum machine_mode struct_mode)
411 {
412 if (BYTES_BIG_ENDIAN)
413 return (bitnum % BITS_PER_UNIT == 0
414 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
415 || (bitnum + bitsize) % BITS_PER_WORD == 0));
416 else
417 return bitnum % BITS_PER_WORD == 0;
418 }
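/* As an illustration, on a little-endian target with 32-bit words a 32-bit
   field at BITNUM 32 of a 64-bit structure satisfies
   bitnum % BITS_PER_WORD == 0, so it is the lowpart of the second word and
   can be reached at subreg byte offset 32 / BITS_PER_UNIT == 4.  */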
419 \f
420 /* Try to use instruction INSV to store VALUE into a field of OP0.
421 BITSIZE and BITNUM are as for store_bit_field. */
422
423 static bool
424 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
425 unsigned HOST_WIDE_INT bitsize,
426 unsigned HOST_WIDE_INT bitnum, rtx value)
427 {
428 struct expand_operand ops[4];
429 rtx value1;
430 rtx xop0 = op0;
431 rtx last = get_last_insn ();
432 bool copy_back = false;
433
434 enum machine_mode op_mode = insv->field_mode;
435 unsigned int unit = GET_MODE_BITSIZE (op_mode);
436 if (bitsize == 0 || bitsize > unit)
437 return false;
438
439 if (MEM_P (xop0))
440 /* Get a reference to the first byte of the field. */
441 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
442 &bitnum);
443 else
444 {
445 /* Convert from counting within OP0 to counting in OP_MODE. */
446 if (BYTES_BIG_ENDIAN)
447 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
448
449 /* If xop0 is a register, we need it in OP_MODE
450 to make it acceptable to the format of insv. */
451 if (GET_CODE (xop0) == SUBREG)
452 /* We can't just change the mode, because this might clobber op0,
453 and we will need the original value of op0 if insv fails. */
454 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
455 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
456 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
457 }
458
459 /* If the destination is a paradoxical subreg such that we need a
460 truncate to the inner mode, perform the insertion on a temporary and
461 truncate the result to the original destination. Note that we can't
462 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
463 X) 0)) is (reg:N X). */
464 if (GET_CODE (xop0) == SUBREG
465 && REG_P (SUBREG_REG (xop0))
466 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
467 op_mode))
468 {
469 rtx tem = gen_reg_rtx (op_mode);
470 emit_move_insn (tem, xop0);
471 xop0 = tem;
472 copy_back = true;
473 }
474
475 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
476 "backwards" from the size of the unit we are inserting into.
477 Otherwise, we count bits from the most significant on a
478 BYTES/BITS_BIG_ENDIAN machine. */
479
480 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
481 bitnum = unit - bitsize - bitnum;
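 /* For instance, with a 32-bit unit an 8-bit field at bit 4 becomes
    bitnum = 32 - 8 - 4 = 20, i.e. the same bits counted from the
    opposite end of the unit.  */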
482
483 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
484 value1 = value;
485 if (GET_MODE (value) != op_mode)
486 {
487 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
488 {
489 /* Optimization: Don't bother really extending VALUE
490 if it has all the bits we will actually use. However,
491 if we must narrow it, be sure we do it correctly. */
492
493 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
494 {
495 rtx tmp;
496
497 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
498 if (! tmp)
499 tmp = simplify_gen_subreg (op_mode,
500 force_reg (GET_MODE (value),
501 value1),
502 GET_MODE (value), 0);
503 value1 = tmp;
504 }
505 else
506 value1 = gen_lowpart (op_mode, value1);
507 }
508 else if (CONST_INT_P (value))
509 value1 = gen_int_mode (INTVAL (value), op_mode);
510 else
511 /* Parse phase is supposed to make VALUE's data type
512 match that of the component reference, which is a type
513 at least as wide as the field; so VALUE should have
514 a mode that corresponds to that type. */
515 gcc_assert (CONSTANT_P (value));
516 }
517
518 create_fixed_operand (&ops[0], xop0);
519 create_integer_operand (&ops[1], bitsize);
520 create_integer_operand (&ops[2], bitnum);
521 create_input_operand (&ops[3], value1, op_mode);
522 if (maybe_expand_insn (insv->icode, 4, ops))
523 {
524 if (copy_back)
525 convert_move (op0, xop0, true);
526 return true;
527 }
528 delete_insns_since (last);
529 return false;
530 }
531
532 /* A subroutine of store_bit_field, with the same arguments. Return true
533 if the operation could be implemented.
534
535 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
536 no other way of implementing the operation. If FALLBACK_P is false,
537 return false instead. */
538
539 static bool
540 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
541 unsigned HOST_WIDE_INT bitnum,
542 unsigned HOST_WIDE_INT bitregion_start,
543 unsigned HOST_WIDE_INT bitregion_end,
544 enum machine_mode fieldmode,
545 rtx value, bool fallback_p)
546 {
547 rtx op0 = str_rtx;
548 rtx orig_value;
549
550 while (GET_CODE (op0) == SUBREG)
551 {
552 /* The following line once was done only if WORDS_BIG_ENDIAN,
553 but I think that is a mistake. WORDS_BIG_ENDIAN is
554 meaningful at a much higher level; when structures are copied
555 between memory and regs, the higher-numbered regs
556 always get higher addresses. */
557 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
558 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
559 int byte_offset = 0;
560
561 /* Paradoxical subregs need special handling on big endian machines. */
562 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
563 {
564 int difference = inner_mode_size - outer_mode_size;
565
566 if (WORDS_BIG_ENDIAN)
567 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
568 if (BYTES_BIG_ENDIAN)
569 byte_offset += difference % UNITS_PER_WORD;
570 }
571 else
572 byte_offset = SUBREG_BYTE (op0);
573
574 bitnum += byte_offset * BITS_PER_UNIT;
575 op0 = SUBREG_REG (op0);
576 }
577
578 /* No action is needed if the target is a register and if the field
579 lies completely outside that register. This can occur if the source
580 code contains an out-of-bounds access to a small array. */
581 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
582 return true;
583
584 /* Use vec_set patterns for inserting parts of vectors whenever
585 available. */
586 if (VECTOR_MODE_P (GET_MODE (op0))
587 && !MEM_P (op0)
588 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
589 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
590 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
591 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
592 {
593 struct expand_operand ops[3];
594 enum machine_mode outermode = GET_MODE (op0);
595 enum machine_mode innermode = GET_MODE_INNER (outermode);
596 enum insn_code icode = optab_handler (vec_set_optab, outermode);
597 int pos = bitnum / GET_MODE_BITSIZE (innermode);
598
599 create_fixed_operand (&ops[0], op0);
600 create_input_operand (&ops[1], value, innermode);
601 create_integer_operand (&ops[2], pos);
602 if (maybe_expand_insn (icode, 3, ops))
603 return true;
604 }
605
 606 /* If the target is a register, overwriting the entire object or storing
 607 a full-word or multi-word field can be done with just a SUBREG. */
608 if (!MEM_P (op0)
609 && bitsize == GET_MODE_BITSIZE (fieldmode)
610 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
611 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
612 {
613 /* Use the subreg machinery either to narrow OP0 to the required
614 words or to cope with mode punning between equal-sized modes. */
615 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
616 bitnum / BITS_PER_UNIT);
617 if (sub)
618 {
619 emit_move_insn (sub, value);
620 return true;
621 }
622 }
623
624 /* If the target is memory, storing any naturally aligned field can be
625 done with a simple store. For targets that support fast unaligned
626 memory, any naturally sized, unit aligned field can be done directly. */
627 if (MEM_P (op0)
628 && bitnum % BITS_PER_UNIT == 0
629 && bitsize == GET_MODE_BITSIZE (fieldmode)
630 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
631 || (bitnum % bitsize == 0
632 && MEM_ALIGN (op0) % bitsize == 0)))
633 {
634 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
635 emit_move_insn (op0, value);
636 return true;
637 }
638
639 /* Make sure we are playing with integral modes. Pun with subregs
640 if we aren't. This must come after the entire register case above,
641 since that case is valid for any mode. The following cases are only
642 valid for integral modes. */
643 {
644 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
645 if (imode != GET_MODE (op0))
646 {
647 if (MEM_P (op0))
648 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
649 else
650 {
651 gcc_assert (imode != BLKmode);
652 op0 = gen_lowpart (imode, op0);
653 }
654 }
655 }
656
657 /* Storing an lsb-aligned field in a register
658 can be done with a movstrict instruction. */
659
660 if (!MEM_P (op0)
661 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
662 && bitsize == GET_MODE_BITSIZE (fieldmode)
663 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
664 {
665 struct expand_operand ops[2];
666 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
667 rtx arg0 = op0;
668 unsigned HOST_WIDE_INT subreg_off;
669
670 if (GET_CODE (arg0) == SUBREG)
671 {
672 /* Else we've got some float mode source being extracted into
673 a different float mode destination -- this combination of
674 subregs results in Severe Tire Damage. */
675 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
676 || GET_MODE_CLASS (fieldmode) == MODE_INT
677 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
678 arg0 = SUBREG_REG (arg0);
679 }
680
681 subreg_off = bitnum / BITS_PER_UNIT;
682 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
683 {
684 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
685
686 create_fixed_operand (&ops[0], arg0);
687 /* Shrink the source operand to FIELDMODE. */
688 create_convert_operand_to (&ops[1], value, fieldmode, false);
689 if (maybe_expand_insn (icode, 2, ops))
690 return true;
691 }
692 }
693
694 /* Handle fields bigger than a word. */
695
696 if (bitsize > BITS_PER_WORD)
697 {
698 /* Here we transfer the words of the field
699 in the order least significant first.
700 This is because the most significant word is the one which may
701 be less than full.
702 However, only do that if the value is not BLKmode. */
703
704 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
705 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
706 unsigned int i;
707 rtx last;
708
709 /* This is the mode we must force value to, so that there will be enough
710 subwords to extract. Note that fieldmode will often (always?) be
711 VOIDmode, because that is what store_field uses to indicate that this
712 is a bit field, but passing VOIDmode to operand_subword_force
713 is not allowed. */
714 fieldmode = GET_MODE (value);
715 if (fieldmode == VOIDmode)
716 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
717
718 last = get_last_insn ();
719 for (i = 0; i < nwords; i++)
720 {
721 /* If I is 0, use the low-order word in both field and target;
722 if I is 1, use the next to lowest word; and so on. */
723 unsigned int wordnum = (backwards
724 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
725 - i - 1
726 : i);
727 unsigned int bit_offset = (backwards
728 ? MAX ((int) bitsize - ((int) i + 1)
729 * BITS_PER_WORD,
730 0)
731 : (int) i * BITS_PER_WORD);
732 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
733 unsigned HOST_WIDE_INT new_bitsize =
734 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
735
 736 /* If the remaining chunk is narrower than a full word, we have
 737 to make sure that on big-endian machines the higher-order
 738 bits are used. */
739 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
740 value_word = simplify_expand_binop (word_mode, lshr_optab,
741 value_word,
742 GEN_INT (BITS_PER_WORD
743 - new_bitsize),
744 NULL_RTX, true,
745 OPTAB_LIB_WIDEN);
746
747 if (!store_bit_field_1 (op0, new_bitsize,
748 bitnum + bit_offset,
749 bitregion_start, bitregion_end,
750 word_mode,
751 value_word, fallback_p))
752 {
753 delete_insns_since (last);
754 return false;
755 }
756 }
757 return true;
758 }
759
760 /* If VALUE has a floating-point or complex mode, access it as an
761 integer of the corresponding size. This can occur on a machine
762 with 64 bit registers that uses SFmode for float. It can also
763 occur for unaligned float or complex fields. */
764 orig_value = value;
765 if (GET_MODE (value) != VOIDmode
766 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
767 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
768 {
769 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
770 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
771 }
772
773 /* If OP0 is a multi-word register, narrow it to the affected word.
774 If the region spans two words, defer to store_split_bit_field. */
775 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
776 {
777 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
778 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
779 gcc_assert (op0);
780 bitnum %= BITS_PER_WORD;
781 if (bitnum + bitsize > BITS_PER_WORD)
782 {
783 if (!fallback_p)
784 return false;
785
786 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
787 bitregion_end, value);
788 return true;
789 }
790 }
791
792 /* From here on we can assume that the field to be stored in fits
793 within a word. If the destination is a register, it too fits
794 in a word. */
795
796 extraction_insn insv;
797 if (!MEM_P (op0)
798 && get_best_reg_extraction_insn (&insv, EP_insv,
799 GET_MODE_BITSIZE (GET_MODE (op0)),
800 fieldmode)
801 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
802 return true;
803
804 /* If OP0 is a memory, try copying it to a register and seeing if a
805 cheap register alternative is available. */
806 if (MEM_P (op0))
807 {
808 /* Do not use unaligned memory insvs for volatile bitfields when
809 -fstrict-volatile-bitfields is in effect. */
810 if (!(MEM_VOLATILE_P (op0)
811 && flag_strict_volatile_bitfields > 0)
812 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
813 fieldmode)
814 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
815 return true;
816
817 rtx last = get_last_insn ();
818
819 /* Try loading part of OP0 into a register, inserting the bitfield
820 into that, and then copying the result back to OP0. */
821 unsigned HOST_WIDE_INT bitpos;
822 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
823 bitregion_start, bitregion_end,
824 fieldmode, &bitpos);
825 if (xop0)
826 {
827 rtx tempreg = copy_to_reg (xop0);
828 if (store_bit_field_1 (tempreg, bitsize, bitpos,
829 bitregion_start, bitregion_end,
830 fieldmode, orig_value, false))
831 {
832 emit_move_insn (xop0, tempreg);
833 return true;
834 }
835 delete_insns_since (last);
836 }
837 }
838
839 if (!fallback_p)
840 return false;
841
842 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
843 bitregion_end, value);
844 return true;
845 }
846
847 /* Generate code to store value from rtx VALUE
848 into a bit-field within structure STR_RTX
849 containing BITSIZE bits starting at bit BITNUM.
850
851 BITREGION_START is bitpos of the first bitfield in this region.
852 BITREGION_END is the bitpos of the ending bitfield in this region.
853 These two fields are 0, if the C++ memory model does not apply,
854 or we are not interested in keeping track of bitfield regions.
855
856 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
857
858 void
859 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
860 unsigned HOST_WIDE_INT bitnum,
861 unsigned HOST_WIDE_INT bitregion_start,
862 unsigned HOST_WIDE_INT bitregion_end,
863 enum machine_mode fieldmode,
864 rtx value)
865 {
866 /* Under the C++0x memory model, we must not touch bits outside the
867 bit region. Adjust the address to start at the beginning of the
868 bit region. */
869 if (MEM_P (str_rtx) && bitregion_start > 0)
870 {
871 enum machine_mode bestmode;
872 HOST_WIDE_INT offset, size;
873
874 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
875
876 offset = bitregion_start / BITS_PER_UNIT;
877 bitnum -= bitregion_start;
878 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
879 bitregion_end -= bitregion_start;
880 bitregion_start = 0;
881 bestmode = get_best_mode (bitsize, bitnum,
882 bitregion_start, bitregion_end,
883 MEM_ALIGN (str_rtx), VOIDmode,
884 MEM_VOLATILE_P (str_rtx));
885 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
886 }
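/* For example, with BITREGION_START == 32 the address is advanced by 4 bytes
   and BITNUM is reduced by 32; a 3-bit field that was at bit 37 is then at
   bit 5 of the new reference, and the adjusted MEM is given
   size == (5 + 3 + 7) / 8 == 1 byte.  */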
887
888 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
889 bitregion_start, bitregion_end,
890 fieldmode, value, true))
891 gcc_unreachable ();
892 }
893 \f
894 /* Use shifts and boolean operations to store VALUE into a bit field of
895 width BITSIZE in OP0, starting at bit BITNUM. */
896
897 static void
898 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
899 unsigned HOST_WIDE_INT bitnum,
900 unsigned HOST_WIDE_INT bitregion_start,
901 unsigned HOST_WIDE_INT bitregion_end,
902 rtx value)
903 {
904 enum machine_mode mode;
905 rtx temp;
906 int all_zero = 0;
907 int all_one = 0;
908
909 /* There is a case not handled here:
910 a structure with a known alignment of just a halfword
911 and a field split across two aligned halfwords within the structure.
912 Or likewise a structure with a known alignment of just a byte
913 and a field split across two bytes.
914 Such cases are not supposed to be able to occur. */
915
916 if (MEM_P (op0))
917 {
918 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
919
920 if (bitregion_end)
921 maxbits = bitregion_end - bitregion_start + 1;
922
923 /* Get the proper mode to use for this field. We want a mode that
924 includes the entire field. If such a mode would be larger than
925 a word, we won't be doing the extraction the normal way.
926 We don't want a mode bigger than the destination. */
927
928 mode = GET_MODE (op0);
929 if (GET_MODE_BITSIZE (mode) == 0
930 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
931 mode = word_mode;
932
933 if (MEM_VOLATILE_P (op0)
934 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
935 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
936 && flag_strict_volatile_bitfields > 0)
937 mode = GET_MODE (op0);
938 else
939 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
940 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
941
942 if (mode == VOIDmode)
943 {
944 /* The only way this should occur is if the field spans word
945 boundaries. */
946 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
947 bitregion_end, value);
948 return;
949 }
950
951 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
952 }
953
954 mode = GET_MODE (op0);
955 gcc_assert (SCALAR_INT_MODE_P (mode));
956
957 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
958 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
959
960 if (BYTES_BIG_ENDIAN)
961 /* BITNUM is the distance between our msb
962 and that of the containing datum.
963 Convert it to the distance from the lsb. */
964 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
965
966 /* Now BITNUM is always the distance between our lsb
967 and that of OP0. */
968
969 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
970 we must first convert its mode to MODE. */
971
972 if (CONST_INT_P (value))
973 {
974 HOST_WIDE_INT v = INTVAL (value);
975
976 if (bitsize < HOST_BITS_PER_WIDE_INT)
977 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
978
979 if (v == 0)
980 all_zero = 1;
981 else if ((bitsize < HOST_BITS_PER_WIDE_INT
982 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
983 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
984 all_one = 1;
985
986 value = lshift_value (mode, value, bitnum, bitsize);
987 }
988 else
989 {
990 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
991 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
992
993 if (GET_MODE (value) != mode)
994 value = convert_to_mode (mode, value, 1);
995
996 if (must_and)
997 value = expand_binop (mode, and_optab, value,
998 mask_rtx (mode, 0, bitsize, 0),
999 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1000 if (bitnum > 0)
1001 value = expand_shift (LSHIFT_EXPR, mode, value,
1002 bitnum, NULL_RTX, 1);
1003 }
1004
1005 /* Now clear the chosen bits in OP0,
1006 except that if VALUE is -1 we need not bother. */
1007 /* We keep the intermediates in registers to allow CSE to combine
1008 consecutive bitfield assignments. */
1009
1010 temp = force_reg (mode, op0);
1011
1012 if (! all_one)
1013 {
1014 temp = expand_binop (mode, and_optab, temp,
1015 mask_rtx (mode, bitnum, bitsize, 1),
1016 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1017 temp = force_reg (mode, temp);
1018 }
1019
1020 /* Now logical-or VALUE into OP0, unless it is zero. */
1021
1022 if (! all_zero)
1023 {
1024 temp = expand_binop (mode, ior_optab, temp, value,
1025 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1026 temp = force_reg (mode, temp);
1027 }
1028
1029 if (op0 != temp)
1030 {
1031 op0 = copy_rtx (op0);
1032 emit_move_insn (op0, temp);
1033 }
1034 }
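/* As an illustration of the sequence built above: storing a 3-bit value V
   at bit 4 of a byte-wide unit amounts to
     op0 = (op0 & ~(7 << 4)) | ((V & 7) << 4);
   the AND that clears the field is skipped when V is known to be all ones,
   and the final IOR is skipped when V is known to be zero.  */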
1035 \f
1036 /* Store a bit field that is split across multiple accessible memory objects.
1037
1038 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1039 BITSIZE is the field width; BITPOS the position of its first bit
1040 (within the word).
1041 VALUE is the value to store.
1042
1043 This does not yet handle fields wider than BITS_PER_WORD. */
1044
1045 static void
1046 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1047 unsigned HOST_WIDE_INT bitpos,
1048 unsigned HOST_WIDE_INT bitregion_start,
1049 unsigned HOST_WIDE_INT bitregion_end,
1050 rtx value)
1051 {
1052 unsigned int unit;
1053 unsigned int bitsdone = 0;
1054
 1055 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
 1056 much at a time. */
1057 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1058 unit = BITS_PER_WORD;
1059 else
1060 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1061
1062 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1063 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1064 that VALUE might be a floating-point constant. */
1065 if (CONSTANT_P (value) && !CONST_INT_P (value))
1066 {
1067 rtx word = gen_lowpart_common (word_mode, value);
1068
1069 if (word && (value != word))
1070 value = word;
1071 else
1072 value = gen_lowpart_common (word_mode,
1073 force_reg (GET_MODE (value) != VOIDmode
1074 ? GET_MODE (value)
1075 : word_mode, value));
1076 }
1077
1078 while (bitsdone < bitsize)
1079 {
1080 unsigned HOST_WIDE_INT thissize;
1081 rtx part, word;
1082 unsigned HOST_WIDE_INT thispos;
1083 unsigned HOST_WIDE_INT offset;
1084
1085 offset = (bitpos + bitsdone) / unit;
1086 thispos = (bitpos + bitsdone) % unit;
1087
 1088 /* When the region of bytes we can touch is restricted, decrease
 1089 UNIT as needed near the end of the region. */
1090 if (bitregion_end
1091 && unit > BITS_PER_UNIT
1092 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1093 {
1094 unit = unit / 2;
1095 continue;
1096 }
1097
1098 /* THISSIZE must not overrun a word boundary. Otherwise,
1099 store_fixed_bit_field will call us again, and we will mutually
1100 recurse forever. */
1101 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1102 thissize = MIN (thissize, unit - thispos);
1103
1104 if (BYTES_BIG_ENDIAN)
1105 {
1106 /* Fetch successively less significant portions. */
1107 if (CONST_INT_P (value))
1108 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109 >> (bitsize - bitsdone - thissize))
1110 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111 else
1112 {
1113 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1114 /* The args are chosen so that the last part includes the
1115 lsb. Give extract_bit_field the value it needs (with
1116 endianness compensation) to fetch the piece we want. */
1117 part = extract_fixed_bit_field (word_mode, value, thissize,
1118 total_bits - bitsize + bitsdone,
1119 NULL_RTX, 1, false);
1120 }
1121 }
1122 else
1123 {
1124 /* Fetch successively more significant portions. */
1125 if (CONST_INT_P (value))
1126 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1127 >> bitsdone)
1128 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1129 else
1130 part = extract_fixed_bit_field (word_mode, value, thissize,
1131 bitsdone, NULL_RTX, 1, false);
1132 }
1133
1134 /* If OP0 is a register, then handle OFFSET here.
1135
1136 When handling multiword bitfields, extract_bit_field may pass
1137 down a word_mode SUBREG of a larger REG for a bitfield that actually
1138 crosses a word boundary. Thus, for a SUBREG, we must find
1139 the current word starting from the base register. */
1140 if (GET_CODE (op0) == SUBREG)
1141 {
1142 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1143 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1144 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1145 word = word_offset ? const0_rtx : op0;
1146 else
1147 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1148 GET_MODE (SUBREG_REG (op0)));
1149 offset = 0;
1150 }
1151 else if (REG_P (op0))
1152 {
1153 enum machine_mode op0_mode = GET_MODE (op0);
1154 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1155 word = offset ? const0_rtx : op0;
1156 else
1157 word = operand_subword_force (op0, offset, GET_MODE (op0));
1158 offset = 0;
1159 }
1160 else
1161 word = op0;
1162
1163 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1164 it is just an out-of-bounds access. Ignore it. */
1165 if (word != const0_rtx)
1166 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1167 bitregion_start, bitregion_end, part);
1168 bitsdone += thissize;
1169 }
1170 }
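/* For example, with a register target (UNIT == BITS_PER_WORD == 32) a 10-bit
   field starting at bit 28 is stored in two pieces: the first iteration
   writes thissize = 32 - 28 = 4 bits at bit 28 of word 0, and the second
   writes the remaining 6 bits at bit 0 of word 1.  */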
1171 \f
1172 /* A subroutine of extract_bit_field_1 that converts return value X
1173 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1174 to extract_bit_field. */
1175
1176 static rtx
1177 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1178 enum machine_mode tmode, bool unsignedp)
1179 {
1180 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1181 return x;
1182
 1183 /* If TMODE is not a scalar integer mode, first convert X to the
 1184 scalar integer mode of the same size and then access the result
 1185 in TMODE via a SUBREG. */
1186 if (!SCALAR_INT_MODE_P (tmode))
1187 {
1188 enum machine_mode smode;
1189
1190 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1191 x = convert_to_mode (smode, x, unsignedp);
1192 x = force_reg (smode, x);
1193 return gen_lowpart (tmode, x);
1194 }
1195
1196 return convert_to_mode (tmode, x, unsignedp);
1197 }
1198
1199 /* Try to use an ext(z)v pattern to extract a field from OP0.
1200 Return the extracted value on success, otherwise return null.
1201 EXT_MODE is the mode of the extraction and the other arguments
1202 are as for extract_bit_field. */
1203
1204 static rtx
1205 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1206 unsigned HOST_WIDE_INT bitsize,
1207 unsigned HOST_WIDE_INT bitnum,
1208 int unsignedp, rtx target,
1209 enum machine_mode mode, enum machine_mode tmode)
1210 {
1211 struct expand_operand ops[4];
1212 rtx spec_target = target;
1213 rtx spec_target_subreg = 0;
1214 enum machine_mode ext_mode = extv->field_mode;
1215 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1216
1217 if (bitsize == 0 || unit < bitsize)
1218 return NULL_RTX;
1219
1220 if (MEM_P (op0))
1221 /* Get a reference to the first byte of the field. */
1222 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1223 &bitnum);
1224 else
1225 {
1226 /* Convert from counting within OP0 to counting in EXT_MODE. */
1227 if (BYTES_BIG_ENDIAN)
1228 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1229
1230 /* If op0 is a register, we need it in EXT_MODE to make it
1231 acceptable to the format of ext(z)v. */
1232 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1233 return NULL_RTX;
1234 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1235 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1236 }
1237
1238 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1239 "backwards" from the size of the unit we are extracting from.
1240 Otherwise, we count bits from the most significant on a
1241 BYTES/BITS_BIG_ENDIAN machine. */
1242
1243 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1244 bitnum = unit - bitsize - bitnum;
1245
1246 if (target == 0)
1247 target = spec_target = gen_reg_rtx (tmode);
1248
1249 if (GET_MODE (target) != ext_mode)
1250 {
 1251 /* Don't use LHS paradoxical subreg if explicit truncation is needed
 1252 between the mode of the extraction (EXT_MODE) and the target
 1253 mode. Instead, create a temporary and use convert_move to set
 1254 the target. */
1255 if (REG_P (target)
1256 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1257 {
1258 target = gen_lowpart (ext_mode, target);
1259 if (GET_MODE_PRECISION (ext_mode)
1260 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1261 spec_target_subreg = target;
1262 }
1263 else
1264 target = gen_reg_rtx (ext_mode);
1265 }
1266
1267 create_output_operand (&ops[0], target, ext_mode);
1268 create_fixed_operand (&ops[1], op0);
1269 create_integer_operand (&ops[2], bitsize);
1270 create_integer_operand (&ops[3], bitnum);
1271 if (maybe_expand_insn (extv->icode, 4, ops))
1272 {
1273 target = ops[0].value;
1274 if (target == spec_target)
1275 return target;
1276 if (target == spec_target_subreg)
1277 return spec_target;
1278 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1279 }
1280 return NULL_RTX;
1281 }
1282
1283 /* A subroutine of extract_bit_field, with the same arguments.
1284 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1285 if we can find no other means of implementing the operation.
 1286 If FALLBACK_P is false, return NULL instead. */
1287
1288 static rtx
1289 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1290 unsigned HOST_WIDE_INT bitnum,
1291 int unsignedp, bool packedp, rtx target,
1292 enum machine_mode mode, enum machine_mode tmode,
1293 bool fallback_p)
1294 {
1295 rtx op0 = str_rtx;
1296 enum machine_mode int_mode;
1297 enum machine_mode mode1;
1298
1299 if (tmode == VOIDmode)
1300 tmode = mode;
1301
1302 while (GET_CODE (op0) == SUBREG)
1303 {
1304 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1305 op0 = SUBREG_REG (op0);
1306 }
1307
1308 /* If we have an out-of-bounds access to a register, just return an
1309 uninitialized register of the required mode. This can occur if the
1310 source code contains an out-of-bounds access to a small array. */
1311 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1312 return gen_reg_rtx (tmode);
1313
1314 if (REG_P (op0)
1315 && mode == GET_MODE (op0)
1316 && bitnum == 0
1317 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1318 {
1319 /* We're trying to extract a full register from itself. */
1320 return op0;
1321 }
1322
1323 /* See if we can get a better vector mode before extracting. */
1324 if (VECTOR_MODE_P (GET_MODE (op0))
1325 && !MEM_P (op0)
1326 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1327 {
1328 enum machine_mode new_mode;
1329
1330 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1331 new_mode = MIN_MODE_VECTOR_FLOAT;
1332 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1333 new_mode = MIN_MODE_VECTOR_FRACT;
1334 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1335 new_mode = MIN_MODE_VECTOR_UFRACT;
1336 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1337 new_mode = MIN_MODE_VECTOR_ACCUM;
1338 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1339 new_mode = MIN_MODE_VECTOR_UACCUM;
1340 else
1341 new_mode = MIN_MODE_VECTOR_INT;
1342
1343 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1344 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1345 && targetm.vector_mode_supported_p (new_mode))
1346 break;
1347 if (new_mode != VOIDmode)
1348 op0 = gen_lowpart (new_mode, op0);
1349 }
1350
1351 /* Use vec_extract patterns for extracting parts of vectors whenever
1352 available. */
1353 if (VECTOR_MODE_P (GET_MODE (op0))
1354 && !MEM_P (op0)
1355 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1356 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1357 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1358 {
1359 struct expand_operand ops[3];
1360 enum machine_mode outermode = GET_MODE (op0);
1361 enum machine_mode innermode = GET_MODE_INNER (outermode);
1362 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1363 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1364
1365 create_output_operand (&ops[0], target, innermode);
1366 create_input_operand (&ops[1], op0, outermode);
1367 create_integer_operand (&ops[2], pos);
1368 if (maybe_expand_insn (icode, 3, ops))
1369 {
1370 target = ops[0].value;
1371 if (GET_MODE (target) != mode)
1372 return gen_lowpart (tmode, target);
1373 return target;
1374 }
1375 }
1376
1377 /* Make sure we are playing with integral modes. Pun with subregs
1378 if we aren't. */
1379 {
1380 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1381 if (imode != GET_MODE (op0))
1382 {
1383 if (MEM_P (op0))
1384 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1385 else if (imode != BLKmode)
1386 {
1387 op0 = gen_lowpart (imode, op0);
1388
1389 /* If we got a SUBREG, force it into a register since we
1390 aren't going to be able to do another SUBREG on it. */
1391 if (GET_CODE (op0) == SUBREG)
1392 op0 = force_reg (imode, op0);
1393 }
1394 else if (REG_P (op0))
1395 {
1396 rtx reg, subreg;
1397 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1398 MODE_INT);
1399 reg = gen_reg_rtx (imode);
1400 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1401 emit_move_insn (subreg, op0);
1402 op0 = reg;
1403 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1404 }
1405 else
1406 {
1407 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1408 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1409 emit_move_insn (mem, op0);
1410 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1411 }
1412 }
1413 }
1414
1415 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1416 If that's wrong, the solution is to test for it and set TARGET to 0
1417 if needed. */
1418
1419 /* If the bitfield is volatile, we need to make sure the access
1420 remains on a type-aligned boundary. */
1421 if (GET_CODE (op0) == MEM
1422 && MEM_VOLATILE_P (op0)
1423 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1424 && flag_strict_volatile_bitfields > 0)
1425 goto no_subreg_mode_swap;
1426
1427 /* Only scalar integer modes can be converted via subregs. There is an
1428 additional problem for FP modes here in that they can have a precision
1429 which is different from the size. mode_for_size uses precision, but
1430 we want a mode based on the size, so we must avoid calling it for FP
1431 modes. */
1432 mode1 = mode;
1433 if (SCALAR_INT_MODE_P (tmode))
1434 {
1435 enum machine_mode try_mode = mode_for_size (bitsize,
1436 GET_MODE_CLASS (tmode), 0);
1437 if (try_mode != BLKmode)
1438 mode1 = try_mode;
1439 }
1440 gcc_assert (mode1 != BLKmode);
1441
1442 /* Extraction of a full MODE1 value can be done with a subreg as long
1443 as the least significant bit of the value is the least significant
1444 bit of either OP0 or a word of OP0. */
1445 if (!MEM_P (op0)
1446 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1447 && bitsize == GET_MODE_BITSIZE (mode1)
1448 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1449 {
1450 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1451 bitnum / BITS_PER_UNIT);
1452 if (sub)
1453 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1454 }
1455
1456 /* Extraction of a full MODE1 value can be done with a load as long as
1457 the field is on a byte boundary and is sufficiently aligned. */
1458 if (MEM_P (op0)
1459 && bitnum % BITS_PER_UNIT == 0
1460 && bitsize == GET_MODE_BITSIZE (mode1)
1461 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1462 || (bitnum % bitsize == 0
1463 && MEM_ALIGN (op0) % bitsize == 0)))
1464 {
1465 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1466 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1467 }
1468
1469 no_subreg_mode_swap:
1470
1471 /* Handle fields bigger than a word. */
1472
1473 if (bitsize > BITS_PER_WORD)
1474 {
1475 /* Here we transfer the words of the field
1476 in the order least significant first.
1477 This is because the most significant word is the one which may
1478 be less than full. */
1479
1480 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1481 unsigned int i;
1482 rtx last;
1483
1484 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1485 target = gen_reg_rtx (mode);
1486
1487 /* Indicate for flow that the entire target reg is being set. */
1488 emit_clobber (target);
1489
1490 last = get_last_insn ();
1491 for (i = 0; i < nwords; i++)
1492 {
1493 /* If I is 0, use the low-order word in both field and target;
1494 if I is 1, use the next to lowest word; and so on. */
1495 /* Word number in TARGET to use. */
1496 unsigned int wordnum
1497 = (WORDS_BIG_ENDIAN
1498 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1499 : i);
1500 /* Offset from start of field in OP0. */
1501 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1502 ? MAX (0, ((int) bitsize - ((int) i + 1)
1503 * (int) BITS_PER_WORD))
1504 : (int) i * BITS_PER_WORD);
1505 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1506 rtx result_part
1507 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1508 bitsize - i * BITS_PER_WORD),
1509 bitnum + bit_offset, 1, false, target_part,
1510 mode, word_mode, fallback_p);
1511
1512 gcc_assert (target_part);
1513 if (!result_part)
1514 {
1515 delete_insns_since (last);
1516 return NULL;
1517 }
1518
1519 if (result_part != target_part)
1520 emit_move_insn (target_part, result_part);
1521 }
1522
1523 if (unsignedp)
1524 {
1525 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1526 need to be zero'd out. */
1527 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1528 {
1529 unsigned int i, total_words;
1530
1531 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1532 for (i = nwords; i < total_words; i++)
1533 emit_move_insn
1534 (operand_subword (target,
1535 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1536 1, VOIDmode),
1537 const0_rtx);
1538 }
1539 return target;
1540 }
1541
1542 /* Signed bit field: sign-extend with two arithmetic shifts. */
1543 target = expand_shift (LSHIFT_EXPR, mode, target,
1544 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1545 return expand_shift (RSHIFT_EXPR, mode, target,
1546 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1547 }
1548
1549 /* If OP0 is a multi-word register, narrow it to the affected word.
1550 If the region spans two words, defer to extract_split_bit_field. */
1551 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1552 {
1553 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1554 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1555 bitnum %= BITS_PER_WORD;
1556 if (bitnum + bitsize > BITS_PER_WORD)
1557 {
1558 if (!fallback_p)
1559 return NULL_RTX;
1560 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1561 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1562 }
1563 }
1564
1565 /* From here on we know the desired field is smaller than a word.
1566 If OP0 is a register, it too fits within a word. */
1567 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1568 extraction_insn extv;
1569 if (!MEM_P (op0)
1570 && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1571 tmode))
1572 {
1573 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1574 unsignedp, target, mode,
1575 tmode);
1576 if (result)
1577 return result;
1578 }
1579
1580 /* If OP0 is a memory, try copying it to a register and seeing if a
1581 cheap register alternative is available. */
1582 if (MEM_P (op0))
1583 {
1584 /* Do not use extv/extzv for volatile bitfields when
1585 -fstrict-volatile-bitfields is in effect. */
1586 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1587 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1588 tmode))
1589 {
1590 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1591 bitnum, unsignedp,
1592 target, mode,
1593 tmode);
1594 if (result)
1595 return result;
1596 }
1597
1598 rtx last = get_last_insn ();
1599
1600 /* Try loading part of OP0 into a register and extracting the
1601 bitfield from that. */
1602 unsigned HOST_WIDE_INT bitpos;
1603 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1604 0, 0, tmode, &bitpos);
1605 if (xop0)
1606 {
1607 xop0 = copy_to_reg (xop0);
1608 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1609 unsignedp, packedp, target,
1610 mode, tmode, false);
1611 if (result)
1612 return result;
1613 delete_insns_since (last);
1614 }
1615 }
1616
1617 if (!fallback_p)
1618 return NULL;
1619
1620 /* Find a correspondingly-sized integer field, so we can apply
1621 shifts and masks to it. */
1622 int_mode = int_mode_for_mode (tmode);
1623 if (int_mode == BLKmode)
1624 int_mode = int_mode_for_mode (mode);
1625 /* Should probably push op0 out to memory and then do a load. */
1626 gcc_assert (int_mode != BLKmode);
1627
1628 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1629 target, unsignedp, packedp);
1630 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1631 }
1632
 1633 /* Generate code to extract a bit-field from STR_RTX
1634 containing BITSIZE bits, starting at BITNUM,
1635 and put it in TARGET if possible (if TARGET is nonzero).
1636 Regardless of TARGET, we return the rtx for where the value is placed.
1637
1638 STR_RTX is the structure containing the byte (a REG or MEM).
1639 UNSIGNEDP is nonzero if this is an unsigned bit field.
1640 PACKEDP is nonzero if the field has the packed attribute.
1641 MODE is the natural mode of the field value once extracted.
1642 TMODE is the mode the caller would like the value to have;
1643 but the value may be returned with type MODE instead.
1644
1645 If a TARGET is specified and we can store in it at no extra cost,
1646 we do so, and return TARGET.
1647 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1648 if they are equally easy. */
1649
1650 rtx
1651 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1652 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1653 rtx target, enum machine_mode mode, enum machine_mode tmode)
1654 {
1655 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1656 target, mode, tmode, true);
1657 }
1658 \f
1659 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1660 from bit BITNUM of OP0.
1661
1662 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1663 PACKEDP is true if the field has the packed attribute.
1664
1665 If TARGET is nonzero, attempts to store the value there
1666 and return TARGET, but this is not guaranteed.
1667 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1668
1669 static rtx
1670 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1671 unsigned HOST_WIDE_INT bitsize,
1672 unsigned HOST_WIDE_INT bitnum, rtx target,
1673 int unsignedp, bool packedp)
1674 {
1675 enum machine_mode mode;
1676
1677 if (MEM_P (op0))
1678 {
1679 /* Get the proper mode to use for this field. We want a mode that
1680 includes the entire field. If such a mode would be larger than
1681 a word, we won't be doing the extraction the normal way. */
1682
1683 if (MEM_VOLATILE_P (op0)
1684 && flag_strict_volatile_bitfields > 0)
1685 {
1686 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1687 mode = GET_MODE (op0);
1688 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1689 mode = GET_MODE (target);
1690 else
1691 mode = tmode;
1692 }
1693 else
1694 mode = get_best_mode (bitsize, bitnum, 0, 0,
1695 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1696
1697 if (mode == VOIDmode)
1698 /* The only way this should occur is if the field spans word
1699 boundaries. */
1700 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1701
1702 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1703 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1704
1705 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1706 if it results in a multi-word access where we otherwise wouldn't
1707 have one. So, check for that case here. */
1708 if (MEM_P (op0)
1709 && MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0
1711 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1712 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1713 {
1714 if (STRICT_ALIGNMENT)
1715 {
1716 static bool informed_about_misalignment = false;
1717
1718 if (packedp)
1719 {
1720 if (bitsize == total_bits)
1721 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1722 "multiple accesses to volatile structure"
1723 " member because of packed attribute");
1724 else
1725 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1726 "multiple accesses to volatile structure"
1727 " bitfield because of packed attribute");
1728
1729 return extract_split_bit_field (op0, bitsize, bitnum,
1730 unsignedp);
1731 }
1732
1733 if (bitsize == total_bits)
1734 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1735 "mis-aligned access used for structure member");
1736 else
1737 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1738 "mis-aligned access used for structure bitfield");
1739
1740 if (! informed_about_misalignment)
1741 {
1742 informed_about_misalignment = true;
1743 inform (input_location,
1744 "when a volatile object spans multiple type-sized"
1745 " locations, the compiler must choose between using"
1746 " a single mis-aligned access to preserve the"
1747 " volatility, or using multiple aligned accesses"
1748 " to avoid runtime faults; this code may fail at"
1749 " runtime if the hardware does not allow this"
1750 " access");
1751 }
1752 }
1753 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1754 }
1755 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1756 bitnum -= bit_offset;
1757 }
1758
1759 mode = GET_MODE (op0);
1760 gcc_assert (SCALAR_INT_MODE_P (mode));
1761
1762 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1763 for invalid input, such as extract equivalent of f5 from
1764 gcc.dg/pr48335-2.c. */
1765
1766 if (BYTES_BIG_ENDIAN)
1767 /* BITNUM is the distance between our msb and that of OP0.
1768 Convert it to the distance from the lsb. */
1769 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1770
1771 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1772 We have reduced the big-endian case to the little-endian case. */
1773
1774 if (unsignedp)
1775 {
1776 if (bitnum)
1777 {
1778 /* If the field does not already start at the lsb,
1779 shift it so it does. */
1780 /* Maybe propagate the target for the shift. */
1781 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1782 if (tmode != mode)
1783 subtarget = 0;
1784 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1785 }
1786 /* Convert the value to the desired mode. */
1787 if (mode != tmode)
1788 op0 = convert_to_mode (tmode, op0, 1);
1789
1790 /* Unless the msb of the field used to be the msb when we shifted,
1791 mask out the upper bits. */
1792
1793 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1794 return expand_binop (GET_MODE (op0), and_optab, op0,
1795 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1796 target, 1, OPTAB_LIB_WIDEN);
1797 return op0;
1798 }
1799
1800 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1801 then arithmetic-shift its lsb to the lsb of the word. */
1802 op0 = force_reg (mode, op0);
1803
1804 /* Find the narrowest integer mode that contains the field. */
1805
1806 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1807 mode = GET_MODE_WIDER_MODE (mode))
1808 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1809 {
1810 op0 = convert_to_mode (mode, op0, 0);
1811 break;
1812 }
1813
1814 if (mode != tmode)
1815 target = 0;
1816
1817 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1818 {
1819 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1820 /* Maybe propagate the target for the shift. */
1821 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1822 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1823 }
1824
1825 return expand_shift (RSHIFT_EXPR, mode, op0,
1826 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1827 }
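/* For illustration only (not part of this file): a minimal C sketch of the
   shift-and-mask extraction performed by extract_fixed_bit_field above,
   assuming a 32-bit word, little-endian bit numbering and
   0 < BITSIZE, BITNUM + BITSIZE <= 32.  The names are hypothetical.

     unsigned int
     extract_u (unsigned int word, int bitnum, int bitsize)
     {
       word >>= bitnum;                  // bring the field down to the lsb
       if (bitsize < 32)                 // mask out the bits above the field
         word &= ((unsigned int) 1 << bitsize) - 1;
       return word;
     }

     int
     extract_s (unsigned int word, int bitnum, int bitsize)
     {
       // Shift the field's msb up to the word's msb, then arithmetic-shift
       // its lsb back down to bit 0, as the signed path above does.  This
       // assumes the usual two's-complement, arithmetic-right-shift behaviour.
       int hi = (int) (word << (32 - bitsize - bitnum));
       return hi >> (32 - bitsize);
     }
*/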
1828 \f
1829 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1830 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1831 complement of that if COMPLEMENT. The mask is truncated if
1832 necessary to the width of mode MODE. The mask is zero-extended if
1833 BITSIZE+BITPOS is too small for MODE. */
1834
1835 static rtx
1836 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1837 {
1838 double_int mask;
1839
1840 mask = double_int::mask (bitsize);
1841 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1842
1843 if (complement)
1844 mask = ~mask;
1845
1846 return immed_double_int_const (mask, mode);
1847 }
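/* Worked example (illustrative only): for SImode with BITPOS == 4,
   BITSIZE == 8 and COMPLEMENT == 0, mask_rtx builds eight ones shifted
   left by four, i.e. 0x00000ff0; with COMPLEMENT != 0 it builds the
   inverse, 0xfffff00f, truncated to the 32 bits of SImode.  */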
1848
1849 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1850 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1851
1852 static rtx
1853 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1854 {
1855 double_int val;
1856
1857 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1858 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1859
1860 return immed_double_int_const (val, mode);
1861 }
1862 \f
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1865
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1869
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1873 {
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1878
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1885
1886 while (bitsdone < bitsize)
1887 {
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1892
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1895
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1901
1902 /* If OP0 is a register, then handle OFFSET here.
1903
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1909 {
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1914 }
1915 else if (REG_P (op0))
1916 {
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1919 }
1920 else
1921 word = op0;
1922
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits. */
1926 part = extract_fixed_bit_field (word_mode, word, thissize,
1927 offset * unit + thispos, 0, 1, false);
1928 bitsdone += thissize;
1929
1930 /* Shift this part into place for the result. */
1931 if (BYTES_BIG_ENDIAN)
1932 {
1933 if (bitsize != bitsdone)
1934 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1935 bitsize - bitsdone, 0, 1);
1936 }
1937 else
1938 {
1939 if (bitsdone != thissize)
1940 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1941 bitsdone - thissize, 0, 1);
1942 }
1943
1944 if (first)
1945 result = part;
1946 else
1947 /* Combine the parts with bitwise or. This works
1948 because we extracted each part as an unsigned bit field. */
1949 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1950 OPTAB_LIB_WIDEN);
1951
1952 first = 0;
1953 }
1954
1955 /* Unsigned bit field: we are done. */
1956 if (unsignedp)
1957 return result;
1958 /* Signed bit field: sign-extend with two arithmetic shifts. */
1959 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1960 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1961 return expand_shift (RSHIFT_EXPR, word_mode, result,
1962 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1963 }
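/* Worked example (illustrative only): extracting a 10-bit field that starts
   at bit 28 of a register on a 32-bit-word, !BYTES_BIG_ENDIAN target.  The
   loop above first pulls out the 4 bits that live in word 0 (bits 28..31),
   then the 6 bits that live in word 1 (bits 0..5), shifts that second part
   left by 4 and IORs the two parts together; for a signed field the final
   pair of shifts then sign-extends the 10-bit result.  */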
1964 \f
1965 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1966 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1967 MODE, fill the upper bits with zeros. Fail if the layout of either
1968 mode is unknown (as for CC modes) or if the extraction would involve
1969 unprofitable mode punning. Return the value on success, otherwise
1970 return null.
1971
1972 This is different from gen_lowpart* in these respects:
1973
1974 - the returned value must always be considered an rvalue
1975
1976 - when MODE is wider than SRC_MODE, the extraction involves
1977 a zero extension
1978
1979 - when MODE is smaller than SRC_MODE, the extraction involves
1980 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1981
1982 In other words, this routine performs a computation, whereas the
1983 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1984 operations. */
1985
1986 rtx
1987 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1988 {
1989 enum machine_mode int_mode, src_int_mode;
1990
1991 if (mode == src_mode)
1992 return src;
1993
1994 if (CONSTANT_P (src))
1995 {
1996 /* simplify_gen_subreg can't be used here, as if simplify_subreg
1997 fails, it will happily create (subreg (symbol_ref)) or similar
1998 invalid SUBREGs. */
1999 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2000 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2001 if (ret)
2002 return ret;
2003
2004 if (GET_MODE (src) == VOIDmode
2005 || !validate_subreg (mode, src_mode, src, byte))
2006 return NULL_RTX;
2007
2008 src = force_reg (GET_MODE (src), src);
2009 return gen_rtx_SUBREG (mode, src, byte);
2010 }
2011
2012 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2013 return NULL_RTX;
2014
2015 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2016 && MODES_TIEABLE_P (mode, src_mode))
2017 {
2018 rtx x = gen_lowpart_common (mode, src);
2019 if (x)
2020 return x;
2021 }
2022
2023 src_int_mode = int_mode_for_mode (src_mode);
2024 int_mode = int_mode_for_mode (mode);
2025 if (src_int_mode == BLKmode || int_mode == BLKmode)
2026 return NULL_RTX;
2027
2028 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2029 return NULL_RTX;
2030 if (!MODES_TIEABLE_P (int_mode, mode))
2031 return NULL_RTX;
2032
2033 src = gen_lowpart (src_int_mode, src);
2034 src = convert_modes (int_mode, src_int_mode, src, true);
2035 src = gen_lowpart (mode, src);
2036 return src;
2037 }
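/* For illustration only, assuming a target where SFmode and SImode are both
   32 bits wide and tieable: extract_low_bits (SImode, SFmode, x) returns the
   raw bit pattern of the float X as an SImode rvalue, while widening a
   16-bit HImode value to SImode through this routine zero-fills the upper
   16 bits.  Whether the punning is actually performed depends on the
   MODES_TIEABLE_P and int_mode_for_mode checks above.  */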
2038 \f
2039 /* Add INC into TARGET. */
2040
2041 void
2042 expand_inc (rtx target, rtx inc)
2043 {
2044 rtx value = expand_binop (GET_MODE (target), add_optab,
2045 target, inc,
2046 target, 0, OPTAB_LIB_WIDEN);
2047 if (value != target)
2048 emit_move_insn (target, value);
2049 }
2050
2051 /* Subtract DEC from TARGET. */
2052
2053 void
2054 expand_dec (rtx target, rtx dec)
2055 {
2056 rtx value = expand_binop (GET_MODE (target), sub_optab,
2057 target, dec,
2058 target, 0, OPTAB_LIB_WIDEN);
2059 if (value != target)
2060 emit_move_insn (target, value);
2061 }
2062 \f
2063 /* Output a shift instruction for expression code CODE,
2064 with SHIFTED being the rtx for the value to shift,
2065 and AMOUNT the rtx for the amount to shift by.
2066 Store the result in the rtx TARGET, if that is convenient.
2067 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2068 Return the rtx for where the value is. */
2069
2070 static rtx
2071 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2072 rtx amount, rtx target, int unsignedp)
2073 {
2074 rtx op1, temp = 0;
2075 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2076 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2077 optab lshift_optab = ashl_optab;
2078 optab rshift_arith_optab = ashr_optab;
2079 optab rshift_uns_optab = lshr_optab;
2080 optab lrotate_optab = rotl_optab;
2081 optab rrotate_optab = rotr_optab;
2082 enum machine_mode op1_mode;
2083 int attempt;
2084 bool speed = optimize_insn_for_speed_p ();
2085
2086 op1 = amount;
2087 op1_mode = GET_MODE (op1);
2088
2089 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2090 shift amount is a vector, use the vector/vector shift patterns. */
2091 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2092 {
2093 lshift_optab = vashl_optab;
2094 rshift_arith_optab = vashr_optab;
2095 rshift_uns_optab = vlshr_optab;
2096 lrotate_optab = vrotl_optab;
2097 rrotate_optab = vrotr_optab;
2098 }
2099
2100 /* We used to detect shift counts computed by NEGATE_EXPR
2101 and shift in the other direction, but that does not work
2102 on all machines. */
2103
2104 if (SHIFT_COUNT_TRUNCATED)
2105 {
2106 if (CONST_INT_P (op1)
2107 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2108 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2109 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2110 % GET_MODE_BITSIZE (mode));
2111 else if (GET_CODE (op1) == SUBREG
2112 && subreg_lowpart_p (op1)
2113 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2114 && INTEGRAL_MODE_P (GET_MODE (op1)))
2115 op1 = SUBREG_REG (op1);
2116 }
2117
2118 if (op1 == const0_rtx)
2119 return shifted;
2120
2121 /* Check whether it's cheaper to implement a left shift by a constant
2122 bit count by a sequence of additions. */
2123 if (code == LSHIFT_EXPR
2124 && CONST_INT_P (op1)
2125 && INTVAL (op1) > 0
2126 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2127 && INTVAL (op1) < MAX_BITS_PER_WORD
2128 && (shift_cost (speed, mode, INTVAL (op1))
2129 > INTVAL (op1) * add_cost (speed, mode))
2130 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2131 {
2132 int i;
2133 for (i = 0; i < INTVAL (op1); i++)
2134 {
2135 temp = force_reg (mode, shifted);
2136 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2137 unsignedp, OPTAB_LIB_WIDEN);
2138 }
2139 return shifted;
2140 }
2141
2142 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2143 {
2144 enum optab_methods methods;
2145
2146 if (attempt == 0)
2147 methods = OPTAB_DIRECT;
2148 else if (attempt == 1)
2149 methods = OPTAB_WIDEN;
2150 else
2151 methods = OPTAB_LIB_WIDEN;
2152
2153 if (rotate)
2154 {
2155 /* Widening does not work for rotation. */
2156 if (methods == OPTAB_WIDEN)
2157 continue;
2158 else if (methods == OPTAB_LIB_WIDEN)
2159 {
2160 /* If we have been unable to open-code this by a rotation,
2161 do it as the IOR of two shifts. I.e., to rotate A
2162 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2163 where C is the bitsize of A.
2164
2165 It is theoretically possible that the target machine might
2166 not be able to perform either shift and hence we would
2167 be making two libcalls rather than just the one for the
2168 shift (similarly if IOR could not be done). We will allow
2169 this extremely unlikely lossage to avoid complicating the
2170 code below. */
2171
2172 rtx subtarget = target == shifted ? 0 : target;
2173 rtx new_amount, other_amount;
2174 rtx temp1;
2175
2176 new_amount = op1;
2177 if (CONST_INT_P (op1))
2178 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2179 - INTVAL (op1));
2180 else
2181 other_amount
2182 = simplify_gen_binary (MINUS, GET_MODE (op1),
2183 GEN_INT (GET_MODE_PRECISION (mode)),
2184 op1);
2185
2186 shifted = force_reg (mode, shifted);
2187
2188 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2189 mode, shifted, new_amount, 0, 1);
2190 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2191 mode, shifted, other_amount,
2192 subtarget, 1);
2193 return expand_binop (mode, ior_optab, temp, temp1, target,
2194 unsignedp, methods);
2195 }
2196
2197 temp = expand_binop (mode,
2198 left ? lrotate_optab : rrotate_optab,
2199 shifted, op1, target, unsignedp, methods);
2200 }
2201 else if (unsignedp)
2202 temp = expand_binop (mode,
2203 left ? lshift_optab : rshift_uns_optab,
2204 shifted, op1, target, unsignedp, methods);
2205
2206 /* Do arithmetic shifts.
2207 Also, if we are going to widen the operand, we can just as well
2208 use an arithmetic right-shift instead of a logical one. */
2209 if (temp == 0 && ! rotate
2210 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2211 {
2212 enum optab_methods methods1 = methods;
2213
2214 /* If trying to widen a log shift to an arithmetic shift,
2215 don't accept an arithmetic shift of the same size. */
2216 if (unsignedp)
2217 methods1 = OPTAB_MUST_WIDEN;
2218
2219 /* Arithmetic shift */
2220
2221 temp = expand_binop (mode,
2222 left ? lshift_optab : rshift_arith_optab,
2223 shifted, op1, target, unsignedp, methods1);
2224 }
2225
2226 /* We used to try extzv here for logical right shifts, but that was
2227 only useful for one machine, the VAX, and caused poor code
2228 generation there for lshrdi3, so the code was deleted and a
2229 define_expand for lshrsi3 was added to vax.md. */
2230 }
2231
2232 gcc_assert (temp);
2233 return temp;
2234 }
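/* For illustration only (not part of this file): the IOR-of-two-shifts
   fallback used above for rotates corresponds to the familiar C idiom
   below, shown for a 32-bit value and a rotate count 0 < N < 32; the
   name is hypothetical.

     unsigned int
     rotl32 (unsigned int a, unsigned int n)
     {
       return (a << n) | (a >> (32 - n));
     }

   Likewise, the shift-by-additions path above turns a cheap-enough left
   shift by a constant into repeated doubling, e.g. x << 3 is emitted as
   three successive additions: t = x + x; t = t + t; t = t + t.  */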
2235
2236 /* Output a shift instruction for expression code CODE,
2237 with SHIFTED being the rtx for the value to shift,
2238 and AMOUNT the amount to shift by.
2239 Store the result in the rtx TARGET, if that is convenient.
2240 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2241 Return the rtx for where the value is. */
2242
2243 rtx
2244 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2245 int amount, rtx target, int unsignedp)
2246 {
2247 return expand_shift_1 (code, mode,
2248 shifted, GEN_INT (amount), target, unsignedp);
2249 }
2250
2251 /* Output a shift instruction for expression code CODE,
2252 with SHIFTED being the rtx for the value to shift,
2253 and AMOUNT the tree for the amount to shift by.
2254 Store the result in the rtx TARGET, if that is convenient.
2255 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2256 Return the rtx for where the value is. */
2257
2258 rtx
2259 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2260 tree amount, rtx target, int unsignedp)
2261 {
2262 return expand_shift_1 (code, mode,
2263 shifted, expand_normal (amount), target, unsignedp);
2264 }
2265
2266 \f
2267 /* Indicates the type of fixup needed after a constant multiplication.
2268 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2269 the result should be negated, and ADD_VARIANT means that the
2270 multiplicand should be added to the result. */
2271 enum mult_variant {basic_variant, negate_variant, add_variant};
2272
2273 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2274 const struct mult_cost *, enum machine_mode mode);
2275 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2276 struct algorithm *, enum mult_variant *, int);
2277 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2278 const struct algorithm *, enum mult_variant);
2279 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2280 static rtx extract_high_half (enum machine_mode, rtx);
2281 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2282 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2283 int, int);
2284 /* Compute and return the best algorithm for multiplying by T.
2285 The algorithm must cost less than COST_LIMIT.
2286 If retval.cost >= COST_LIMIT, no algorithm was found and all
2287 other fields of the returned struct are undefined.
2288 MODE is the machine mode of the multiplication. */
2289
2290 static void
2291 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2292 const struct mult_cost *cost_limit, enum machine_mode mode)
2293 {
2294 int m;
2295 struct algorithm *alg_in, *best_alg;
2296 struct mult_cost best_cost;
2297 struct mult_cost new_limit;
2298 int op_cost, op_latency;
2299 unsigned HOST_WIDE_INT orig_t = t;
2300 unsigned HOST_WIDE_INT q;
2301 int maxm, hash_index;
2302 bool cache_hit = false;
2303 enum alg_code cache_alg = alg_zero;
2304 bool speed = optimize_insn_for_speed_p ();
2305 enum machine_mode imode;
2306 struct alg_hash_entry *entry_ptr;
2307
2308 /* Indicate that no algorithm is yet found. If no algorithm
2309 is found, this value will be returned and indicate failure. */
2310 alg_out->cost.cost = cost_limit->cost + 1;
2311 alg_out->cost.latency = cost_limit->latency + 1;
2312
2313 if (cost_limit->cost < 0
2314 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2315 return;
2316
2317 /* Be prepared for vector modes. */
2318 imode = GET_MODE_INNER (mode);
2319 if (imode == VOIDmode)
2320 imode = mode;
2321
2322 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2323
2324 /* Restrict the bits of "t" to the multiplication's mode. */
2325 t &= GET_MODE_MASK (imode);
2326
2327 /* t == 1 can be done in zero cost. */
2328 if (t == 1)
2329 {
2330 alg_out->ops = 1;
2331 alg_out->cost.cost = 0;
2332 alg_out->cost.latency = 0;
2333 alg_out->op[0] = alg_m;
2334 return;
2335 }
2336
2337 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2338 fail now. */
2339 if (t == 0)
2340 {
2341 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2342 return;
2343 else
2344 {
2345 alg_out->ops = 1;
2346 alg_out->cost.cost = zero_cost (speed);
2347 alg_out->cost.latency = zero_cost (speed);
2348 alg_out->op[0] = alg_zero;
2349 return;
2350 }
2351 }
2352
2353 /* We'll be needing a couple extra algorithm structures now. */
2354
2355 alg_in = XALLOCA (struct algorithm);
2356 best_alg = XALLOCA (struct algorithm);
2357 best_cost = *cost_limit;
2358
2359 /* Compute the hash index. */
2360 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2361
2362 /* See if we already know what to do for T. */
2363 entry_ptr = alg_hash_entry_ptr (hash_index);
2364 if (entry_ptr->t == t
2365 && entry_ptr->mode == mode
2367 && entry_ptr->speed == speed
2368 && entry_ptr->alg != alg_unknown)
2369 {
2370 cache_alg = entry_ptr->alg;
2371
2372 if (cache_alg == alg_impossible)
2373 {
2374 /* The cache tells us that it's impossible to synthesize
2375 multiplication by T within entry_ptr->cost. */
2376 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2377 /* COST_LIMIT is at least as restrictive as the one
2378 recorded in the hash table, in which case we have no
2379 hope of synthesizing a multiplication. Just
2380 return. */
2381 return;
2382
2383 /* If we get here, COST_LIMIT is less restrictive than the
2384 one recorded in the hash table, so we may be able to
2385 synthesize a multiplication. Proceed as if we didn't
2386 have the cache entry. */
2387 }
2388 else
2389 {
2390 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2391 /* The cached algorithm shows that this multiplication
2392 requires more cost than COST_LIMIT. Just return. This
2393 way, we don't clobber this cache entry with
2394 alg_impossible but retain useful information. */
2395 return;
2396
2397 cache_hit = true;
2398
2399 switch (cache_alg)
2400 {
2401 case alg_shift:
2402 goto do_alg_shift;
2403
2404 case alg_add_t_m2:
2405 case alg_sub_t_m2:
2406 goto do_alg_addsub_t_m2;
2407
2408 case alg_add_factor:
2409 case alg_sub_factor:
2410 goto do_alg_addsub_factor;
2411
2412 case alg_add_t2_m:
2413 goto do_alg_add_t2_m;
2414
2415 case alg_sub_t2_m:
2416 goto do_alg_sub_t2_m;
2417
2418 default:
2419 gcc_unreachable ();
2420 }
2421 }
2422 }
2423
2424 /* If we have a group of zero bits at the low-order part of T, try
2425 multiplying by the remaining bits and then doing a shift. */
2426
2427 if ((t & 1) == 0)
2428 {
2429 do_alg_shift:
2430 m = floor_log2 (t & -t); /* m = number of low zero bits */
2431 if (m < maxm)
2432 {
2433 q = t >> m;
2434 /* The function expand_shift will choose between a shift and
2435 a sequence of additions, so the observed cost is given as
2436 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2437 op_cost = m * add_cost (speed, mode);
2438 if (shift_cost (speed, mode, m) < op_cost)
2439 op_cost = shift_cost (speed, mode, m);
2440 new_limit.cost = best_cost.cost - op_cost;
2441 new_limit.latency = best_cost.latency - op_cost;
2442 synth_mult (alg_in, q, &new_limit, mode);
2443
2444 alg_in->cost.cost += op_cost;
2445 alg_in->cost.latency += op_cost;
2446 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2447 {
2448 struct algorithm *x;
2449 best_cost = alg_in->cost;
2450 x = alg_in, alg_in = best_alg, best_alg = x;
2451 best_alg->log[best_alg->ops] = m;
2452 best_alg->op[best_alg->ops] = alg_shift;
2453 }
2454
2455 /* See if treating ORIG_T as a signed number yields a better
2456 sequence. Try this sequence only for a negative ORIG_T
2457 as it would be useless for a non-negative ORIG_T. */
2458 if ((HOST_WIDE_INT) orig_t < 0)
2459 {
2460 /* Shift ORIG_T as follows because a right shift of a
2461 negative-valued signed type is implementation
2462 defined. */
2463 q = ~(~orig_t >> m);
2464 /* The function expand_shift will choose between a shift
2465 and a sequence of additions, so the observed cost is
2466 given as MIN (m * add_cost(speed, mode),
2467 shift_cost(speed, mode, m)). */
2468 op_cost = m * add_cost (speed, mode);
2469 if (shift_cost (speed, mode, m) < op_cost)
2470 op_cost = shift_cost (speed, mode, m);
2471 new_limit.cost = best_cost.cost - op_cost;
2472 new_limit.latency = best_cost.latency - op_cost;
2473 synth_mult (alg_in, q, &new_limit, mode);
2474
2475 alg_in->cost.cost += op_cost;
2476 alg_in->cost.latency += op_cost;
2477 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2478 {
2479 struct algorithm *x;
2480 best_cost = alg_in->cost;
2481 x = alg_in, alg_in = best_alg, best_alg = x;
2482 best_alg->log[best_alg->ops] = m;
2483 best_alg->op[best_alg->ops] = alg_shift;
2484 }
2485 }
2486 }
2487 if (cache_hit)
2488 goto done;
2489 }
2490
2491 /* If we have an odd number, add or subtract one. */
2492 if ((t & 1) != 0)
2493 {
2494 unsigned HOST_WIDE_INT w;
2495
2496 do_alg_addsub_t_m2:
2497 for (w = 1; (w & t) != 0; w <<= 1)
2498 ;
2499 /* If T was -1, then W will be zero after the loop. This is another
2500 case where T ends with ...111. Handling this by multiplying by (T + 1)
2501 and subtracting 1 produces slightly better code and selects an
2502 algorithm much faster than treating it like the ...0111 case
2503 below. */
2504 if (w == 0
2505 || (w > 2
2506 /* Reject the case where t is 3.
2507 Thus we prefer addition in that case. */
2508 && t != 3))
2509 {
2510 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2511
2512 op_cost = add_cost (speed, mode);
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, t + 1, &new_limit, mode);
2516
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 {
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = 0;
2525 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2526 }
2527 }
2528 else
2529 {
2530 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2531
2532 op_cost = add_cost (speed, mode);
2533 new_limit.cost = best_cost.cost - op_cost;
2534 new_limit.latency = best_cost.latency - op_cost;
2535 synth_mult (alg_in, t - 1, &new_limit, mode);
2536
2537 alg_in->cost.cost += op_cost;
2538 alg_in->cost.latency += op_cost;
2539 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2540 {
2541 struct algorithm *x;
2542 best_cost = alg_in->cost;
2543 x = alg_in, alg_in = best_alg, best_alg = x;
2544 best_alg->log[best_alg->ops] = 0;
2545 best_alg->op[best_alg->ops] = alg_add_t_m2;
2546 }
2547 }
2548
2549 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2550 quickly with a - a * n for some appropriate constant n. */
2551 m = exact_log2 (-orig_t + 1);
2552 if (m >= 0 && m < maxm)
2553 {
2554 op_cost = shiftsub1_cost (speed, mode, m);
2555 new_limit.cost = best_cost.cost - op_cost;
2556 new_limit.latency = best_cost.latency - op_cost;
2557 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2558 &new_limit, mode);
2559
2560 alg_in->cost.cost += op_cost;
2561 alg_in->cost.latency += op_cost;
2562 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2563 {
2564 struct algorithm *x;
2565 best_cost = alg_in->cost;
2566 x = alg_in, alg_in = best_alg, best_alg = x;
2567 best_alg->log[best_alg->ops] = m;
2568 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2569 }
2570 }
2571
2572 if (cache_hit)
2573 goto done;
2574 }
2575
2576 /* Look for factors of t of the form
2577 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2578 If we find such a factor, we can multiply by t using an algorithm that
2579 multiplies by q, shift the result by m and add/subtract it to itself.
2580
2581 We search for large factors first and loop down, even if large factors
2582 are less probable than small; if we find a large factor we will find a
2583 good sequence quickly, and therefore be able to prune (by decreasing
2584 COST_LIMIT) the search. */
2585
2586 do_alg_addsub_factor:
2587 for (m = floor_log2 (t - 1); m >= 2; m--)
2588 {
2589 unsigned HOST_WIDE_INT d;
2590
2591 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2592 if (t % d == 0 && t > d && m < maxm
2593 && (!cache_hit || cache_alg == alg_add_factor))
2594 {
2595 /* If the target has a cheap shift-and-add instruction use
2596 that in preference to a shift insn followed by an add insn.
2597 Assume that the shift-and-add is "atomic" with a latency
2598 equal to its cost, otherwise assume that on superscalar
2599 hardware the shift may be executed concurrently with the
2600 earlier steps in the algorithm. */
2601 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2602 if (shiftadd_cost (speed, mode, m) < op_cost)
2603 {
2604 op_cost = shiftadd_cost (speed, mode, m);
2605 op_latency = op_cost;
2606 }
2607 else
2608 op_latency = add_cost (speed, mode);
2609
2610 new_limit.cost = best_cost.cost - op_cost;
2611 new_limit.latency = best_cost.latency - op_latency;
2612 synth_mult (alg_in, t / d, &new_limit, mode);
2613
2614 alg_in->cost.cost += op_cost;
2615 alg_in->cost.latency += op_latency;
2616 if (alg_in->cost.latency < op_cost)
2617 alg_in->cost.latency = op_cost;
2618 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619 {
2620 struct algorithm *x;
2621 best_cost = alg_in->cost;
2622 x = alg_in, alg_in = best_alg, best_alg = x;
2623 best_alg->log[best_alg->ops] = m;
2624 best_alg->op[best_alg->ops] = alg_add_factor;
2625 }
2626 /* Other factors will have been taken care of in the recursion. */
2627 break;
2628 }
2629
2630 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2631 if (t % d == 0 && t > d && m < maxm
2632 && (!cache_hit || cache_alg == alg_sub_factor))
2633 {
2634 /* If the target has a cheap shift-and-subtract insn use
2635 that in preference to a shift insn followed by a sub insn.
2636 Assume that the shift-and-sub is "atomic" with a latency
2637 equal to its cost, otherwise assume that on superscalar
2638 hardware the shift may be executed concurrently with the
2639 earlier steps in the algorithm. */
2640 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2641 if (shiftsub0_cost (speed, mode, m) < op_cost)
2642 {
2643 op_cost = shiftsub0_cost (speed, mode, m);
2644 op_latency = op_cost;
2645 }
2646 else
2647 op_latency = add_cost (speed, mode);
2648
2649 new_limit.cost = best_cost.cost - op_cost;
2650 new_limit.latency = best_cost.latency - op_latency;
2651 synth_mult (alg_in, t / d, &new_limit, mode);
2652
2653 alg_in->cost.cost += op_cost;
2654 alg_in->cost.latency += op_latency;
2655 if (alg_in->cost.latency < op_cost)
2656 alg_in->cost.latency = op_cost;
2657 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658 {
2659 struct algorithm *x;
2660 best_cost = alg_in->cost;
2661 x = alg_in, alg_in = best_alg, best_alg = x;
2662 best_alg->log[best_alg->ops] = m;
2663 best_alg->op[best_alg->ops] = alg_sub_factor;
2664 }
2665 break;
2666 }
2667 }
2668 if (cache_hit)
2669 goto done;
2670
2671 /* Try shift-and-add (load effective address) instructions,
2672 i.e. do a*3, a*5, a*9. */
2673 if ((t & 1) != 0)
2674 {
2675 do_alg_add_t2_m:
2676 q = t - 1;
2677 q = q & -q;
2678 m = exact_log2 (q);
2679 if (m >= 0 && m < maxm)
2680 {
2681 op_cost = shiftadd_cost (speed, mode, m);
2682 new_limit.cost = best_cost.cost - op_cost;
2683 new_limit.latency = best_cost.latency - op_cost;
2684 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2685
2686 alg_in->cost.cost += op_cost;
2687 alg_in->cost.latency += op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2689 {
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = m;
2694 best_alg->op[best_alg->ops] = alg_add_t2_m;
2695 }
2696 }
2697 if (cache_hit)
2698 goto done;
2699
2700 do_alg_sub_t2_m:
2701 q = t + 1;
2702 q = q & -q;
2703 m = exact_log2 (q);
2704 if (m >= 0 && m < maxm)
2705 {
2706 op_cost = shiftsub0_cost (speed, mode, m);
2707 new_limit.cost = best_cost.cost - op_cost;
2708 new_limit.latency = best_cost.latency - op_cost;
2709 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2710
2711 alg_in->cost.cost += op_cost;
2712 alg_in->cost.latency += op_cost;
2713 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714 {
2715 struct algorithm *x;
2716 best_cost = alg_in->cost;
2717 x = alg_in, alg_in = best_alg, best_alg = x;
2718 best_alg->log[best_alg->ops] = m;
2719 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2720 }
2721 }
2722 if (cache_hit)
2723 goto done;
2724 }
2725
2726 done:
2727 /* If best_cost has not decreased, we have not found any algorithm. */
2728 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2729 {
2730 /* We failed to find an algorithm. Record alg_impossible for
2731 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2732 we are asked to find an algorithm for T within the same or
2733 lower COST_LIMIT, we can immediately return to the
2734 caller. */
2735 entry_ptr->t = t;
2736 entry_ptr->mode = mode;
2737 entry_ptr->speed = speed;
2738 entry_ptr->alg = alg_impossible;
2739 entry_ptr->cost = *cost_limit;
2740 return;
2741 }
2742
2743 /* Cache the result. */
2744 if (!cache_hit)
2745 {
2746 entry_ptr->t = t;
2747 entry_ptr->mode = mode;
2748 entry_ptr->speed = speed;
2749 entry_ptr->alg = best_alg->op[best_alg->ops];
2750 entry_ptr->cost.cost = best_cost.cost;
2751 entry_ptr->cost.latency = best_cost.latency;
2752 }
2753
2754 /* If the sequence is getting too long for `struct algorithm'
2755 to record, make this search fail. */
2756 if (best_alg->ops == MAX_BITS_PER_WORD)
2757 return;
2758
2759 /* Copy the algorithm from temporary space to the space at alg_out.
2760 We avoid using structure assignment because the majority of
2761 best_alg is normally undefined, and this is a critical function. */
2762 alg_out->ops = best_alg->ops + 1;
2763 alg_out->cost = best_cost;
2764 memcpy (alg_out->op, best_alg->op,
2765 alg_out->ops * sizeof *alg_out->op);
2766 memcpy (alg_out->log, best_alg->log,
2767 alg_out->ops * sizeof *alg_out->log);
2768 }
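/* Worked example (illustrative only): for T == 10 (binary 1010) one
   decomposition the search above considers, and typically selects when
   shift-and-add is cheap, is

     accum = x                          (alg_m)
     accum = (accum << 2) + x  = 5*x    (alg_add_t2_m, log = 2)
     accum = accum << 1        = 10*x   (alg_shift,    log = 1)

   and for T == 7 the "...111" rule multiplies by 8 and subtracts once,
   giving (x << 3) - x.  Which candidate actually wins depends on the
   add_cost/shift_cost/shiftadd_cost entries for the mode.  */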
2769 \f
2770 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2771 Try three variations:
2772
2773 - a shift/add sequence based on VAL itself
2774 - a shift/add sequence based on -VAL, followed by a negation
2775 - a shift/add sequence based on VAL - 1, followed by an addition.
2776
2777 Return true if the cheapest of these cost less than MULT_COST,
2778 describing the algorithm in *ALG and final fixup in *VARIANT. */
2779
2780 static bool
2781 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2782 struct algorithm *alg, enum mult_variant *variant,
2783 int mult_cost)
2784 {
2785 struct algorithm alg2;
2786 struct mult_cost limit;
2787 int op_cost;
2788 bool speed = optimize_insn_for_speed_p ();
2789
2790 /* Fail quickly for impossible bounds. */
2791 if (mult_cost < 0)
2792 return false;
2793
2794 /* Ensure that mult_cost provides a reasonable upper bound.
2795 Any constant multiplication can be performed with less
2796 than 2 * bits additions. */
2797 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2798 if (mult_cost > op_cost)
2799 mult_cost = op_cost;
2800
2801 *variant = basic_variant;
2802 limit.cost = mult_cost;
2803 limit.latency = mult_cost;
2804 synth_mult (alg, val, &limit, mode);
2805
2806 /* This works only if the inverted value actually fits in an
2807 `unsigned int'. */
2808 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2809 {
2810 op_cost = neg_cost(speed, mode);
2811 if (MULT_COST_LESS (&alg->cost, mult_cost))
2812 {
2813 limit.cost = alg->cost.cost - op_cost;
2814 limit.latency = alg->cost.latency - op_cost;
2815 }
2816 else
2817 {
2818 limit.cost = mult_cost - op_cost;
2819 limit.latency = mult_cost - op_cost;
2820 }
2821
2822 synth_mult (&alg2, -val, &limit, mode);
2823 alg2.cost.cost += op_cost;
2824 alg2.cost.latency += op_cost;
2825 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2826 *alg = alg2, *variant = negate_variant;
2827 }
2828
2829 /* This proves very useful for division-by-constant. */
2830 op_cost = add_cost (speed, mode);
2831 if (MULT_COST_LESS (&alg->cost, mult_cost))
2832 {
2833 limit.cost = alg->cost.cost - op_cost;
2834 limit.latency = alg->cost.latency - op_cost;
2835 }
2836 else
2837 {
2838 limit.cost = mult_cost - op_cost;
2839 limit.latency = mult_cost - op_cost;
2840 }
2841
2842 synth_mult (&alg2, val - 1, &limit, mode);
2843 alg2.cost.cost += op_cost;
2844 alg2.cost.latency += op_cost;
2845 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2846 *alg = alg2, *variant = add_variant;
2847
2848 return MULT_COST_LESS (&alg->cost, mult_cost);
2849 }
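/* Worked example (illustrative only): for VAL == -7 the plain shift/add
   search rarely succeeds, but the negate_variant tried above can
   synthesize 7*x as (x << 3) - x and then negate the result; for VAL == 9
   the add_variant considers synthesizing 8*x (a single shift) and adding
   X back in.  The cheapest of the three candidates, if any beats
   MULT_COST, is what ends up in *ALG and *VARIANT.  */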
2850
2851 /* A subroutine of expand_mult, used for constant multiplications.
2852 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2853 convenient. Use the shift/add sequence described by ALG and apply
2854 the final fixup specified by VARIANT. */
2855
2856 static rtx
2857 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2858 rtx target, const struct algorithm *alg,
2859 enum mult_variant variant)
2860 {
2861 HOST_WIDE_INT val_so_far;
2862 rtx insn, accum, tem;
2863 int opno;
2864 enum machine_mode nmode;
2865
2866 /* Avoid referencing memory over and over, and avoid invalid
2867 sharing on SUBREGs. */
2868 op0 = force_reg (mode, op0);
2869
2870 /* ACCUM starts out either as OP0 or as a zero, depending on
2871 the first operation. */
2872
2873 if (alg->op[0] == alg_zero)
2874 {
2875 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2876 val_so_far = 0;
2877 }
2878 else if (alg->op[0] == alg_m)
2879 {
2880 accum = copy_to_mode_reg (mode, op0);
2881 val_so_far = 1;
2882 }
2883 else
2884 gcc_unreachable ();
2885
2886 for (opno = 1; opno < alg->ops; opno++)
2887 {
2888 int log = alg->log[opno];
2889 rtx shift_subtarget = optimize ? 0 : accum;
2890 rtx add_target
2891 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892 && !optimize)
2893 ? target : 0;
2894 rtx accum_target = optimize ? 0 : accum;
2895 rtx accum_inner;
2896
2897 switch (alg->op[opno])
2898 {
2899 case alg_shift:
2900 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2901 /* REG_EQUAL note will be attached to the following insn. */
2902 emit_move_insn (accum, tem);
2903 val_so_far <<= log;
2904 break;
2905
2906 case alg_add_t_m2:
2907 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2908 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2909 add_target ? add_target : accum_target);
2910 val_so_far += (HOST_WIDE_INT) 1 << log;
2911 break;
2912
2913 case alg_sub_t_m2:
2914 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2915 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2916 add_target ? add_target : accum_target);
2917 val_so_far -= (HOST_WIDE_INT) 1 << log;
2918 break;
2919
2920 case alg_add_t2_m:
2921 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2922 log, shift_subtarget, 0);
2923 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2924 add_target ? add_target : accum_target);
2925 val_so_far = (val_so_far << log) + 1;
2926 break;
2927
2928 case alg_sub_t2_m:
2929 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2930 log, shift_subtarget, 0);
2931 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2932 add_target ? add_target : accum_target);
2933 val_so_far = (val_so_far << log) - 1;
2934 break;
2935
2936 case alg_add_factor:
2937 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2938 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939 add_target ? add_target : accum_target);
2940 val_so_far += val_so_far << log;
2941 break;
2942
2943 case alg_sub_factor:
2944 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2945 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2946 (add_target
2947 ? add_target : (optimize ? 0 : tem)));
2948 val_so_far = (val_so_far << log) - val_so_far;
2949 break;
2950
2951 default:
2952 gcc_unreachable ();
2953 }
2954
2955 if (SCALAR_INT_MODE_P (mode))
2956 {
2957 /* Write a REG_EQUAL note on the last insn so that we can cse
2958 multiplication sequences. Note that if ACCUM is a SUBREG,
2959 we've set the inner register and must properly indicate that. */
2960 tem = op0, nmode = mode;
2961 accum_inner = accum;
2962 if (GET_CODE (accum) == SUBREG)
2963 {
2964 accum_inner = SUBREG_REG (accum);
2965 nmode = GET_MODE (accum_inner);
2966 tem = gen_lowpart (nmode, op0);
2967 }
2968
2969 insn = get_last_insn ();
2970 set_dst_reg_note (insn, REG_EQUAL,
2971 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2972 accum_inner);
2973 }
2974 }
2975
2976 if (variant == negate_variant)
2977 {
2978 val_so_far = -val_so_far;
2979 accum = expand_unop (mode, neg_optab, accum, target, 0);
2980 }
2981 else if (variant == add_variant)
2982 {
2983 val_so_far = val_so_far + 1;
2984 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2985 }
2986
2987 /* Compare only the bits of val and val_so_far that are significant
2988 in the result mode, to avoid sign-/zero-extension confusion. */
2989 nmode = GET_MODE_INNER (mode);
2990 if (nmode == VOIDmode)
2991 nmode = mode;
2992 val &= GET_MODE_MASK (nmode);
2993 val_so_far &= GET_MODE_MASK (nmode);
2994 gcc_assert (val == val_so_far);
2995
2996 return accum;
2997 }
2998
2999 /* Perform a multiplication and return an rtx for the result.
3000 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3001 TARGET is a suggestion for where to store the result (an rtx).
3002
3003 We check specially for a constant integer as OP1.
3004 If you want this check for OP0 as well, then before calling
3005 you should swap the two operands if OP0 would be constant. */
3006
3007 rtx
3008 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3009 int unsignedp)
3010 {
3011 enum mult_variant variant;
3012 struct algorithm algorithm;
3013 rtx scalar_op1;
3014 int max_cost;
3015 bool speed = optimize_insn_for_speed_p ();
3016 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3017
3018 if (CONSTANT_P (op0))
3019 {
3020 rtx temp = op0;
3021 op0 = op1;
3022 op1 = temp;
3023 }
3024
3025 /* For vectors, there are several simplifications that can be made if
3026 all elements of the vector constant are identical. */
3027 scalar_op1 = op1;
3028 if (GET_CODE (op1) == CONST_VECTOR)
3029 {
3030 int i, n = CONST_VECTOR_NUNITS (op1);
3031 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3032 for (i = 1; i < n; ++i)
3033 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3034 goto skip_scalar;
3035 }
3036
3037 if (INTEGRAL_MODE_P (mode))
3038 {
3039 rtx fake_reg;
3040 HOST_WIDE_INT coeff;
3041 bool is_neg;
3042 int mode_bitsize;
3043
3044 if (op1 == CONST0_RTX (mode))
3045 return op1;
3046 if (op1 == CONST1_RTX (mode))
3047 return op0;
3048 if (op1 == CONSTM1_RTX (mode))
3049 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3050 op0, target, 0);
3051
3052 if (do_trapv)
3053 goto skip_synth;
3054
3055 /* These are the operations that are potentially turned into
3056 a sequence of shifts and additions. */
3057 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3058
3059 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3060 less than or equal in size to `unsigned int' this doesn't matter.
3061 If the mode is larger than `unsigned int', then synth_mult works
3062 only if the constant value exactly fits in an `unsigned int' without
3063 any truncation. This means that multiplying by negative values does
3064 not work; results are off by 2^32 on a 32-bit machine. */
3065
3066 if (CONST_INT_P (scalar_op1))
3067 {
3068 coeff = INTVAL (scalar_op1);
3069 is_neg = coeff < 0;
3070 }
3071 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3072 {
3073 /* If we are multiplying in DImode, it may still be a win
3074 to try to work with shifts and adds. */
3075 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3076 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3077 {
3078 coeff = CONST_DOUBLE_LOW (scalar_op1);
3079 is_neg = false;
3080 }
3081 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3082 {
3083 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3084 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3085 {
3086 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3087 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3088 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3089 return expand_shift (LSHIFT_EXPR, mode, op0,
3090 shift, target, unsignedp);
3091 }
3092 goto skip_synth;
3093 }
3094 else
3095 goto skip_synth;
3096 }
3097 else
3098 goto skip_synth;
3099
3100 /* We used to test optimize here, on the grounds that it's better to
3101 produce a smaller program when -O is not used. But this causes
3102 such a terrible slowdown sometimes that it seems better to always
3103 use synth_mult. */
3104
3105 /* Special case powers of two. */
3106 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3107 return expand_shift (LSHIFT_EXPR, mode, op0,
3108 floor_log2 (coeff), target, unsignedp);
3109
3110 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3111
3112 /* Attempt to handle multiplication of DImode values by negative
3113 coefficients, by performing the multiplication by a positive
3114 multiplier and then inverting the result. */
3115 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3116 {
3117 /* It's safe to use -coeff even for INT_MIN, as the
3118 result is interpreted as an unsigned coefficient.
3119 Exclude cost of op0 from max_cost to match the cost
3120 calculation of the synth_mult. */
3121 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3122 - neg_cost(speed, mode));
3123 if (max_cost > 0
3124 && choose_mult_variant (mode, -coeff, &algorithm,
3125 &variant, max_cost))
3126 {
3127 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3128 &algorithm, variant);
3129 return expand_unop (mode, neg_optab, temp, target, 0);
3130 }
3131 goto skip_synth;
3132 }
3133
3134 /* Exclude cost of op0 from max_cost to match the cost
3135 calculation of the synth_mult. */
3136 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3137 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3138 return expand_mult_const (mode, op0, coeff, target,
3139 &algorithm, variant);
3140 }
3141 skip_synth:
3142
3143 /* Expand x*2.0 as x+x. */
3144 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3145 {
3146 REAL_VALUE_TYPE d;
3147 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3148
3149 if (REAL_VALUES_EQUAL (d, dconst2))
3150 {
3151 op0 = force_reg (GET_MODE (op0), op0);
3152 return expand_binop (mode, add_optab, op0, op0,
3153 target, unsignedp, OPTAB_LIB_WIDEN);
3154 }
3155 }
3156 skip_scalar:
3157
3158 /* This used to use umul_optab if unsigned, but for non-widening multiply
3159 there is no difference between signed and unsigned. */
3160 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3161 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3162 gcc_assert (op0);
3163 return op0;
3164 }
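/* For illustration only: the special cases above mean that, in an integral
   mode, x*0 folds to 0, x*1 to x, x*-1 to a negation, and a power-of-two
   coefficient such as 8 becomes a single left shift by 3; only the
   remaining constants go through choose_mult_variant/expand_mult_const or
   fall back to a real multiply insn or libcall.  */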
3165
3166 /* Return a cost estimate for multiplying a register by the given
3167 COEFFicient in the given MODE and SPEED. */
3168
3169 int
3170 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3171 {
3172 int max_cost;
3173 struct algorithm algorithm;
3174 enum mult_variant variant;
3175
3176 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3177 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3178 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3179 return algorithm.cost.cost;
3180 else
3181 return max_cost;
3182 }
3183
3184 /* Perform a widening multiplication and return an rtx for the result.
3185 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3186 TARGET is a suggestion for where to store the result (an rtx).
3187 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3188 or smul_widen_optab.
3189
3190 We check specially for a constant integer as OP1, comparing the
3191 cost of a widening multiply against the cost of a sequence of shifts
3192 and adds. */
3193
3194 rtx
3195 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3196 int unsignedp, optab this_optab)
3197 {
3198 bool speed = optimize_insn_for_speed_p ();
3199 rtx cop1;
3200
3201 if (CONST_INT_P (op1)
3202 && GET_MODE (op0) != VOIDmode
3203 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3204 this_optab == umul_widen_optab))
3205 && CONST_INT_P (cop1)
3206 && (INTVAL (cop1) >= 0
3207 || HWI_COMPUTABLE_MODE_P (mode)))
3208 {
3209 HOST_WIDE_INT coeff = INTVAL (cop1);
3210 int max_cost;
3211 enum mult_variant variant;
3212 struct algorithm algorithm;
3213
3214 /* Special case powers of two. */
3215 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3216 {
3217 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3218 return expand_shift (LSHIFT_EXPR, mode, op0,
3219 floor_log2 (coeff), target, unsignedp);
3220 }
3221
3222 /* Exclude cost of op0 from max_cost to match the cost
3223 calculation of the synth_mult. */
3224 max_cost = mul_widen_cost (speed, mode);
3225 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3226 max_cost))
3227 {
3228 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3229 return expand_mult_const (mode, op0, coeff, target,
3230 &algorithm, variant);
3231 }
3232 }
3233 return expand_binop (mode, this_optab, op0, op1, target,
3234 unsignedp, OPTAB_LIB_WIDEN);
3235 }
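/* For illustration only: with a power-of-two coefficient the widening
   multiply above degenerates into an extension followed by a shift, e.g.
   a 32x32->64 bit multiply by 8 is emitted as the extended operand shifted
   left by 3; for other constants the shift/add synthesis is used only when
   it beats mul_widen_cost.  */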
3236 \f
3237 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3238 replace division by D, and put the least significant N bits of the result
3239 in *MULTIPLIER_PTR and return the most significant bit.
3240
3241 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3242 needed precision is in PRECISION (should be <= N).
3243
3244 PRECISION should be as small as possible so this function can choose
3245 multiplier more freely.
3246
3247 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3248 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3249
3250 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3251 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3252
3253 unsigned HOST_WIDE_INT
3254 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3255 unsigned HOST_WIDE_INT *multiplier_ptr,
3256 int *post_shift_ptr, int *lgup_ptr)
3257 {
3258 double_int mhigh, mlow;
3259 int lgup, post_shift;
3260 int pow, pow2;
3261
3262 /* lgup = ceil(log2(divisor)); */
3263 lgup = ceil_log2 (d);
3264
3265 gcc_assert (lgup <= n);
3266
3267 pow = n + lgup;
3268 pow2 = n + lgup - precision;
3269
3270 /* We could handle this with some effort, but this case is much
3271 better handled directly with a scc insn, so rely on caller using
3272 that. */
3273 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3274
3275 /* mlow = 2^(N + lgup)/d */
3276 double_int val = double_int_zero.set_bit (pow);
3277 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3278
3279 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3280 val |= double_int_zero.set_bit (pow2);
3281 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3282
3283 gcc_assert (!mhigh.high || val.high - d < d);
3284 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3285 /* Assert that mlow < mhigh. */
3286 gcc_assert (mlow.ult (mhigh));
3287
3288 /* If precision == N, then mlow, mhigh exceed 2^N
3289 (but they do not exceed 2^(N+1)). */
3290
3291 /* Reduce to lowest terms. */
3292 for (post_shift = lgup; post_shift > 0; post_shift--)
3293 {
3294 int shft = HOST_BITS_PER_WIDE_INT - 1;
3295 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3296 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3297 if (ml_lo >= mh_lo)
3298 break;
3299
3300 mlow = double_int::from_uhwi (ml_lo);
3301 mhigh = double_int::from_uhwi (mh_lo);
3302 }
3303
3304 *post_shift_ptr = post_shift;
3305 *lgup_ptr = lgup;
3306 if (n < HOST_BITS_PER_WIDE_INT)
3307 {
3308 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3309 *multiplier_ptr = mhigh.low & mask;
3310 return mhigh.low >= mask;
3311 }
3312 else
3313 {
3314 *multiplier_ptr = mhigh.low;
3315 return mhigh.high;
3316 }
3317 }
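/* Worked example (illustrative only): for N == 32, D == 3 and full
   PRECISION, the computation above yields the well-known magic number

     m = 0xAAAAAAAB  (= (2^33 + 1) / 3),  post_shift = 1,  lgup = 2,

   so that for every 32-bit unsigned x

     x / 3 == (unsigned int) (((unsigned long long) x * 0xAAAAAAABu) >> 33),

   i.e. take the high half of the widening multiply and shift it right by
   post_shift.  The returned most-significant bit is 0 here, so no
   add/subtract fixup is needed.  */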
3318
3319 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3320 congruent to 1 (mod 2**N). */
3321
3322 static unsigned HOST_WIDE_INT
3323 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3324 {
3325 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3326
3327 /* The algorithm notes that the choice y = x satisfies
3328 x*y == 1 mod 2^3, since x is assumed odd.
3329 Each iteration doubles the number of bits of significance in y. */
3330
3331 unsigned HOST_WIDE_INT mask;
3332 unsigned HOST_WIDE_INT y = x;
3333 int nbit = 3;
3334
3335 mask = (n == HOST_BITS_PER_WIDE_INT
3336 ? ~(unsigned HOST_WIDE_INT) 0
3337 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3338
3339 while (nbit < n)
3340 {
3341 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3342 nbit *= 2;
3343 }
3344 return y;
3345 }
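/* Worked example (illustrative only): X == 7, N == 8.  Starting from
   y = x = 7, which is already an inverse mod 2^3 (7*7 == 49 == 1 mod 8),
   one iteration gives y = 7 * (2 - 7*7) mod 256 = 183, and a second
   leaves it unchanged, so the routine returns 183; indeed
   7 * 183 == 1281 == 5*256 + 1.  */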
3346
3347 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
3348 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3349 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3350 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3351 become signed.
3352
3353 The result is put in TARGET if that is convenient.
3354
3355 MODE is the mode of operation. */
3356
3357 rtx
3358 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3359 rtx op1, rtx target, int unsignedp)
3360 {
3361 rtx tem;
3362 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3363
3364 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3365 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3366 tem = expand_and (mode, tem, op1, NULL_RTX);
3367 adj_operand
3368 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3369 adj_operand);
3370
3371 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3372 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3373 tem = expand_and (mode, tem, op0, NULL_RTX);
3374 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3375 target);
3376
3377 return target;
3378 }
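/* The identity used above: modulo 2^N (N = GET_MODE_BITSIZE (MODE)),
   high_u (a, b) == high_s (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0),
   where high_u/high_s denote the upper N bits of the unsigned/signed
   2N-bit product.  The arithmetic right shifts by N - 1 produce the two
   (x < 0 ? y : 0) terms as masks.  */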
3379
3380 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3381
3382 static rtx
3383 extract_high_half (enum machine_mode mode, rtx op)
3384 {
3385 enum machine_mode wider_mode;
3386
3387 if (mode == word_mode)
3388 return gen_highpart (mode, op);
3389
3390 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3391
3392 wider_mode = GET_MODE_WIDER_MODE (mode);
3393 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3394 GET_MODE_BITSIZE (mode), 0, 1);
3395 return convert_modes (mode, wider_mode, op, 0);
3396 }
3397
3398 /* Like expmed_mult_highpart, but only consider using a multiplication
3399 optab. OP1 is an rtx for the constant operand. */
3400
3401 static rtx
3402 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3403 rtx target, int unsignedp, int max_cost)
3404 {
3405 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3406 enum machine_mode wider_mode;
3407 optab moptab;
3408 rtx tem;
3409 int size;
3410 bool speed = optimize_insn_for_speed_p ();
3411
3412 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3413
3414 wider_mode = GET_MODE_WIDER_MODE (mode);
3415 size = GET_MODE_BITSIZE (mode);
3416
3417 /* Firstly, try using a multiplication insn that only generates the needed
3418 high part of the product, and in the sign flavor of unsignedp. */
3419 if (mul_highpart_cost (speed, mode) < max_cost)
3420 {
3421 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3422 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3423 unsignedp, OPTAB_DIRECT);
3424 if (tem)
3425 return tem;
3426 }
3427
3428 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3429 Need to adjust the result after the multiplication. */
3430 if (size - 1 < BITS_PER_WORD
3431 && (mul_highpart_cost (speed, mode)
3432 + 2 * shift_cost (speed, mode, size-1)
3433 + 4 * add_cost (speed, mode) < max_cost))
3434 {
3435 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3436 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3437 unsignedp, OPTAB_DIRECT);
3438 if (tem)
3439 /* We used the wrong signedness. Adjust the result. */
3440 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3441 tem, unsignedp);
3442 }
3443
3444 /* Try widening multiplication. */
3445 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3446 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3447 && mul_widen_cost (speed, wider_mode) < max_cost)
3448 {
3449 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3450 unsignedp, OPTAB_WIDEN);
3451 if (tem)
3452 return extract_high_half (mode, tem);
3453 }
3454
3455 /* Try widening the mode and performing a non-widening multiplication. */
3456 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3457 && size - 1 < BITS_PER_WORD
3458 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3459 < max_cost))
3460 {
3461 rtx insns, wop0, wop1;
3462
3463 /* We need to widen the operands, for example to ensure the
3464 constant multiplier is correctly sign or zero extended.
3465 Use a sequence to clean up any instructions emitted by
3466 the conversions if things don't work out. */
3467 start_sequence ();
3468 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3469 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3470 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3471 unsignedp, OPTAB_WIDEN);
3472 insns = get_insns ();
3473 end_sequence ();
3474
3475 if (tem)
3476 {
3477 emit_insn (insns);
3478 return extract_high_half (mode, tem);
3479 }
3480 }
3481
3482 /* Try widening multiplication of opposite signedness, and adjust. */
3483 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3484 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3485 && size - 1 < BITS_PER_WORD
3486 && (mul_widen_cost (speed, wider_mode)
3487 + 2 * shift_cost (speed, mode, size-1)
3488 + 4 * add_cost (speed, mode) < max_cost))
3489 {
3490 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3491 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3492 if (tem != 0)
3493 {
3494 tem = extract_high_half (mode, tem);
3495 /* We used the wrong signedness. Adjust the result. */
3496 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3497 target, unsignedp);
3498 }
3499 }
3500
3501 return 0;
3502 }
3503
3504 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3505 putting the high half of the result in TARGET if that is convenient,
3506 and return where the result is. If the operation cannot be performed,
3507 0 is returned.
3508
3509 MODE is the mode of operation and result.
3510
3511 UNSIGNEDP nonzero means unsigned multiply.
3512
3513 MAX_COST is the total allowed cost for the expanded RTL. */
3514
3515 static rtx
3516 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3517 rtx target, int unsignedp, int max_cost)
3518 {
3519 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3520 unsigned HOST_WIDE_INT cnst1;
3521 int extra_cost;
3522 bool sign_adjust = false;
3523 enum mult_variant variant;
3524 struct algorithm alg;
3525 rtx tem;
3526 bool speed = optimize_insn_for_speed_p ();
3527
3528 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3529 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3530 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3531
3532 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3533
3534 /* We can't optimize modes wider than BITS_PER_WORD.
3535 ??? We might be able to perform double-word arithmetic if
3536 mode == word_mode; however, all the cost calculations in
3537 synth_mult etc. assume single-word operations. */
3538 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3539 return expmed_mult_highpart_optab (mode, op0, op1, target,
3540 unsignedp, max_cost);
3541
3542 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3543
3544 /* Check whether we try to multiply by a negative constant. */
3545 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3546 {
3547 sign_adjust = true;
3548 extra_cost += add_cost (speed, mode);
3549 }
3550
3551 /* See whether shift/add multiplication is cheap enough. */
3552 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3553 max_cost - extra_cost))
3554 {
3555 /* See whether the specialized multiplication optabs are
3556 cheaper than the shift/add version. */
3557 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3558 alg.cost.cost + extra_cost);
3559 if (tem)
3560 return tem;
3561
3562 tem = convert_to_mode (wider_mode, op0, unsignedp);
3563 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3564 tem = extract_high_half (mode, tem);
3565
3566 /* Adjust result for signedness. */
3567 if (sign_adjust)
3568 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3569
3570 return tem;
3571 }
3572 return expmed_mult_highpart_optab (mode, op0, op1, target,
3573 unsignedp, max_cost);
3574 }
3575
3576
3577 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3578
3579 static rtx
3580 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3581 {
3582 unsigned HOST_WIDE_INT masklow, maskhigh;
3583 rtx result, temp, shift, label;
3584 int logd;
3585
3586 logd = floor_log2 (d);
3587 result = gen_reg_rtx (mode);
3588
3589 /* Avoid conditional branches when they're expensive. */
3590 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3591 && optimize_insn_for_speed_p ())
3592 {
3593 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3594 mode, 0, -1);
3595 if (signmask)
3596 {
3597 signmask = force_reg (mode, signmask);
3598 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3599 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3600
3601 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3602 which instruction sequence to use. If logical right shifts
3603 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3604 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3605
3606 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3607 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3608 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3609 > COSTS_N_INSNS (2)))
3610 {
3611 temp = expand_binop (mode, xor_optab, op0, signmask,
3612 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3613 temp = expand_binop (mode, sub_optab, temp, signmask,
3614 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3615 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3616 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3617 temp = expand_binop (mode, xor_optab, temp, signmask,
3618 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3619 temp = expand_binop (mode, sub_optab, temp, signmask,
3620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621 }
3622 else
3623 {
3624 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3626 signmask = force_reg (mode, signmask);
3627
3628 temp = expand_binop (mode, add_optab, op0, signmask,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3630 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 temp = expand_binop (mode, sub_optab, temp, signmask,
3633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634 }
3635 return temp;
3636 }
3637 }
3638
3639 /* Mask contains the mode's signbit and the significant bits of the
3640 modulus. By including the signbit in the operation, many targets
3641 can avoid an explicit compare operation in the following comparison
3642 against zero. */
3643
3644 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3645 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3646 {
3647 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3648 maskhigh = -1;
3649 }
3650 else
3651 maskhigh = (HOST_WIDE_INT) -1
3652 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3653
3654 temp = expand_binop (mode, and_optab, op0,
3655 immed_double_const (masklow, maskhigh, mode),
3656 result, 1, OPTAB_LIB_WIDEN);
3657 if (temp != result)
3658 emit_move_insn (result, temp);
3659
3660 label = gen_label_rtx ();
3661 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3662
3663 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3664 0, OPTAB_LIB_WIDEN);
3665 masklow = (HOST_WIDE_INT) -1 << logd;
3666 maskhigh = -1;
3667 temp = expand_binop (mode, ior_optab, temp,
3668 immed_double_const (masklow, maskhigh, mode),
3669 result, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3671 0, OPTAB_LIB_WIDEN);
3672 if (temp != result)
3673 emit_move_insn (result, temp);
3674 emit_label (label);
3675 return result;
3676 }
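/* In C terms the two branch-free sequences above compute the truncating
   remainder OP0 % 2^K (K = logd), with S = (OP0 < 0 ? -1 : 0), as either
     ((((OP0 ^ S) - S) & (2^K - 1)) ^ S) - S
   or, using the bias B = (OP0 < 0 ? 2^K - 1 : 0) obtained by the logical
   right shift of S,
     ((OP0 + B) & (2^K - 1)) - B.  */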
3677
3678 /* Expand signed division of OP0 by a power of two D in mode MODE.
3679 This routine is only called for positive values of D. */
3680
3681 static rtx
3682 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3683 {
3684 rtx temp, label;
3685 int logd;
3686
3687 logd = floor_log2 (d);
3688
3689 if (d == 2
3690 && BRANCH_COST (optimize_insn_for_speed_p (),
3691 false) >= 1)
3692 {
3693 temp = gen_reg_rtx (mode);
3694 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3695 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3696 0, OPTAB_LIB_WIDEN);
3697 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3698 }
3699
3700 #ifdef HAVE_conditional_move
3701 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3702 >= 2)
3703 {
3704 rtx temp2;
3705
3706 /* ??? emit_conditional_move forces a stack adjustment via
3707 compare_from_rtx, so if the sequence is discarded, it will
3708 be lost. Do it now instead. */
3709 do_pending_stack_adjust ();
3710
3711 start_sequence ();
3712 temp2 = copy_to_mode_reg (mode, op0);
3713 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3714 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3715 temp = force_reg (mode, temp);
3716
3717 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3718 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3719 mode, temp, temp2, mode, 0);
3720 if (temp2)
3721 {
3722 rtx seq = get_insns ();
3723 end_sequence ();
3724 emit_insn (seq);
3725 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3726 }
3727 end_sequence ();
3728 }
3729 #endif
3730
3731 if (BRANCH_COST (optimize_insn_for_speed_p (),
3732 false) >= 2)
3733 {
3734 int ushift = GET_MODE_BITSIZE (mode) - logd;
3735
3736 temp = gen_reg_rtx (mode);
3737 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3738 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3739 > COSTS_N_INSNS (1))
3740 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3741 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3742 else
3743 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3744 ushift, NULL_RTX, 1);
3745 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3746 0, OPTAB_LIB_WIDEN);
3747 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3748 }
3749
3750 label = gen_label_rtx ();
3751 temp = copy_to_mode_reg (mode, op0);
3752 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3753 expand_inc (temp, GEN_INT (d - 1));
3754 emit_label (label);
3755 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3756 }
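/* All of the variants above compute the truncating quotient OP0 / 2^K
   (K = logd) as the arithmetic shift (OP0 + (OP0 < 0 ? 2^K - 1 : 0)) >> K;
   they differ only in how the conditional bias is materialized (store-flag
   plus AND or shift, a conditional move, or an explicit branch).  */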
3757 \f
3758 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3759 if that is convenient, and returning where the result is.
3760 You may request either the quotient or the remainder as the result;
3761 specify REM_FLAG nonzero to get the remainder.
3762
3763 CODE is the expression code for which kind of division this is;
3764 it controls how rounding is done. MODE is the machine mode to use.
3765 UNSIGNEDP nonzero means do unsigned division. */
3766
3767 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3768 and then correct it by or'ing in missing high bits
3769 if result of ANDI is nonzero.
3770 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3771 This could optimize to a bfexts instruction.
3772 But C doesn't use these operations, so their optimizations are
3773 left for later. */
3774 /* ??? For modulo, we don't actually need the highpart of the first product,
3775 the low part will do nicely. And for small divisors, the second multiply
3776 can also be a low-part only multiply or even be completely left out.
3777 E.g. to calculate the remainder of a division by 3 with a 32 bit
3778 multiply, multiply with 0x55555556 and extract the upper two bits;
3779 the result is exact for inputs up to 0x1fffffff.
3780 The input range can be reduced by using cross-sum rules.
3781 For odd divisors >= 3, the following table gives right shift counts
3782 so that if a number is shifted by an integer multiple of the given
3783 amount, the remainder stays the same:
3784 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3785 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3786 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3787 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3788 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3789
3790 Cross-sum rules for even numbers can be derived by leaving as many bits
3791 to the right alone as the divisor has zeros to the right.
3792 E.g. if x is an unsigned 32 bit number:
3793 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3794 */
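/* As an illustration of the low-part trick above, assuming 32-bit unsigned
   arithmetic: x % 3 == ((uint32_t) (x * 0x55555556)) >> 30 for all
   x <= 0x1fffffff, i.e. the top two bits of the low 32-bit product carry
   the remainder.  */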
3795
3796 rtx
3797 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3798 rtx op0, rtx op1, rtx target, int unsignedp)
3799 {
3800 enum machine_mode compute_mode;
3801 rtx tquotient;
3802 rtx quotient = 0, remainder = 0;
3803 rtx last;
3804 int size;
3805 rtx insn;
3806 optab optab1, optab2;
3807 int op1_is_constant, op1_is_pow2 = 0;
3808 int max_cost, extra_cost;
3809 static HOST_WIDE_INT last_div_const = 0;
3810 static HOST_WIDE_INT ext_op1;
3811 bool speed = optimize_insn_for_speed_p ();
3812
3813 op1_is_constant = CONST_INT_P (op1);
3814 if (op1_is_constant)
3815 {
3816 ext_op1 = INTVAL (op1);
3817 if (unsignedp)
3818 ext_op1 &= GET_MODE_MASK (mode);
3819 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3820 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3821 }
3822
3823 /*
3824 This is the structure of expand_divmod:
3825
3826 First comes code to fix up the operands so we can perform the operations
3827 correctly and efficiently.
3828
3829 Second comes a switch statement with code specific for each rounding mode.
3830 For some special operands this code emits all RTL for the desired
3831 operation; for other cases, it generates only a quotient and stores it in
3832 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3833 to indicate that it has not done anything.
3834
3835 Last comes code that finishes the operation. If QUOTIENT is set and
3836 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3837 QUOTIENT is not set, it is computed using trunc rounding.
3838
3839 We try to generate special code for division and remainder when OP1 is a
3840 constant. If |OP1| = 2**n we can use shifts and some other fast
3841 operations. For other values of OP1, we compute a carefully selected
3842 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3843 by m.
3844
3845 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3846 half of the product. Different strategies for generating the product are
3847 implemented in expmed_mult_highpart.
3848
3849 If what we actually want is the remainder, we generate that by another
3850 by-constant multiplication and a subtraction. */
3851
3852 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3853 code below will malfunction if we are, so check here and handle
3854 the special case if so. */
3855 if (op1 == const1_rtx)
3856 return rem_flag ? const0_rtx : op0;
3857
3858 /* When dividing by -1, we could get an overflow.
3859 negv_optab can handle overflows. */
3860 if (! unsignedp && op1 == constm1_rtx)
3861 {
3862 if (rem_flag)
3863 return const0_rtx;
3864 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3865 ? negv_optab : neg_optab, op0, target, 0);
3866 }
3867
3868 if (target
3869 /* Don't use the function value register as a target
3870 since we have to read it as well as write it,
3871 and function-inlining gets confused by this. */
3872 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3873 /* Don't clobber an operand while doing a multi-step calculation. */
3874 || ((rem_flag || op1_is_constant)
3875 && (reg_mentioned_p (target, op0)
3876 || (MEM_P (op0) && MEM_P (target))))
3877 || reg_mentioned_p (target, op1)
3878 || (MEM_P (op1) && MEM_P (target))))
3879 target = 0;
3880
3881 /* Get the mode in which to perform this computation. Normally it will
3882 be MODE, but sometimes we can't do the desired operation in MODE.
3883 If so, pick a wider mode in which we can do the operation. Convert
3884 to that mode at the start to avoid repeated conversions.
3885
3886 First see what operations we need. These depend on the expression
3887 we are evaluating. (We assume that divxx3 insns exist under the
3888 same conditions as modxx3 insns, and that these insns don't normally
3889 fail. If these assumptions are not correct, we may generate less
3890 efficient code in some cases.)
3891
3892 Then see if we find a mode in which we can open-code that operation
3893 (either a division, modulus, or shift). Finally, check for the smallest
3894 mode for which we can do the operation with a library call. */
3895
3896 /* We might want to refine this now that we have division-by-constant
3897 optimization. Since expmed_mult_highpart tries so many variants, it is
3898 not straightforward to generalize this. Maybe we should make an array
3899 of possible modes in init_expmed? Save this for GCC 2.7. */
3900
3901 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3902 ? (unsignedp ? lshr_optab : ashr_optab)
3903 : (unsignedp ? udiv_optab : sdiv_optab));
3904 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3905 ? optab1
3906 : (unsignedp ? udivmod_optab : sdivmod_optab));
3907
3908 for (compute_mode = mode; compute_mode != VOIDmode;
3909 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3910 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3911 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3912 break;
3913
3914 if (compute_mode == VOIDmode)
3915 for (compute_mode = mode; compute_mode != VOIDmode;
3916 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3917 if (optab_libfunc (optab1, compute_mode)
3918 || optab_libfunc (optab2, compute_mode))
3919 break;
3920
3921 /* If we still couldn't find a mode, use MODE, but expand_binop will
3922 probably die. */
3923 if (compute_mode == VOIDmode)
3924 compute_mode = mode;
3925
3926 if (target && GET_MODE (target) == compute_mode)
3927 tquotient = target;
3928 else
3929 tquotient = gen_reg_rtx (compute_mode);
3930
3931 size = GET_MODE_BITSIZE (compute_mode);
3932 #if 0
3933 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3934 (mode), and thereby get better code when OP1 is a constant. Do that
3935 later. It will require going over all usages of SIZE below. */
3936 size = GET_MODE_BITSIZE (mode);
3937 #endif
3938
3939 /* Only deduct something for a REM if the last divide done was
3940 for a different constant. Then set the constant of the last
3941 divide. */
3942 max_cost = (unsignedp
3943 ? udiv_cost (speed, compute_mode)
3944 : sdiv_cost (speed, compute_mode));
3945 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3946 && INTVAL (op1) == last_div_const))
3947 max_cost -= (mul_cost (speed, compute_mode)
3948 + add_cost (speed, compute_mode));
3949
3950 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3951
3952 /* Now convert to the best mode to use. */
3953 if (compute_mode != mode)
3954 {
3955 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3956 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3957
3958 /* convert_modes may have placed op1 into a register, so we
3959 must recompute the following. */
3960 op1_is_constant = CONST_INT_P (op1);
3961 op1_is_pow2 = (op1_is_constant
3962 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3963 || (! unsignedp
3964 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3965 }
3966
3967 /* If one of the operands is a volatile MEM, copy it into a register. */
3968
3969 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3970 op0 = force_reg (compute_mode, op0);
3971 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3972 op1 = force_reg (compute_mode, op1);
3973
3974 /* If we need the remainder or if OP1 is constant, we need to
3975 put OP0 in a register in case it has any queued subexpressions. */
3976 if (rem_flag || op1_is_constant)
3977 op0 = force_reg (compute_mode, op0);
3978
3979 last = get_last_insn ();
3980
3981 /* Promote floor rounding to trunc rounding for unsigned operations. */
3982 if (unsignedp)
3983 {
3984 if (code == FLOOR_DIV_EXPR)
3985 code = TRUNC_DIV_EXPR;
3986 if (code == FLOOR_MOD_EXPR)
3987 code = TRUNC_MOD_EXPR;
3988 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3989 code = TRUNC_DIV_EXPR;
3990 }
3991
3992 if (op1 != const0_rtx)
3993 switch (code)
3994 {
3995 case TRUNC_MOD_EXPR:
3996 case TRUNC_DIV_EXPR:
3997 if (op1_is_constant)
3998 {
3999 if (unsignedp)
4000 {
4001 unsigned HOST_WIDE_INT mh, ml;
4002 int pre_shift, post_shift;
4003 int dummy;
4004 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4005 & GET_MODE_MASK (compute_mode));
4006
4007 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4008 {
4009 pre_shift = floor_log2 (d);
4010 if (rem_flag)
4011 {
4012 remainder
4013 = expand_binop (compute_mode, and_optab, op0,
4014 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4015 remainder, 1,
4016 OPTAB_LIB_WIDEN);
4017 if (remainder)
4018 return gen_lowpart (mode, remainder);
4019 }
4020 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4021 pre_shift, tquotient, 1);
4022 }
4023 else if (size <= HOST_BITS_PER_WIDE_INT)
4024 {
4025 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4026 {
4027 /* Most significant bit of divisor is set; emit an scc
4028 insn. */
4029 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4030 compute_mode, 1, 1);
4031 }
4032 else
4033 {
4034 /* Find a suitable multiplier and right shift count
4035 to use instead of dividing by D. */
4036
4037 mh = choose_multiplier (d, size, size,
4038 &ml, &post_shift, &dummy);
4039
4040 /* If the suggested multiplier is more than SIZE bits,
4041 we can do better for even divisors, using an
4042 initial right shift. */
4043 if (mh != 0 && (d & 1) == 0)
4044 {
4045 pre_shift = floor_log2 (d & -d);
4046 mh = choose_multiplier (d >> pre_shift, size,
4047 size - pre_shift,
4048 &ml, &post_shift, &dummy);
4049 gcc_assert (!mh);
4050 }
4051 else
4052 pre_shift = 0;
4053
4054 if (mh != 0)
4055 {
4056 rtx t1, t2, t3, t4;
4057
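/* The full multiplier here is ML + 2^SIZE.  The sequence below forms
   t1 = the SIZE-bit high part of op0 * ml and then
   q = (t1 + ((op0 - t1) >> 1)) >> (post_shift - 1),
   folding the implicit 2^SIZE term into the add/shift fixup instead of
   forming the SIZE + 1 bit constant explicitly.  */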
4058 if (post_shift - 1 >= BITS_PER_WORD)
4059 goto fail1;
4060
4061 extra_cost
4062 = (shift_cost (speed, compute_mode, post_shift - 1)
4063 + shift_cost (speed, compute_mode, 1)
4064 + 2 * add_cost (speed, compute_mode));
4065 t1 = expmed_mult_highpart (compute_mode, op0,
4066 GEN_INT (ml),
4067 NULL_RTX, 1,
4068 max_cost - extra_cost);
4069 if (t1 == 0)
4070 goto fail1;
4071 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4072 op0, t1),
4073 NULL_RTX);
4074 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4075 t2, 1, NULL_RTX, 1);
4076 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4077 t1, t3),
4078 NULL_RTX);
4079 quotient = expand_shift
4080 (RSHIFT_EXPR, compute_mode, t4,
4081 post_shift - 1, tquotient, 1);
4082 }
4083 else
4084 {
4085 rtx t1, t2;
4086
4087 if (pre_shift >= BITS_PER_WORD
4088 || post_shift >= BITS_PER_WORD)
4089 goto fail1;
4090
4091 t1 = expand_shift
4092 (RSHIFT_EXPR, compute_mode, op0,
4093 pre_shift, NULL_RTX, 1);
4094 extra_cost
4095 = (shift_cost (speed, compute_mode, pre_shift)
4096 + shift_cost (speed, compute_mode, post_shift));
4097 t2 = expmed_mult_highpart (compute_mode, t1,
4098 GEN_INT (ml),
4099 NULL_RTX, 1,
4100 max_cost - extra_cost);
4101 if (t2 == 0)
4102 goto fail1;
4103 quotient = expand_shift
4104 (RSHIFT_EXPR, compute_mode, t2,
4105 post_shift, tquotient, 1);
4106 }
4107 }
4108 }
4109 else /* Too wide a mode to use tricky code */
4110 break;
4111
4112 insn = get_last_insn ();
4113 if (insn != last)
4114 set_dst_reg_note (insn, REG_EQUAL,
4115 gen_rtx_UDIV (compute_mode, op0, op1),
4116 quotient);
4117 }
4118 else /* TRUNC_DIV, signed */
4119 {
4120 unsigned HOST_WIDE_INT ml;
4121 int lgup, post_shift;
4122 rtx mlr;
4123 HOST_WIDE_INT d = INTVAL (op1);
4124 unsigned HOST_WIDE_INT abs_d;
4125
4126 /* Since d might be INT_MIN, we have to cast to
4127 unsigned HOST_WIDE_INT before negating to avoid
4128 undefined signed overflow. */
4129 abs_d = (d >= 0
4130 ? (unsigned HOST_WIDE_INT) d
4131 : - (unsigned HOST_WIDE_INT) d);
4132
4133 /* n rem d = n rem -d */
4134 if (rem_flag && d < 0)
4135 {
4136 d = abs_d;
4137 op1 = gen_int_mode (abs_d, compute_mode);
4138 }
4139
4140 if (d == 1)
4141 quotient = op0;
4142 else if (d == -1)
4143 quotient = expand_unop (compute_mode, neg_optab, op0,
4144 tquotient, 0);
4145 else if (HOST_BITS_PER_WIDE_INT >= size
4146 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4147 {
4148 /* This case is not handled correctly below. */
4149 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4150 compute_mode, 1, 1);
4151 if (quotient == 0)
4152 goto fail1;
4153 }
4154 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4155 && (rem_flag
4156 ? smod_pow2_cheap (speed, compute_mode)
4157 : sdiv_pow2_cheap (speed, compute_mode))
4158 /* We assume that the cheap metric is true if the
4159 optab has an expander for this mode. */
4160 && ((optab_handler ((rem_flag ? smod_optab
4161 : sdiv_optab),
4162 compute_mode)
4163 != CODE_FOR_nothing)
4164 || (optab_handler (sdivmod_optab,
4165 compute_mode)
4166 != CODE_FOR_nothing)))
4167 ;
4168 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4169 {
4170 if (rem_flag)
4171 {
4172 remainder = expand_smod_pow2 (compute_mode, op0, d);
4173 if (remainder)
4174 return gen_lowpart (mode, remainder);
4175 }
4176
4177 if (sdiv_pow2_cheap (speed, compute_mode)
4178 && ((optab_handler (sdiv_optab, compute_mode)
4179 != CODE_FOR_nothing)
4180 || (optab_handler (sdivmod_optab, compute_mode)
4181 != CODE_FOR_nothing)))
4182 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4183 compute_mode, op0,
4184 gen_int_mode (abs_d,
4185 compute_mode),
4186 NULL_RTX, 0);
4187 else
4188 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4189
4190 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4191 negate the quotient. */
4192 if (d < 0)
4193 {
4194 insn = get_last_insn ();
4195 if (insn != last
4196 && abs_d < ((unsigned HOST_WIDE_INT) 1
4197 << (HOST_BITS_PER_WIDE_INT - 1)))
4198 set_dst_reg_note (insn, REG_EQUAL,
4199 gen_rtx_DIV (compute_mode, op0,
4200 gen_int_mode
4201 (abs_d,
4202 compute_mode)),
4203 quotient);
4204
4205 quotient = expand_unop (compute_mode, neg_optab,
4206 quotient, quotient, 0);
4207 }
4208 }
4209 else if (size <= HOST_BITS_PER_WIDE_INT)
4210 {
4211 choose_multiplier (abs_d, size, size - 1,
4212 &ml, &post_shift, &lgup);
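/* If the multiplier fits in SIZE - 1 bits, the quotient is
   (MULSH (op0, ml) >> post_shift) - (op0 >> (SIZE - 1)), where MULSH is
   the signed highpart multiply.  Otherwise the else branch below uses
   ml - 2^SIZE as the multiplier and adds OP0 back in before shifting.
   In both cases the subtraction is reversed when d < 0, negating the
   quotient.  */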
4213 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4214 {
4215 rtx t1, t2, t3;
4216
4217 if (post_shift >= BITS_PER_WORD
4218 || size - 1 >= BITS_PER_WORD)
4219 goto fail1;
4220
4221 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4222 + shift_cost (speed, compute_mode, size - 1)
4223 + add_cost (speed, compute_mode));
4224 t1 = expmed_mult_highpart (compute_mode, op0,
4225 GEN_INT (ml), NULL_RTX, 0,
4226 max_cost - extra_cost);
4227 if (t1 == 0)
4228 goto fail1;
4229 t2 = expand_shift
4230 (RSHIFT_EXPR, compute_mode, t1,
4231 post_shift, NULL_RTX, 0);
4232 t3 = expand_shift
4233 (RSHIFT_EXPR, compute_mode, op0,
4234 size - 1, NULL_RTX, 0);
4235 if (d < 0)
4236 quotient
4237 = force_operand (gen_rtx_MINUS (compute_mode,
4238 t3, t2),
4239 tquotient);
4240 else
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t2, t3),
4244 tquotient);
4245 }
4246 else
4247 {
4248 rtx t1, t2, t3, t4;
4249
4250 if (post_shift >= BITS_PER_WORD
4251 || size - 1 >= BITS_PER_WORD)
4252 goto fail1;
4253
4254 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4255 mlr = gen_int_mode (ml, compute_mode);
4256 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4257 + shift_cost (speed, compute_mode, size - 1)
4258 + 2 * add_cost (speed, compute_mode));
4259 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4260 NULL_RTX, 0,
4261 max_cost - extra_cost);
4262 if (t1 == 0)
4263 goto fail1;
4264 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4265 t1, op0),
4266 NULL_RTX);
4267 t3 = expand_shift
4268 (RSHIFT_EXPR, compute_mode, t2,
4269 post_shift, NULL_RTX, 0);
4270 t4 = expand_shift
4271 (RSHIFT_EXPR, compute_mode, op0,
4272 size - 1, NULL_RTX, 0);
4273 if (d < 0)
4274 quotient
4275 = force_operand (gen_rtx_MINUS (compute_mode,
4276 t4, t3),
4277 tquotient);
4278 else
4279 quotient
4280 = force_operand (gen_rtx_MINUS (compute_mode,
4281 t3, t4),
4282 tquotient);
4283 }
4284 }
4285 else /* Too wide a mode to use tricky code */
4286 break;
4287
4288 insn = get_last_insn ();
4289 if (insn != last)
4290 set_dst_reg_note (insn, REG_EQUAL,
4291 gen_rtx_DIV (compute_mode, op0, op1),
4292 quotient);
4293 }
4294 break;
4295 }
4296 fail1:
4297 delete_insns_since (last);
4298 break;
4299
4300 case FLOOR_DIV_EXPR:
4301 case FLOOR_MOD_EXPR:
4302 /* We will come here only for signed operations. */
4303 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4304 {
4305 unsigned HOST_WIDE_INT mh, ml;
4306 int pre_shift, lgup, post_shift;
4307 HOST_WIDE_INT d = INTVAL (op1);
4308
4309 if (d > 0)
4310 {
4311 /* We could just as easily deal with negative constants here,
4312 but it does not seem worth the trouble for GCC 2.6. */
4313 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4314 {
4315 pre_shift = floor_log2 (d);
4316 if (rem_flag)
4317 {
4318 remainder = expand_binop (compute_mode, and_optab, op0,
4319 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4320 remainder, 0, OPTAB_LIB_WIDEN);
4321 if (remainder)
4322 return gen_lowpart (mode, remainder);
4323 }
4324 quotient = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 pre_shift, tquotient, 0);
4327 }
4328 else
4329 {
4330 rtx t1, t2, t3, t4;
4331
4332 mh = choose_multiplier (d, size, size - 1,
4333 &ml, &post_shift, &lgup);
4334 gcc_assert (!mh);
4335
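/* Floor division for D > 0: with S = OP0 >> (SIZE - 1) (0 or -1),
   OP0 ^ S is OP0 for non-negative OP0 and -OP0 - 1 otherwise, both
   non-negative, so the unsigned highpart multiply plus shift gives its
   truncating quotient, and XOR-ing with S again turns that into
   floor (OP0 / D).  */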
4336 if (post_shift < BITS_PER_WORD
4337 && size - 1 < BITS_PER_WORD)
4338 {
4339 t1 = expand_shift
4340 (RSHIFT_EXPR, compute_mode, op0,
4341 size - 1, NULL_RTX, 0);
4342 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4343 NULL_RTX, 0, OPTAB_WIDEN);
4344 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4345 + shift_cost (speed, compute_mode, size - 1)
4346 + 2 * add_cost (speed, compute_mode));
4347 t3 = expmed_mult_highpart (compute_mode, t2,
4348 GEN_INT (ml), NULL_RTX, 1,
4349 max_cost - extra_cost);
4350 if (t3 != 0)
4351 {
4352 t4 = expand_shift
4353 (RSHIFT_EXPR, compute_mode, t3,
4354 post_shift, NULL_RTX, 1);
4355 quotient = expand_binop (compute_mode, xor_optab,
4356 t4, t1, tquotient, 0,
4357 OPTAB_WIDEN);
4358 }
4359 }
4360 }
4361 }
4362 else
4363 {
4364 rtx nsign, t1, t2, t3, t4;
4365 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4366 op0, constm1_rtx), NULL_RTX);
4367 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4368 0, OPTAB_WIDEN);
4369 nsign = expand_shift
4370 (RSHIFT_EXPR, compute_mode, t2,
4371 size - 1, NULL_RTX, 0);
4372 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4373 NULL_RTX);
4374 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4375 NULL_RTX, 0);
4376 if (t4)
4377 {
4378 rtx t5;
4379 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4380 NULL_RTX, 0);
4381 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4382 t4, t5),
4383 tquotient);
4384 }
4385 }
4386 }
4387
4388 if (quotient != 0)
4389 break;
4390 delete_insns_since (last);
4391
4392 /* Try using an instruction that produces both the quotient and
4393 remainder, using truncation. We can easily compensate the quotient
4394 or remainder to get floor rounding, once we have the remainder.
4395 Notice that we also compute the final remainder value here,
4396 and return the result right away. */
4397 if (target == 0 || GET_MODE (target) != compute_mode)
4398 target = gen_reg_rtx (compute_mode);
4399
4400 if (rem_flag)
4401 {
4402 remainder
4403 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4404 quotient = gen_reg_rtx (compute_mode);
4405 }
4406 else
4407 {
4408 quotient
4409 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4410 remainder = gen_reg_rtx (compute_mode);
4411 }
4412
4413 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4414 quotient, remainder, 0))
4415 {
4416 /* This could be computed with a branch-less sequence.
4417 Save that for later. */
4418 rtx tem;
4419 rtx label = gen_label_rtx ();
4420 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4421 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4422 NULL_RTX, 0, OPTAB_WIDEN);
4423 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4424 expand_dec (quotient, const1_rtx);
4425 expand_inc (remainder, op1);
4426 emit_label (label);
4427 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4428 }
4429
4430 /* No luck with division elimination or divmod. Have to do it
4431 by conditionally adjusting op0 *and* the result. */
4432 {
4433 rtx label1, label2, label3, label4, label5;
4434 rtx adjusted_op0;
4435 rtx tem;
4436
4437 quotient = gen_reg_rtx (compute_mode);
4438 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4439 label1 = gen_label_rtx ();
4440 label2 = gen_label_rtx ();
4441 label3 = gen_label_rtx ();
4442 label4 = gen_label_rtx ();
4443 label5 = gen_label_rtx ();
4444 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4445 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4446 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4447 quotient, 0, OPTAB_LIB_WIDEN);
4448 if (tem != quotient)
4449 emit_move_insn (quotient, tem);
4450 emit_jump_insn (gen_jump (label5));
4451 emit_barrier ();
4452 emit_label (label1);
4453 expand_inc (adjusted_op0, const1_rtx);
4454 emit_jump_insn (gen_jump (label4));
4455 emit_barrier ();
4456 emit_label (label2);
4457 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4458 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4459 quotient, 0, OPTAB_LIB_WIDEN);
4460 if (tem != quotient)
4461 emit_move_insn (quotient, tem);
4462 emit_jump_insn (gen_jump (label5));
4463 emit_barrier ();
4464 emit_label (label3);
4465 expand_dec (adjusted_op0, const1_rtx);
4466 emit_label (label4);
4467 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4468 quotient, 0, OPTAB_LIB_WIDEN);
4469 if (tem != quotient)
4470 emit_move_insn (quotient, tem);
4471 expand_dec (quotient, const1_rtx);
4472 emit_label (label5);
4473 }
4474 break;
4475
4476 case CEIL_DIV_EXPR:
4477 case CEIL_MOD_EXPR:
4478 if (unsignedp)
4479 {
4480 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4481 {
4482 rtx t1, t2, t3;
4483 unsigned HOST_WIDE_INT d = INTVAL (op1);
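/* Ceiling division by 2^K: Q = (OP0 >> K) + ((OP0 & (2^K - 1)) != 0),
   preferably with the != 0 test done by a store-flag insn.  */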
4484 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4485 floor_log2 (d), tquotient, 1);
4486 t2 = expand_binop (compute_mode, and_optab, op0,
4487 GEN_INT (d - 1),
4488 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4489 t3 = gen_reg_rtx (compute_mode);
4490 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4491 compute_mode, 1, 1);
4492 if (t3 == 0)
4493 {
4494 rtx lab;
4495 lab = gen_label_rtx ();
4496 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4497 expand_inc (t1, const1_rtx);
4498 emit_label (lab);
4499 quotient = t1;
4500 }
4501 else
4502 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4503 t1, t3),
4504 tquotient);
4505 break;
4506 }
4507
4508 /* Try using an instruction that produces both the quotient and
4509 remainder, using truncation. We can easily compensate the
4510 quotient or remainder to get ceiling rounding, once we have the
4511 remainder. Notice that we also compute the final remainder
4512 value here, and return the result right away. */
4513 if (target == 0 || GET_MODE (target) != compute_mode)
4514 target = gen_reg_rtx (compute_mode);
4515
4516 if (rem_flag)
4517 {
4518 remainder = (REG_P (target)
4519 ? target : gen_reg_rtx (compute_mode));
4520 quotient = gen_reg_rtx (compute_mode);
4521 }
4522 else
4523 {
4524 quotient = (REG_P (target)
4525 ? target : gen_reg_rtx (compute_mode));
4526 remainder = gen_reg_rtx (compute_mode);
4527 }
4528
4529 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4530 remainder, 1))
4531 {
4532 /* This could be computed with a branch-less sequence.
4533 Save that for later. */
4534 rtx label = gen_label_rtx ();
4535 do_cmp_and_jump (remainder, const0_rtx, EQ,
4536 compute_mode, label);
4537 expand_inc (quotient, const1_rtx);
4538 expand_dec (remainder, op1);
4539 emit_label (label);
4540 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4541 }
4542
4543 /* No luck with division elimination or divmod. Have to do it
4544 by conditionally adjusting op0 *and* the result. */
4545 {
4546 rtx label1, label2;
4547 rtx adjusted_op0, tem;
4548
4549 quotient = gen_reg_rtx (compute_mode);
4550 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4551 label1 = gen_label_rtx ();
4552 label2 = gen_label_rtx ();
4553 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4554 compute_mode, label1);
4555 emit_move_insn (quotient, const0_rtx);
4556 emit_jump_insn (gen_jump (label2));
4557 emit_barrier ();
4558 emit_label (label1);
4559 expand_dec (adjusted_op0, const1_rtx);
4560 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4561 quotient, 1, OPTAB_LIB_WIDEN);
4562 if (tem != quotient)
4563 emit_move_insn (quotient, tem);
4564 expand_inc (quotient, const1_rtx);
4565 emit_label (label2);
4566 }
4567 }
4568 else /* signed */
4569 {
4570 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4571 && INTVAL (op1) >= 0)
4572 {
4573 /* This is extremely similar to the code for the unsigned case
4574 above. For 2.7 we should merge these variants, but for
4575 2.6.1 I don't want to touch the code for unsigned since that
4576 gets used in C. The signed case will only be used by other
4577 languages (Ada). */
4578
4579 rtx t1, t2, t3;
4580 unsigned HOST_WIDE_INT d = INTVAL (op1);
4581 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4582 floor_log2 (d), tquotient, 0);
4583 t2 = expand_binop (compute_mode, and_optab, op0,
4584 GEN_INT (d - 1),
4585 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4586 t3 = gen_reg_rtx (compute_mode);
4587 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4588 compute_mode, 1, 1);
4589 if (t3 == 0)
4590 {
4591 rtx lab;
4592 lab = gen_label_rtx ();
4593 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4594 expand_inc (t1, const1_rtx);
4595 emit_label (lab);
4596 quotient = t1;
4597 }
4598 else
4599 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4600 t1, t3),
4601 tquotient);
4602 break;
4603 }
4604
4605 /* Try using an instruction that produces both the quotient and
4606 remainder, using truncation. We can easily compensate the
4607 quotient or remainder to get ceiling rounding, once we have the
4608 remainder. Notice that we also compute the final remainder
4609 value here, and return the result right away. */
4610 if (target == 0 || GET_MODE (target) != compute_mode)
4611 target = gen_reg_rtx (compute_mode);
4612 if (rem_flag)
4613 {
4614 remainder = (REG_P (target)
4615 ? target : gen_reg_rtx (compute_mode));
4616 quotient = gen_reg_rtx (compute_mode);
4617 }
4618 else
4619 {
4620 quotient = (REG_P (target)
4621 ? target : gen_reg_rtx (compute_mode));
4622 remainder = gen_reg_rtx (compute_mode);
4623 }
4624
4625 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4626 remainder, 0))
4627 {
4628 /* This could be computed with a branch-less sequence.
4629 Save that for later. */
4630 rtx tem;
4631 rtx label = gen_label_rtx ();
4632 do_cmp_and_jump (remainder, const0_rtx, EQ,
4633 compute_mode, label);
4634 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4635 NULL_RTX, 0, OPTAB_WIDEN);
4636 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4637 expand_inc (quotient, const1_rtx);
4638 expand_dec (remainder, op1);
4639 emit_label (label);
4640 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4641 }
4642
4643 /* No luck with division elimination or divmod. Have to do it
4644 by conditionally adjusting op0 *and* the result. */
4645 {
4646 rtx label1, label2, label3, label4, label5;
4647 rtx adjusted_op0;
4648 rtx tem;
4649
4650 quotient = gen_reg_rtx (compute_mode);
4651 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4652 label1 = gen_label_rtx ();
4653 label2 = gen_label_rtx ();
4654 label3 = gen_label_rtx ();
4655 label4 = gen_label_rtx ();
4656 label5 = gen_label_rtx ();
4657 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4658 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4659 compute_mode, label1);
4660 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4661 quotient, 0, OPTAB_LIB_WIDEN);
4662 if (tem != quotient)
4663 emit_move_insn (quotient, tem);
4664 emit_jump_insn (gen_jump (label5));
4665 emit_barrier ();
4666 emit_label (label1);
4667 expand_dec (adjusted_op0, const1_rtx);
4668 emit_jump_insn (gen_jump (label4));
4669 emit_barrier ();
4670 emit_label (label2);
4671 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4672 compute_mode, label3);
4673 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4674 quotient, 0, OPTAB_LIB_WIDEN);
4675 if (tem != quotient)
4676 emit_move_insn (quotient, tem);
4677 emit_jump_insn (gen_jump (label5));
4678 emit_barrier ();
4679 emit_label (label3);
4680 expand_inc (adjusted_op0, const1_rtx);
4681 emit_label (label4);
4682 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4683 quotient, 0, OPTAB_LIB_WIDEN);
4684 if (tem != quotient)
4685 emit_move_insn (quotient, tem);
4686 expand_inc (quotient, const1_rtx);
4687 emit_label (label5);
4688 }
4689 }
4690 break;
4691
4692 case EXACT_DIV_EXPR:
4693 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4694 {
4695 HOST_WIDE_INT d = INTVAL (op1);
4696 unsigned HOST_WIDE_INT ml;
4697 int pre_shift;
4698 rtx t1;
4699
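/* Since the division is known to be exact, OP0 / D can be computed as
   (OP0 >> PRE_SHIFT) * INVERSE, where PRE_SHIFT strips the factors of 2
   from D and INVERSE is the multiplicative inverse of the odd part of D
   modulo 2^SIZE (see invert_mod2n above); only the low part of the
   multiplication is needed.  */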
4700 pre_shift = floor_log2 (d & -d);
4701 ml = invert_mod2n (d >> pre_shift, size);
4702 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4703 pre_shift, NULL_RTX, unsignedp);
4704 quotient = expand_mult (compute_mode, t1,
4705 gen_int_mode (ml, compute_mode),
4706 NULL_RTX, 1);
4707
4708 insn = get_last_insn ();
4709 set_dst_reg_note (insn, REG_EQUAL,
4710 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4711 compute_mode, op0, op1),
4712 quotient);
4713 }
4714 break;
4715
4716 case ROUND_DIV_EXPR:
4717 case ROUND_MOD_EXPR:
4718 if (unsignedp)
4719 {
4720 rtx tem;
4721 rtx label;
4722 label = gen_label_rtx ();
4723 quotient = gen_reg_rtx (compute_mode);
4724 remainder = gen_reg_rtx (compute_mode);
4725 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4726 {
4727 rtx tem;
4728 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4729 quotient, 1, OPTAB_LIB_WIDEN);
4730 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4731 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4732 remainder, 1, OPTAB_LIB_WIDEN);
4733 }
4734 tem = plus_constant (compute_mode, op1, -1);
4735 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4736 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4737 expand_inc (quotient, const1_rtx);
4738 expand_dec (remainder, op1);
4739 emit_label (label);
4740 }
4741 else
4742 {
4743 rtx abs_rem, abs_op1, tem, mask;
4744 rtx label;
4745 label = gen_label_rtx ();
4746 quotient = gen_reg_rtx (compute_mode);
4747 remainder = gen_reg_rtx (compute_mode);
4748 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4749 {
4750 rtx tem;
4751 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4752 quotient, 0, OPTAB_LIB_WIDEN);
4753 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4754 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4755 remainder, 0, OPTAB_LIB_WIDEN);
4756 }
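/* Round to nearest: when 2 * |remainder| >= |OP1|, bump the quotient by
   one away from zero and subtract OP1 (taken with the quotient's sign)
   from the remainder.  MASK below is the sign of OP0 ^ OP1 spread over
   the word, so (1 ^ MASK) - MASK is +1 or -1 and (OP1 ^ MASK) - MASK is
   +OP1 or -OP1, which makes the adjustment branch-free.  */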
4757 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4758 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4759 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4760 1, NULL_RTX, 1);
4761 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4762 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4763 NULL_RTX, 0, OPTAB_WIDEN);
4764 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4765 size - 1, NULL_RTX, 0);
4766 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4767 NULL_RTX, 0, OPTAB_WIDEN);
4768 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4769 NULL_RTX, 0, OPTAB_WIDEN);
4770 expand_inc (quotient, tem);
4771 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4774 NULL_RTX, 0, OPTAB_WIDEN);
4775 expand_dec (remainder, tem);
4776 emit_label (label);
4777 }
4778 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4779
4780 default:
4781 gcc_unreachable ();
4782 }
4783
4784 if (quotient == 0)
4785 {
4786 if (target && GET_MODE (target) != compute_mode)
4787 target = 0;
4788
4789 if (rem_flag)
4790 {
4791 /* Try to produce the remainder without producing the quotient.
4792 If we seem to have a divmod pattern that does not require widening,
4793 don't try widening here. We should really have a WIDEN argument
4794 to expand_twoval_binop, since what we'd really like to do here is
4795 1) try a mod insn in compute_mode
4796 2) try a divmod insn in compute_mode
4797 3) try a div insn in compute_mode and multiply-subtract to get
4798 remainder
4799 4) try the same things with widening allowed. */
4800 remainder
4801 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4802 op0, op1, target,
4803 unsignedp,
4804 ((optab_handler (optab2, compute_mode)
4805 != CODE_FOR_nothing)
4806 ? OPTAB_DIRECT : OPTAB_WIDEN));
4807 if (remainder == 0)
4808 {
4809 /* No luck there. Can we do remainder and divide at once
4810 without a library call? */
4811 remainder = gen_reg_rtx (compute_mode);
4812 if (! expand_twoval_binop ((unsignedp
4813 ? udivmod_optab
4814 : sdivmod_optab),
4815 op0, op1,
4816 NULL_RTX, remainder, unsignedp))
4817 remainder = 0;
4818 }
4819
4820 if (remainder)
4821 return gen_lowpart (mode, remainder);
4822 }
4823
4824 /* Produce the quotient. Try a quotient insn, but not a library call.
4825 If we have a divmod in this mode, use it in preference to widening
4826 the div (for this test we assume it will not fail). Note that optab2
4827 is set to the one of the two optabs that the call below will use. */
4828 quotient
4829 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4830 op0, op1, rem_flag ? NULL_RTX : target,
4831 unsignedp,
4832 ((optab_handler (optab2, compute_mode)
4833 != CODE_FOR_nothing)
4834 ? OPTAB_DIRECT : OPTAB_WIDEN));
4835
4836 if (quotient == 0)
4837 {
4838 /* No luck there. Try a quotient-and-remainder insn,
4839 keeping the quotient alone. */
4840 quotient = gen_reg_rtx (compute_mode);
4841 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4842 op0, op1,
4843 quotient, NULL_RTX, unsignedp))
4844 {
4845 quotient = 0;
4846 if (! rem_flag)
4847 /* Still no luck. If we are not computing the remainder,
4848 use a library call for the quotient. */
4849 quotient = sign_expand_binop (compute_mode,
4850 udiv_optab, sdiv_optab,
4851 op0, op1, target,
4852 unsignedp, OPTAB_LIB_WIDEN);
4853 }
4854 }
4855 }
4856
4857 if (rem_flag)
4858 {
4859 if (target && GET_MODE (target) != compute_mode)
4860 target = 0;
4861
4862 if (quotient == 0)
4863 {
4864 /* No divide instruction either. Use library for remainder. */
4865 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4866 op0, op1, target,
4867 unsignedp, OPTAB_LIB_WIDEN);
4868 /* No remainder function. Try a quotient-and-remainder
4869 function, keeping the remainder. */
4870 if (!remainder)
4871 {
4872 remainder = gen_reg_rtx (compute_mode);
4873 if (!expand_twoval_binop_libfunc
4874 (unsignedp ? udivmod_optab : sdivmod_optab,
4875 op0, op1,
4876 NULL_RTX, remainder,
4877 unsignedp ? UMOD : MOD))
4878 remainder = NULL_RTX;
4879 }
4880 }
4881 else
4882 {
4883 /* We divided. Now finish doing X - Y * (X / Y). */
4884 remainder = expand_mult (compute_mode, quotient, op1,
4885 NULL_RTX, unsignedp);
4886 remainder = expand_binop (compute_mode, sub_optab, op0,
4887 remainder, target, unsignedp,
4888 OPTAB_LIB_WIDEN);
4889 }
4890 }
4891
4892 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4893 }
4894 \f
4895 /* Return a tree node with data type TYPE, describing the value of X.
4896 Usually this is a VAR_DECL, if there is no obvious better choice.
4897 X may be an expression; however, we only support those expressions
4898 generated by loop.c. */
4899
4900 tree
4901 make_tree (tree type, rtx x)
4902 {
4903 tree t;
4904
4905 switch (GET_CODE (x))
4906 {
4907 case CONST_INT:
4908 {
4909 HOST_WIDE_INT hi = 0;
4910
4911 if (INTVAL (x) < 0
4912 && !(TYPE_UNSIGNED (type)
4913 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4914 < HOST_BITS_PER_WIDE_INT)))
4915 hi = -1;
4916
4917 t = build_int_cst_wide (type, INTVAL (x), hi);
4918
4919 return t;
4920 }
4921
4922 case CONST_DOUBLE:
4923 if (GET_MODE (x) == VOIDmode)
4924 t = build_int_cst_wide (type,
4925 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4926 else
4927 {
4928 REAL_VALUE_TYPE d;
4929
4930 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4931 t = build_real (type, d);
4932 }
4933
4934 return t;
4935
4936 case CONST_VECTOR:
4937 {
4938 int units = CONST_VECTOR_NUNITS (x);
4939 tree itype = TREE_TYPE (type);
4940 tree *elts;
4941 int i;
4942
4943 /* Build a tree with vector elements. */
4944 elts = XALLOCAVEC (tree, units);
4945 for (i = units - 1; i >= 0; --i)
4946 {
4947 rtx elt = CONST_VECTOR_ELT (x, i);
4948 elts[i] = make_tree (itype, elt);
4949 }
4950
4951 return build_vector (type, elts);
4952 }
4953
4954 case PLUS:
4955 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4956 make_tree (type, XEXP (x, 1)));
4957
4958 case MINUS:
4959 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4960 make_tree (type, XEXP (x, 1)));
4961
4962 case NEG:
4963 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4964
4965 case MULT:
4966 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4967 make_tree (type, XEXP (x, 1)));
4968
4969 case ASHIFT:
4970 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4971 make_tree (type, XEXP (x, 1)));
4972
4973 case LSHIFTRT:
4974 t = unsigned_type_for (type);
4975 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4976 make_tree (t, XEXP (x, 0)),
4977 make_tree (type, XEXP (x, 1))));
4978
4979 case ASHIFTRT:
4980 t = signed_type_for (type);
4981 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4982 make_tree (t, XEXP (x, 0)),
4983 make_tree (type, XEXP (x, 1))));
4984
4985 case DIV:
4986 if (TREE_CODE (type) != REAL_TYPE)
4987 t = signed_type_for (type);
4988 else
4989 t = type;
4990
4991 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4992 make_tree (t, XEXP (x, 0)),
4993 make_tree (t, XEXP (x, 1))));
4994 case UDIV:
4995 t = unsigned_type_for (type);
4996 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4997 make_tree (t, XEXP (x, 0)),
4998 make_tree (t, XEXP (x, 1))));
4999
5000 case SIGN_EXTEND:
5001 case ZERO_EXTEND:
5002 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5003 GET_CODE (x) == ZERO_EXTEND);
5004 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5005
5006 case CONST:
5007 return make_tree (type, XEXP (x, 0));
5008
5009 case SYMBOL_REF:
5010 t = SYMBOL_REF_DECL (x);
5011 if (t)
5012 return fold_convert (type, build_fold_addr_expr (t));
5013 /* else fall through. */
5014
5015 default:
5016 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5017
5018 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5019 address mode to pointer mode. */
5020 if (POINTER_TYPE_P (type))
5021 x = convert_memory_address_addr_space
5022 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5023
5024 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5025 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5026 t->decl_with_rtl.rtl = x;
5027
5028 return t;
5029 }
5030 }
5031 \f
5032 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5033 and returning TARGET.
5034
5035 If TARGET is 0, a pseudo-register or constant is returned. */
5036
5037 rtx
5038 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5039 {
5040 rtx tem = 0;
5041
5042 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5043 tem = simplify_binary_operation (AND, mode, op0, op1);
5044 if (tem == 0)
5045 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5046
5047 if (target == 0)
5048 target = tem;
5049 else if (tem != target)
5050 emit_move_insn (target, tem);
5051 return target;
5052 }
5053
5054 /* Helper function for emit_store_flag. */
5055 static rtx
5056 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5057 enum machine_mode mode, enum machine_mode compare_mode,
5058 int unsignedp, rtx x, rtx y, int normalizep,
5059 enum machine_mode target_mode)
5060 {
5061 struct expand_operand ops[4];
5062 rtx op0, last, comparison, subtarget;
5063 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5064
5065 last = get_last_insn ();
5066 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5067 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5068 if (!x || !y)
5069 {
5070 delete_insns_since (last);
5071 return NULL_RTX;
5072 }
5073
5074 if (target_mode == VOIDmode)
5075 target_mode = result_mode;
5076 if (!target)
5077 target = gen_reg_rtx (target_mode);
5078
5079 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5080
5081 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5082 create_fixed_operand (&ops[1], comparison);
5083 create_fixed_operand (&ops[2], x);
5084 create_fixed_operand (&ops[3], y);
5085 if (!maybe_expand_insn (icode, 4, ops))
5086 {
5087 delete_insns_since (last);
5088 return NULL_RTX;
5089 }
5090 subtarget = ops[0].value;
5091
5092 /* If we are converting to a wider mode, first convert to
5093 TARGET_MODE, then normalize. This produces better combining
5094 opportunities on machines that have a SIGN_EXTRACT when we are
5095 testing a single bit. This mostly benefits the 68k.
5096
5097 If STORE_FLAG_VALUE does not have the sign bit set when
5098 interpreted in MODE, we can do this conversion as unsigned, which
5099 is usually more efficient. */
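/* Illustration only: with STORE_FLAG_VALUE == 1 and a QImode scc result
   widened to SImode, the conversion can be the unsigned

       (zero_extend:SI (reg:QI ...))

   because 1 never has its sign bit set; a target whose scc insns leave
   -1 needs a sign extension instead.  */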
5100 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5101 {
5102 convert_move (target, subtarget,
5103 val_signbit_known_clear_p (result_mode,
5104 STORE_FLAG_VALUE));
5105 op0 = target;
5106 result_mode = target_mode;
5107 }
5108 else
5109 op0 = subtarget;
5110
5111 /* If we want to keep subexpressions around, don't reuse our last
5112 target. */
5113 if (optimize)
5114 subtarget = 0;
5115
5116 /* Now normalize to the proper value in MODE. Sometimes we don't
5117 have to do anything. */
5118 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5119 ;
5120 /* STORE_FLAG_VALUE might be the most negative number, so write
5121 the comparison this way to avoid a compile-time warning. */
5122 else if (- normalizep == STORE_FLAG_VALUE)
5123 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5124
5125 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5126 it hard to use a value of just the sign bit due to ANSI integer
5127 constant typing rules. */
5128 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5129 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5130 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5131 normalizep == 1);
5132 else
5133 {
5134 gcc_assert (STORE_FLAG_VALUE & 1);
5135
5136 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5137 if (normalizep == -1)
5138 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5139 }
5140
5141 /* If we were converting to a smaller mode, do the conversion now. */
5142 if (target_mode != result_mode)
5143 {
5144 convert_move (target, op0, 0);
5145 return target;
5146 }
5147 else
5148 return op0;
5149 }
5150
5151
5152 /* A subroutine of emit_store_flag only including "tricks" that do not
5153 need a recursive call. These are kept separate to avoid infinite
5154 loops. */
5155
5156 static rtx
5157 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5158 enum machine_mode mode, int unsignedp, int normalizep,
5159 enum machine_mode target_mode)
5160 {
5161 rtx subtarget;
5162 enum insn_code icode;
5163 enum machine_mode compare_mode;
5164 enum mode_class mclass;
5165 enum rtx_code scode;
5166 rtx tem;
5167
5168 if (unsignedp)
5169 code = unsigned_condition (code);
5170 scode = swap_condition (code);
5171
5172 /* If one operand is constant, make it the second one. Only do this
5173 if the other operand is not constant as well. */
5174
5175 if (swap_commutative_operands_p (op0, op1))
5176 {
5177 tem = op0;
5178 op0 = op1;
5179 op1 = tem;
5180 code = swap_condition (code);
5181 }
5182
5183 if (mode == VOIDmode)
5184 mode = GET_MODE (op0);
5185
5186 /* For some comparisons with 1 and -1, we can convert this to
5187 comparisons with zero. This will often produce more opportunities for
5188 store-flag insns. */
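/* Illustration only, summarizing the rewrites performed by the switch
   below (signed comparisons unless noted):

       x <   1   becomes   x <= 0
       x <= -1   becomes   x <  0
       x >=  1   becomes   x >  0
       x >  -1   becomes   x >= 0
       x >=  1   (unsigned) becomes   x != 0
       x <   1   (unsigned) becomes   x == 0   */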
5189
5190 switch (code)
5191 {
5192 case LT:
5193 if (op1 == const1_rtx)
5194 op1 = const0_rtx, code = LE;
5195 break;
5196 case LE:
5197 if (op1 == constm1_rtx)
5198 op1 = const0_rtx, code = LT;
5199 break;
5200 case GE:
5201 if (op1 == const1_rtx)
5202 op1 = const0_rtx, code = GT;
5203 break;
5204 case GT:
5205 if (op1 == constm1_rtx)
5206 op1 = const0_rtx, code = GE;
5207 break;
5208 case GEU:
5209 if (op1 == const1_rtx)
5210 op1 = const0_rtx, code = NE;
5211 break;
5212 case LTU:
5213 if (op1 == const1_rtx)
5214 op1 = const0_rtx, code = EQ;
5215 break;
5216 default:
5217 break;
5218 }
5219
5220 /* If we are comparing a double-word integer with zero or -1, we can
5221 convert the comparison into one involving a single word. */
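/* Illustration only: for a DImode X on a 32-bit target, with LO and HI
   its two word-sized halves,

       X == 0    is computed as   (LO | HI) == 0
       X == -1   is computed as   (LO & HI) == -1
       X <  0    tests only the sign bit of HI
       X >= 0    likewise tests only HI.  */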
5222 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5223 && GET_MODE_CLASS (mode) == MODE_INT
5224 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5225 {
5226 if ((code == EQ || code == NE)
5227 && (op1 == const0_rtx || op1 == constm1_rtx))
5228 {
5229 rtx op00, op01;
5230
5231 /* Do a logical OR or AND of the two words and compare the
5232 result. */
5233 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5234 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5235 tem = expand_binop (word_mode,
5236 op1 == const0_rtx ? ior_optab : and_optab,
5237 op00, op01, NULL_RTX, unsignedp,
5238 OPTAB_DIRECT);
5239
5240 if (tem != 0)
5241 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5242 unsignedp, normalizep);
5243 }
5244 else if ((code == LT || code == GE) && op1 == const0_rtx)
5245 {
5246 rtx op0h;
5247
5248 /* If testing the sign bit, we can just test the high word. */
5249 op0h = simplify_gen_subreg (word_mode, op0, mode,
5250 subreg_highpart_offset (word_mode,
5251 mode));
5252 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5253 unsignedp, normalizep);
5254 }
5255 else
5256 tem = NULL_RTX;
5257
5258 if (tem)
5259 {
5260 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5261 return tem;
5262 if (!target)
5263 target = gen_reg_rtx (target_mode);
5264
5265 convert_move (target, tem,
5266 !val_signbit_known_set_p (word_mode,
5267 (normalizep ? normalizep
5268 : STORE_FLAG_VALUE)));
5269 return target;
5270 }
5271 }
5272
5273 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5274 complement of A (for GE) and shifting the sign bit to the low bit. */
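/* Illustration only, in C terms for a 32-bit mode with
   STORE_FLAG_VALUE == 1:

       (A <  0)   is   (unsigned) A  >> 31
       (A >= 0)   is   (unsigned) ~A >> 31

   For a 0/-1 result (NORMALIZEP == -1) the shift is arithmetic rather
   than logical.  */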
5275 if (op1 == const0_rtx && (code == LT || code == GE)
5276 && GET_MODE_CLASS (mode) == MODE_INT
5277 && (normalizep || STORE_FLAG_VALUE == 1
5278 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5279 {
5280 subtarget = target;
5281
5282 if (!target)
5283 target_mode = mode;
5284
5285 /* If the result is to be wider than OP0, it is best to convert it
5286 first. If it is to be narrower, it is *incorrect* to convert it
5287 first. */
5288 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5289 {
5290 op0 = convert_modes (target_mode, mode, op0, 0);
5291 mode = target_mode;
5292 }
5293
5294 if (target_mode != mode)
5295 subtarget = 0;
5296
5297 if (code == GE)
5298 op0 = expand_unop (mode, one_cmpl_optab, op0,
5299 ((STORE_FLAG_VALUE == 1 || normalizep)
5300 ? 0 : subtarget), 0);
5301
5302 if (STORE_FLAG_VALUE == 1 || normalizep)
5303 /* If we are supposed to produce a 0/1 value, we want to do
5304 a logical shift from the sign bit to the low-order bit; for
5305 a -1/0 value, we do an arithmetic shift. */
5306 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5307 GET_MODE_BITSIZE (mode) - 1,
5308 subtarget, normalizep != -1);
5309
5310 if (mode != target_mode)
5311 op0 = convert_modes (target_mode, mode, op0, 0);
5312
5313 return op0;
5314 }
5315
5316 mclass = GET_MODE_CLASS (mode);
5317 for (compare_mode = mode; compare_mode != VOIDmode;
5318 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5319 {
5320 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5321 icode = optab_handler (cstore_optab, optab_mode);
5322 if (icode != CODE_FOR_nothing)
5323 {
5324 do_pending_stack_adjust ();
5325 tem = emit_cstore (target, icode, code, mode, compare_mode,
5326 unsignedp, op0, op1, normalizep, target_mode);
5327 if (tem)
5328 return tem;
5329
5330 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5331 {
5332 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5333 unsignedp, op1, op0, normalizep, target_mode);
5334 if (tem)
5335 return tem;
5336 }
5337 break;
5338 }
5339 }
5340
5341 return 0;
5342 }
5343
5344 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5345 and storing in TARGET. Normally return TARGET.
5346 Return 0 if that cannot be done.
5347
5348 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5349 it is VOIDmode, they cannot both be CONST_INT.
5350
5351 UNSIGNEDP is for the case where we have to widen the operands
5352 to perform the operation. It says to use zero-extension.
5353
5354 NORMALIZEP is 1 if we should convert the result to be either zero
5355 or one. NORMALIZEP is -1 if we should convert the result to be
5356 either zero or -1. If NORMALIZEP is zero, the result will be left
5357 "raw" out of the scc insn. */
5358
5359 rtx
5360 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5361 enum machine_mode mode, int unsignedp, int normalizep)
5362 {
5363 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5364 enum rtx_code rcode;
5365 rtx subtarget;
5366 rtx tem, last, trueval;
5367
5368 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5369 target_mode);
5370 if (tem)
5371 return tem;
5372
5373 /* If we reached here, we can't do this with a scc insn; however, there
5374 are some comparisons that can be done in other ways. Don't do any
5375 of these cases if branches are very cheap. */
5376 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5377 return 0;
5378
5379 /* See what we need to return. We can only return a 1, -1, or the
5380 sign bit. */
5381
5382 if (normalizep == 0)
5383 {
5384 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5385 normalizep = STORE_FLAG_VALUE;
5386
5387 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5388 ;
5389 else
5390 return 0;
5391 }
5392
5393 last = get_last_insn ();
5394
5395 /* If optimizing, use different pseudo registers for each insn, instead
5396 of reusing the same pseudo. This leads to better CSE, but slows
5397 down the compiler, since there are more pseudos. */
5398 subtarget = (!optimize
5399 && (target_mode == mode)) ? target : NULL_RTX;
5400 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5401
5402 /* For floating-point comparisons, try the reverse comparison or try
5403 changing the "orderedness" of the comparison. */
5404 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5405 {
5406 enum rtx_code first_code;
5407 bool and_them;
5408
5409 rcode = reverse_condition_maybe_unordered (code);
5410 if (can_compare_p (rcode, mode, ccp_store_flag)
5411 && (code == ORDERED || code == UNORDERED
5412 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5413 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5414 {
5415 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5416 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5417
5418 /* For the reverse comparison, use either an addition or an XOR. */
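/* Illustration only: when STORE_FLAG_VALUE == 1 and NORMALIZEP == -1,
   the reverse comparison REV is computed as a 0/1 value, and

       REV + (-1)  ==  -(original condition)

   i.e. adding NORMALIZEP turns 0 into -1 and 1 into 0, which is exactly
   the normalized result wanted for the original comparison.  */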
5419 if (want_add
5420 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5421 optimize_insn_for_speed_p ()) == 0)
5422 {
5423 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5424 STORE_FLAG_VALUE, target_mode);
5425 if (tem)
5426 return expand_binop (target_mode, add_optab, tem,
5427 GEN_INT (normalizep),
5428 target, 0, OPTAB_WIDEN);
5429 }
5430 else if (!want_add
5431 && rtx_cost (trueval, XOR, 1,
5432 optimize_insn_for_speed_p ()) == 0)
5433 {
5434 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5435 normalizep, target_mode);
5436 if (tem)
5437 return expand_binop (target_mode, xor_optab, tem, trueval,
5438 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5439 }
5440 }
5441
5442 delete_insns_since (last);
5443
5444 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5445 if (code == ORDERED || code == UNORDERED)
5446 return 0;
5447
5448 and_them = split_comparison (code, mode, &first_code, &code);
5449
5450 /* If there are no NaNs, the first comparison should always fall through.
5451 Effectively change the comparison to the other one. */
5452 if (!HONOR_NANS (mode))
5453 {
5454 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5455 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5456 target_mode);
5457 }
5458
5459 #ifdef HAVE_conditional_move
5460 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5461 conditional move. */
5462 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5463 normalizep, target_mode);
5464 if (tem == 0)
5465 return 0;
5466
5467 if (and_them)
5468 tem = emit_conditional_move (target, code, op0, op1, mode,
5469 tem, const0_rtx, GET_MODE (tem), 0);
5470 else
5471 tem = emit_conditional_move (target, code, op0, op1, mode,
5472 trueval, tem, GET_MODE (tem), 0);
5473
5474 if (tem == 0)
5475 delete_insns_since (last);
5476 return tem;
5477 #else
5478 return 0;
5479 #endif
5480 }
5481
5482 /* The remaining tricks only apply to integer comparisons. */
5483
5484 if (GET_MODE_CLASS (mode) != MODE_INT)
5485 return 0;
5486
5487 /* If this is an equality comparison of integers, we can try to exclusive-or
5488 (or subtract) the two operands and use a recursive call to try the
5489 comparison with zero. Don't do any of these cases if branches are
5490 very cheap. */
5491
5492 if ((code == EQ || code == NE) && op1 != const0_rtx)
5493 {
5494 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5495 OPTAB_WIDEN);
5496
5497 if (tem == 0)
5498 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5499 OPTAB_WIDEN);
5500 if (tem != 0)
5501 tem = emit_store_flag (target, code, tem, const0_rtx,
5502 mode, unsignedp, normalizep);
5503 if (tem != 0)
5504 return tem;
5505
5506 delete_insns_since (last);
5507 }
5508
5509 /* For integer comparisons, try the reverse comparison. However, for
5510 small X and if we'd have to extend anyway, implementing "X != 0"
5511 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5512 rcode = reverse_condition (code);
5513 if (can_compare_p (rcode, mode, ccp_store_flag)
5514 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5515 && code == NE
5516 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5517 && op1 == const0_rtx))
5518 {
5519 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5520 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5521
5522 /* Again, for the reverse comparison, use either an addition or an XOR. */
5523 if (want_add
5524 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5525 optimize_insn_for_speed_p ()) == 0)
5526 {
5527 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5528 STORE_FLAG_VALUE, target_mode);
5529 if (tem != 0)
5530 tem = expand_binop (target_mode, add_optab, tem,
5531 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5532 }
5533 else if (!want_add
5534 && rtx_cost (trueval, XOR, 1,
5535 optimize_insn_for_speed_p ()) == 0)
5536 {
5537 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5538 normalizep, target_mode);
5539 if (tem != 0)
5540 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5541 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5542 }
5543
5544 if (tem != 0)
5545 return tem;
5546 delete_insns_since (last);
5547 }
5548
5549 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5550 the constant zero. Reject all other comparisons at this point. Only
5551 do LE and GT if branches are expensive since they are expensive on
5552 2-operand machines. */
5553
5554 if (op1 != const0_rtx
5555 || (code != EQ && code != NE
5556 && (BRANCH_COST (optimize_insn_for_speed_p (),
5557 false) <= 1 || (code != LE && code != GT))))
5558 return 0;
5559
5560 /* Try to put the result of the comparison in the sign bit. Assume we can't
5561 do the necessary operation below. */
5562
5563 tem = 0;
5564
5565 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5566 the sign bit set. */
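/* Illustration only, for a 32-bit A:
       A =  5:   5 | 4   =  5    sign bit clear
       A =  0:   0 | -1  = -1    sign bit set
       A = -3:  -3 | -4  = -3    sign bit set  */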
5567
5568 if (code == LE)
5569 {
5570 /* This is destructive, so SUBTARGET can't be OP0. */
5571 if (rtx_equal_p (subtarget, op0))
5572 subtarget = 0;
5573
5574 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5575 OPTAB_WIDEN);
5576 if (tem)
5577 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5578 OPTAB_WIDEN);
5579 }
5580
5581 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5582 number of bits in the mode of OP0, minus one. */
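/* Illustration only, for a 32-bit A (the shift is arithmetic):
       A =  5:   (5 >> 31)  - 5    =  0 - 5  = -5   sign bit set
       A =  0:   (0 >> 31)  - 0    =  0             sign bit clear
       A = -3:   (-3 >> 31) - (-3) = -1 + 3  =  2   sign bit clear  */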
5583
5584 if (code == GT)
5585 {
5586 if (rtx_equal_p (subtarget, op0))
5587 subtarget = 0;
5588
5589 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5590 GET_MODE_BITSIZE (mode) - 1,
5591 subtarget, 0);
5592 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5593 OPTAB_WIDEN);
5594 }
5595
5596 if (code == EQ || code == NE)
5597 {
5598 /* For EQ or NE, one way to do the comparison is to apply an operation
5599 that converts the operand into a positive number if it is nonzero
5600 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5601 for NE we negate. This puts the result in the sign bit. Then we
5602 normalize with a shift, if needed.
5603
5604 Two operations that can do the above actions are ABS and FFS, so try
5605 them. If that doesn't work, and MODE is smaller than a full word,
5606 we can use zero-extension to the wider mode (an unsigned conversion)
5607 as the operation. */
5608
5609 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5610 that is compensated by the subsequent overflow when subtracting
5611 one / negating. */
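/* Illustration only, using ABS on a 32-bit X:
       X != 0   is the sign bit of   -abs (X)
       X == 0   is the sign bit of   abs (X) - 1
   (X == INT_MIN works out because abs (INT_MIN) is INT_MIN, and the
   subsequent negation / subtraction overflows back into a value with
   the expected sign bit.)  */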
5612
5613 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5614 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5615 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5616 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5617 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5618 {
5619 tem = convert_modes (word_mode, mode, op0, 1);
5620 mode = word_mode;
5621 }
5622
5623 if (tem != 0)
5624 {
5625 if (code == EQ)
5626 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5627 0, OPTAB_WIDEN);
5628 else
5629 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5630 }
5631
5632 /* If we couldn't do it that way, for NE we can "or" the two's complement
5633 of the value with itself. For EQ, we take the one's complement of
5634 that "or", which is an extra insn, so we only handle EQ if branches
5635 are expensive. */
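/* Illustration only, for a 32-bit X:
       X != 0   is the sign bit of   (-X) | X
       X == 0   is the sign bit of   ~((-X) | X)
   e.g. X = 6 gives (-6 | 6) = -2 (sign set), X = 0 gives 0 (clear).  */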
5636
5637 if (tem == 0
5638 && (code == NE
5639 || BRANCH_COST (optimize_insn_for_speed_p (),
5640 false) > 1))
5641 {
5642 if (rtx_equal_p (subtarget, op0))
5643 subtarget = 0;
5644
5645 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5646 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5648
5649 if (tem && code == EQ)
5650 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5651 }
5652 }
5653
5654 if (tem && normalizep)
5655 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5656 GET_MODE_BITSIZE (mode) - 1,
5657 subtarget, normalizep == 1);
5658
5659 if (tem)
5660 {
5661 if (!target)
5662 ;
5663 else if (GET_MODE (tem) != target_mode)
5664 {
5665 convert_move (target, tem, 0);
5666 tem = target;
5667 }
5668 else if (!subtarget)
5669 {
5670 emit_move_insn (target, tem);
5671 tem = target;
5672 }
5673 }
5674 else
5675 delete_insns_since (last);
5676
5677 return tem;
5678 }
5679
5680 /* Like emit_store_flag, but always succeeds. */
5681
5682 rtx
5683 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5684 enum machine_mode mode, int unsignedp, int normalizep)
5685 {
5686 rtx tem, label;
5687 rtx trueval, falseval;
5688
5689 /* First see if emit_store_flag can do the job. */
5690 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5691 if (tem != 0)
5692 return tem;
5693
5694 if (!target)
5695 target = gen_reg_rtx (word_mode);
5696
5697 /* If this failed, we have to do this with set/compare/jump/set code.
5698 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5699 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5700 if (code == NE
5701 && GET_MODE_CLASS (mode) == MODE_INT
5702 && REG_P (target)
5703 && op0 == target
5704 && op1 == const0_rtx)
5705 {
5706 label = gen_label_rtx ();
5707 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5708 mode, NULL_RTX, NULL_RTX, label, -1);
5709 emit_move_insn (target, trueval);
5710 emit_label (label);
5711 return target;
5712 }
5713
5714 if (!REG_P (target)
5715 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5716 target = gen_reg_rtx (GET_MODE (target));
5717
5718 /* Jump in the right direction if the target cannot implement CODE
5719 but can jump on its reverse condition. */
5720 falseval = const0_rtx;
5721 if (! can_compare_p (code, mode, ccp_jump)
5722 && (! FLOAT_MODE_P (mode)
5723 || code == ORDERED || code == UNORDERED
5724 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5725 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5726 {
5727 enum rtx_code rcode;
5728 if (FLOAT_MODE_P (mode))
5729 rcode = reverse_condition_maybe_unordered (code);
5730 else
5731 rcode = reverse_condition (code);
5732
5733 /* Canonicalize to UNORDERED for the libcall. */
5734 if (can_compare_p (rcode, mode, ccp_jump)
5735 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5736 {
5737 falseval = trueval;
5738 trueval = const0_rtx;
5739 code = rcode;
5740 }
5741 }
5742
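/* Illustration only: the code below emits, in effect,

       target = TRUEVAL;
       if (OP0 CODE OP1)
         goto done;
       target = FALSEVAL;
     done:

   where TRUEVAL and FALSEVAL may have been swapped above if only the
   reverse condition is directly usable.  */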
5743 emit_move_insn (target, trueval);
5744 label = gen_label_rtx ();
5745 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5746 NULL_RTX, label, -1);
5747
5748 emit_move_insn (target, falseval);
5749 emit_label (label);
5750
5751 return target;
5752 }
5753 \f
5754 /* Perform possibly multi-word comparison and conditional jump to LABEL
5755 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5756 now a thin wrapper around do_compare_rtx_and_jump. */
5757
5758 static void
5759 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5760 rtx label)
5761 {
5762 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5763 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5764 NULL_RTX, NULL_RTX, label, -1);
5765 }