expmed.c (narrow_bit_field_mem): New function.
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
 69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
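/* For example, 8 is binary 1000 and 7 is binary 0111, so (8 & 7) == 0,
   whereas 6 is binary 0110 and 5 is binary 0101, so (6 & 5) != 0.
   Zero also passes the test, since (0 & (0 - 1)) == 0.  */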
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
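/* Scratch rtl used by init_expmed to estimate the cost of various
   operations without going through the rtl allocator.  Each rtx_def
   that needs a second operand is immediately followed by an rtunion
   that provides the storage for it.  */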
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
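/* Subroutine of init_expmed: record the cost of converting from FROM_MODE
   to TO_MODE when optimizing for speed or for size, as selected by SPEED,
   modelling the conversion as a truncation or extension of ALL->reg.  */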
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume cost of zero-extend and sign-extend is the same. */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
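/* Subroutine of init_expmed: record the costs of the basic arithmetic,
   shift and widening operations in MODE when optimizing for speed or for
   size, as selected by SPEED, using the scratch rtl in *ALL.  */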
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Adjust bitfield memory MEM so that it points to the first unit of mode
391 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
392 If MODE is BLKmode, return a reference to every byte in the bitfield.
393 Set *NEW_BITNUM to the bit position of the field within the new memory. */
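/* For example (illustrative only, assuming 8-bit units): narrowing to
   MODE == HImode with BITNUM == 21 adjusts MEM by an offset of 2 bytes
   and sets *NEW_BITNUM to 21 % 16 == 5.  */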
394
395 static rtx
396 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
397 unsigned HOST_WIDE_INT bitsize,
398 unsigned HOST_WIDE_INT bitnum,
399 unsigned HOST_WIDE_INT *new_bitnum)
400 {
401 if (mode == BLKmode)
402 {
403 *new_bitnum = bitnum % BITS_PER_UNIT;
404 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
405 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
406 / BITS_PER_UNIT);
407 return adjust_bitfield_address_size (mem, mode, offset, size);
408 }
409 else
410 {
411 unsigned int unit = GET_MODE_BITSIZE (mode);
412 *new_bitnum = bitnum % unit;
413 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
414 return adjust_bitfield_address (mem, mode, offset);
415 }
416 }
417
418 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
419 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
420 offset is then BITNUM / BITS_PER_UNIT. */
421
422 static bool
423 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
424 unsigned HOST_WIDE_INT bitsize,
425 enum machine_mode struct_mode)
426 {
427 if (BYTES_BIG_ENDIAN)
428 return (bitnum % BITS_PER_UNIT == 0
429 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
430 || (bitnum + bitsize) % BITS_PER_WORD == 0));
431 else
432 return bitnum % BITS_PER_WORD == 0;
433 }
434 \f
435 /* Try to use an insv pattern to store VALUE into a field of OP0.
436 OP_MODE is the mode of the insertion and BITSIZE and BITNUM are
437 as for store_bit_field. */
438
439 static bool
440 store_bit_field_using_insv (rtx op0, unsigned HOST_WIDE_INT bitsize,
441 unsigned HOST_WIDE_INT bitnum, rtx value,
442 enum machine_mode op_mode)
443 {
444 struct expand_operand ops[4];
445 rtx value1;
446 rtx xop0 = op0;
447 rtx last = get_last_insn ();
448 bool copy_back = false;
449
450 unsigned int unit = GET_MODE_BITSIZE (op_mode);
451 if (bitsize == 0 || bitsize > unit)
452 return false;
453
454 if (MEM_P (xop0))
455 /* Get a reference to the first byte of the field. */
456 xop0 = narrow_bit_field_mem (xop0, byte_mode, bitsize, bitnum, &bitnum);
457 else
458 {
459 /* Convert from counting within OP0 to counting in OP_MODE. */
460 if (BYTES_BIG_ENDIAN)
461 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
462
463 /* If xop0 is a register, we need it in OP_MODE
464 to make it acceptable to the format of insv. */
465 if (GET_CODE (xop0) == SUBREG)
466 /* We can't just change the mode, because this might clobber op0,
467 and we will need the original value of op0 if insv fails. */
468 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
469 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
470 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
471 }
472
473 /* If the destination is a paradoxical subreg such that we need a
474 truncate to the inner mode, perform the insertion on a temporary and
475 truncate the result to the original destination. Note that we can't
476 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
477 X) 0)) is (reg:N X). */
478 if (GET_CODE (xop0) == SUBREG
479 && REG_P (SUBREG_REG (xop0))
480 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
481 op_mode))
482 {
483 rtx tem = gen_reg_rtx (op_mode);
484 emit_move_insn (tem, xop0);
485 xop0 = tem;
486 copy_back = true;
487 }
488
489 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
490 "backwards" from the size of the unit we are inserting into.
491 Otherwise, we count bits from the most significant on a
492 BYTES/BITS_BIG_ENDIAN machine. */
493
494 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
495 bitnum = unit - bitsize - bitnum;
496
497 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
498 value1 = value;
499 if (GET_MODE (value) != op_mode)
500 {
501 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
502 {
503 /* Optimization: Don't bother really extending VALUE
504 if it has all the bits we will actually use. However,
505 if we must narrow it, be sure we do it correctly. */
506
507 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
508 {
509 rtx tmp;
510
511 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
512 if (! tmp)
513 tmp = simplify_gen_subreg (op_mode,
514 force_reg (GET_MODE (value),
515 value1),
516 GET_MODE (value), 0);
517 value1 = tmp;
518 }
519 else
520 value1 = gen_lowpart (op_mode, value1);
521 }
522 else if (CONST_INT_P (value))
523 value1 = gen_int_mode (INTVAL (value), op_mode);
524 else
525 /* Parse phase is supposed to make VALUE's data type
526 match that of the component reference, which is a type
527 at least as wide as the field; so VALUE should have
528 a mode that corresponds to that type. */
529 gcc_assert (CONSTANT_P (value));
530 }
531
532 create_fixed_operand (&ops[0], xop0);
533 create_integer_operand (&ops[1], bitsize);
534 create_integer_operand (&ops[2], bitnum);
535 create_input_operand (&ops[3], value1, op_mode);
536 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
537 {
538 if (copy_back)
539 convert_move (op0, xop0, true);
540 return true;
541 }
542 delete_insns_since (last);
543 return false;
544 }
545
546 /* A subroutine of store_bit_field, with the same arguments. Return true
547 if the operation could be implemented.
548
549 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
550 no other way of implementing the operation. If FALLBACK_P is false,
551 return false instead. */
552
553 static bool
554 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
555 unsigned HOST_WIDE_INT bitnum,
556 unsigned HOST_WIDE_INT bitregion_start,
557 unsigned HOST_WIDE_INT bitregion_end,
558 enum machine_mode fieldmode,
559 rtx value, bool fallback_p)
560 {
561 rtx op0 = str_rtx;
562 rtx orig_value;
563
564 while (GET_CODE (op0) == SUBREG)
565 {
566 /* The following line once was done only if WORDS_BIG_ENDIAN,
567 but I think that is a mistake. WORDS_BIG_ENDIAN is
568 meaningful at a much higher level; when structures are copied
569 between memory and regs, the higher-numbered regs
570 always get higher addresses. */
571 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
572 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
573 int byte_offset = 0;
574
575 /* Paradoxical subregs need special handling on big endian machines. */
576 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
577 {
578 int difference = inner_mode_size - outer_mode_size;
579
580 if (WORDS_BIG_ENDIAN)
581 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
582 if (BYTES_BIG_ENDIAN)
583 byte_offset += difference % UNITS_PER_WORD;
584 }
585 else
586 byte_offset = SUBREG_BYTE (op0);
587
588 bitnum += byte_offset * BITS_PER_UNIT;
589 op0 = SUBREG_REG (op0);
590 }
591
592 /* No action is needed if the target is a register and if the field
593 lies completely outside that register. This can occur if the source
594 code contains an out-of-bounds access to a small array. */
595 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
596 return true;
597
598 /* Use vec_set patterns for inserting parts of vectors whenever
599 available. */
600 if (VECTOR_MODE_P (GET_MODE (op0))
601 && !MEM_P (op0)
602 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
603 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
604 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
605 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
606 {
607 struct expand_operand ops[3];
608 enum machine_mode outermode = GET_MODE (op0);
609 enum machine_mode innermode = GET_MODE_INNER (outermode);
610 enum insn_code icode = optab_handler (vec_set_optab, outermode);
611 int pos = bitnum / GET_MODE_BITSIZE (innermode);
612
613 create_fixed_operand (&ops[0], op0);
614 create_input_operand (&ops[1], value, innermode);
615 create_integer_operand (&ops[2], pos);
616 if (maybe_expand_insn (icode, 3, ops))
617 return true;
618 }
619
 620   /* If the target is a register, then overwriting the entire object or
 621      storing a full-word or multi-word field can be done with just a SUBREG.  */
622 if (!MEM_P (op0)
623 && bitsize == GET_MODE_BITSIZE (fieldmode)
624 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
625 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
626 {
627 /* Use the subreg machinery either to narrow OP0 to the required
628 words or to cope with mode punning between equal-sized modes. */
629 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
630 bitnum / BITS_PER_UNIT);
631 if (sub)
632 {
633 emit_move_insn (sub, value);
634 return true;
635 }
636 }
637
638 /* If the target is memory, storing any naturally aligned field can be
639 done with a simple store. For targets that support fast unaligned
640 memory, any naturally sized, unit aligned field can be done directly. */
641 if (MEM_P (op0)
642 && bitnum % BITS_PER_UNIT == 0
643 && bitsize == GET_MODE_BITSIZE (fieldmode)
644 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
645 || (bitnum % bitsize == 0
646 && MEM_ALIGN (op0) % bitsize == 0)))
647 {
648 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
649 emit_move_insn (op0, value);
650 return true;
651 }
652
653 /* Make sure we are playing with integral modes. Pun with subregs
654 if we aren't. This must come after the entire register case above,
655 since that case is valid for any mode. The following cases are only
656 valid for integral modes. */
657 {
658 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
659 if (imode != GET_MODE (op0))
660 {
661 if (MEM_P (op0))
662 op0 = adjust_bitfield_address (op0, imode, 0);
663 else
664 {
665 gcc_assert (imode != BLKmode);
666 op0 = gen_lowpart (imode, op0);
667 }
668 }
669 }
670
671 /* Storing an lsb-aligned field in a register
672 can be done with a movstrict instruction. */
673
674 if (!MEM_P (op0)
675 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
676 && bitsize == GET_MODE_BITSIZE (fieldmode)
677 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
678 {
679 struct expand_operand ops[2];
680 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
681 rtx arg0 = op0;
682 unsigned HOST_WIDE_INT subreg_off;
683
684 if (GET_CODE (arg0) == SUBREG)
685 {
686 /* Else we've got some float mode source being extracted into
687 a different float mode destination -- this combination of
688 subregs results in Severe Tire Damage. */
689 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
690 || GET_MODE_CLASS (fieldmode) == MODE_INT
691 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
692 arg0 = SUBREG_REG (arg0);
693 }
694
695 subreg_off = bitnum / BITS_PER_UNIT;
696 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
697 {
698 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
699
700 create_fixed_operand (&ops[0], arg0);
701 /* Shrink the source operand to FIELDMODE. */
702 create_convert_operand_to (&ops[1], value, fieldmode, false);
703 if (maybe_expand_insn (icode, 2, ops))
704 return true;
705 }
706 }
707
708 /* Handle fields bigger than a word. */
709
710 if (bitsize > BITS_PER_WORD)
711 {
712 /* Here we transfer the words of the field
713 in the order least significant first.
714 This is because the most significant word is the one which may
715 be less than full.
716 However, only do that if the value is not BLKmode. */
717
718 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
719 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
720 unsigned int i;
721 rtx last;
722
723 /* This is the mode we must force value to, so that there will be enough
724 subwords to extract. Note that fieldmode will often (always?) be
725 VOIDmode, because that is what store_field uses to indicate that this
726 is a bit field, but passing VOIDmode to operand_subword_force
727 is not allowed. */
728 fieldmode = GET_MODE (value);
729 if (fieldmode == VOIDmode)
730 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
731
732 last = get_last_insn ();
733 for (i = 0; i < nwords; i++)
734 {
735 /* If I is 0, use the low-order word in both field and target;
736 if I is 1, use the next to lowest word; and so on. */
737 unsigned int wordnum = (backwards
738 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
739 - i - 1
740 : i);
741 unsigned int bit_offset = (backwards
742 ? MAX ((int) bitsize - ((int) i + 1)
743 * BITS_PER_WORD,
744 0)
745 : (int) i * BITS_PER_WORD);
746 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
747 unsigned HOST_WIDE_INT new_bitsize =
748 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
749
750 /* If the remaining chunk doesn't have full wordsize we have
751 to make sure that for big endian machines the higher order
752 bits are used. */
753 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
754 value_word = simplify_expand_binop (word_mode, lshr_optab,
755 value_word,
756 GEN_INT (BITS_PER_WORD
757 - new_bitsize),
758 NULL_RTX, true,
759 OPTAB_LIB_WIDEN);
760
761 if (!store_bit_field_1 (op0, new_bitsize,
762 bitnum + bit_offset,
763 bitregion_start, bitregion_end,
764 word_mode,
765 value_word, fallback_p))
766 {
767 delete_insns_since (last);
768 return false;
769 }
770 }
771 return true;
772 }
773
774 /* If VALUE has a floating-point or complex mode, access it as an
775 integer of the corresponding size. This can occur on a machine
776 with 64 bit registers that uses SFmode for float. It can also
777 occur for unaligned float or complex fields. */
778 orig_value = value;
779 if (GET_MODE (value) != VOIDmode
780 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
781 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
782 {
783 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
784 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
785 }
786
787 /* If OP0 is a multi-word register, narrow it to the affected word.
788 If the region spans two words, defer to store_split_bit_field. */
789 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
790 {
791 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
792 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
793 gcc_assert (op0);
794 bitnum %= BITS_PER_WORD;
795 if (bitnum + bitsize > BITS_PER_WORD)
796 {
797 if (!fallback_p)
798 return false;
799
800 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
801 bitregion_end, value);
802 return true;
803 }
804 }
805
 806   /* From here on we can assume that the field to be stored fits
 807      within a word.  If the destination is a register, it too fits
 808      in a word.  */
809
810 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
811 if (op_mode != MAX_MACHINE_MODE
812 && !MEM_P (op0)
813 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
814 return true;
815
816 /* If OP0 is a memory, try copying it to a register and seeing if a
817 cheap register alternative is available. */
818 if (op_mode != MAX_MACHINE_MODE && MEM_P (op0))
819 {
820 enum machine_mode bestmode;
821 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
822
823 /* Do not use insv for volatile bitfields when
824 -fstrict-volatile-bitfields is in effect. */
825 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
826 /* Do not use insv if the bit region is restricted and
827 an op_mode integer doesn't fit into the restricted region. */
828 && !(bitregion_end
829 && (bitnum - (bitnum % BITS_PER_UNIT)
830 + GET_MODE_BITSIZE (op_mode)
831 > bitregion_end + 1))
832 && store_bit_field_using_insv (op0, bitsize, bitnum, value, op_mode))
833 return true;
834
835 if (bitregion_end)
836 maxbits = bitregion_end - bitregion_start + 1;
837
838 /* Get the mode to use for inserting into this field. If OP0 is
839 BLKmode, get the smallest mode consistent with the alignment. If
840 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
841 mode. Otherwise, use the smallest mode containing the field. */
842
843 if (GET_MODE (op0) == BLKmode
844 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
845 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))
846 bestmode = get_best_mode (bitsize, bitnum,
847 bitregion_start, bitregion_end,
848 MEM_ALIGN (op0), op_mode,
849 MEM_VOLATILE_P (op0));
850 else
851 bestmode = GET_MODE (op0);
852
853 if (bestmode != VOIDmode
854 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
855 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
856 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
857 {
858 rtx last, tempreg, xop0;
859 unsigned HOST_WIDE_INT bitpos;
860
861 last = get_last_insn ();
862
863 /* Adjust address to point to the containing unit of
864 that mode. Compute the offset as a multiple of this unit,
865 counting in bytes. */
866 xop0 = narrow_bit_field_mem (op0, bestmode, bitsize, bitnum,
867 &bitpos);
868
869 /* Fetch that unit, store the bitfield in it, then store
870 the unit. */
871 tempreg = copy_to_reg (xop0);
872 if (store_bit_field_1 (tempreg, bitsize, bitpos,
873 bitregion_start, bitregion_end,
874 fieldmode, orig_value, false))
875 {
876 emit_move_insn (xop0, tempreg);
877 return true;
878 }
879 delete_insns_since (last);
880 }
881 }
882
883 if (!fallback_p)
884 return false;
885
886 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
887 bitregion_end, value);
888 return true;
889 }
890
891 /* Generate code to store value from rtx VALUE
892 into a bit-field within structure STR_RTX
893 containing BITSIZE bits starting at bit BITNUM.
894
 895    BITREGION_START is the bit position of the first bitfield in this region.
 896    BITREGION_END is the bit position of the last bitfield in this region.
 897    These two fields are 0 if the C++ memory model does not apply,
898 or we are not interested in keeping track of bitfield regions.
899
900 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
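/* A typical call (illustrative only, with MEM the containing rtx and
   VALUE the value to store) to store an 8-bit field starting at bit 3,
   with no C++ memory model bit region in force:

     store_bit_field (mem, 8, 3, 0, 0, QImode, value);  */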
901
902 void
903 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
904 unsigned HOST_WIDE_INT bitnum,
905 unsigned HOST_WIDE_INT bitregion_start,
906 unsigned HOST_WIDE_INT bitregion_end,
907 enum machine_mode fieldmode,
908 rtx value)
909 {
910 /* Under the C++0x memory model, we must not touch bits outside the
911 bit region. Adjust the address to start at the beginning of the
912 bit region. */
913 if (MEM_P (str_rtx) && bitregion_start > 0)
914 {
915 enum machine_mode bestmode;
916 enum machine_mode op_mode;
917 unsigned HOST_WIDE_INT offset;
918
919 op_mode = mode_for_extraction (EP_insv, 3);
920 if (op_mode == MAX_MACHINE_MODE)
921 op_mode = VOIDmode;
922
923 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
924
925 offset = bitregion_start / BITS_PER_UNIT;
926 bitnum -= bitregion_start;
927 bitregion_end -= bitregion_start;
928 bitregion_start = 0;
929 bestmode = get_best_mode (bitsize, bitnum,
930 bitregion_start, bitregion_end,
931 MEM_ALIGN (str_rtx),
932 op_mode,
933 MEM_VOLATILE_P (str_rtx));
934 str_rtx = adjust_address (str_rtx, bestmode, offset);
935 }
936
937 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
938 bitregion_start, bitregion_end,
939 fieldmode, value, true))
940 gcc_unreachable ();
941 }
942 \f
943 /* Use shifts and boolean operations to store VALUE into a bit field of
944 width BITSIZE in OP0, starting at bit BITNUM. */
945
946 static void
947 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
948 unsigned HOST_WIDE_INT bitnum,
949 unsigned HOST_WIDE_INT bitregion_start,
950 unsigned HOST_WIDE_INT bitregion_end,
951 rtx value)
952 {
953 enum machine_mode mode;
954 rtx temp;
955 int all_zero = 0;
956 int all_one = 0;
957
958 /* There is a case not handled here:
959 a structure with a known alignment of just a halfword
960 and a field split across two aligned halfwords within the structure.
961 Or likewise a structure with a known alignment of just a byte
962 and a field split across two bytes.
963 Such cases are not supposed to be able to occur. */
964
965 if (MEM_P (op0))
966 {
967 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
968
969 if (bitregion_end)
970 maxbits = bitregion_end - bitregion_start + 1;
971
972 /* Get the proper mode to use for this field. We want a mode that
973 includes the entire field. If such a mode would be larger than
 974      a word, we won't be doing the insertion the normal way.
975 We don't want a mode bigger than the destination. */
976
977 mode = GET_MODE (op0);
978 if (GET_MODE_BITSIZE (mode) == 0
979 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
980 mode = word_mode;
981
982 if (MEM_VOLATILE_P (op0)
983 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
984 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
985 && flag_strict_volatile_bitfields > 0)
986 mode = GET_MODE (op0);
987 else
988 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
989 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
990
991 if (mode == VOIDmode)
992 {
993 /* The only way this should occur is if the field spans word
994 boundaries. */
995 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
996 bitregion_end, value);
997 return;
998 }
999
1000 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1001 }
1002
1003 mode = GET_MODE (op0);
1004 gcc_assert (SCALAR_INT_MODE_P (mode));
1005
1006 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1007 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1008
1009 if (BYTES_BIG_ENDIAN)
1010 /* BITNUM is the distance between our msb
1011 and that of the containing datum.
1012 Convert it to the distance from the lsb. */
1013 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1014
1015 /* Now BITNUM is always the distance between our lsb
1016 and that of OP0. */
1017
1018 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1019 we must first convert its mode to MODE. */
1020
1021 if (CONST_INT_P (value))
1022 {
1023 HOST_WIDE_INT v = INTVAL (value);
1024
1025 if (bitsize < HOST_BITS_PER_WIDE_INT)
1026 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1027
1028 if (v == 0)
1029 all_zero = 1;
1030 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1031 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1032 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1033 all_one = 1;
1034
1035 value = lshift_value (mode, value, bitnum, bitsize);
1036 }
1037 else
1038 {
1039 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1040 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1041
1042 if (GET_MODE (value) != mode)
1043 value = convert_to_mode (mode, value, 1);
1044
1045 if (must_and)
1046 value = expand_binop (mode, and_optab, value,
1047 mask_rtx (mode, 0, bitsize, 0),
1048 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1049 if (bitnum > 0)
1050 value = expand_shift (LSHIFT_EXPR, mode, value,
1051 bitnum, NULL_RTX, 1);
1052 }
1053
1054 /* Now clear the chosen bits in OP0,
1055 except that if VALUE is -1 we need not bother. */
1056 /* We keep the intermediates in registers to allow CSE to combine
1057 consecutive bitfield assignments. */
1058
1059 temp = force_reg (mode, op0);
1060
1061 if (! all_one)
1062 {
1063 temp = expand_binop (mode, and_optab, temp,
1064 mask_rtx (mode, bitnum, bitsize, 1),
1065 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1066 temp = force_reg (mode, temp);
1067 }
1068
1069 /* Now logical-or VALUE into OP0, unless it is zero. */
1070
1071 if (! all_zero)
1072 {
1073 temp = expand_binop (mode, ior_optab, temp, value,
1074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1075 temp = force_reg (mode, temp);
1076 }
1077
1078 if (op0 != temp)
1079 {
1080 op0 = copy_rtx (op0);
1081 emit_move_insn (op0, temp);
1082 }
1083 }
1084 \f
1085 /* Store a bit field that is split across multiple accessible memory objects.
1086
1087 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1088 BITSIZE is the field width; BITPOS the position of its first bit
1089 (within the word).
1090 VALUE is the value to store.
1091
1092 This does not yet handle fields wider than BITS_PER_WORD. */
1093
1094 static void
1095 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1096 unsigned HOST_WIDE_INT bitpos,
1097 unsigned HOST_WIDE_INT bitregion_start,
1098 unsigned HOST_WIDE_INT bitregion_end,
1099 rtx value)
1100 {
1101 unsigned int unit;
1102 unsigned int bitsdone = 0;
1103
 1104   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
 1105      much at a time.  */
1106 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1107 unit = BITS_PER_WORD;
1108 else
1109 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1110
1111 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1112 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1113 that VALUE might be a floating-point constant. */
1114 if (CONSTANT_P (value) && !CONST_INT_P (value))
1115 {
1116 rtx word = gen_lowpart_common (word_mode, value);
1117
1118 if (word && (value != word))
1119 value = word;
1120 else
1121 value = gen_lowpart_common (word_mode,
1122 force_reg (GET_MODE (value) != VOIDmode
1123 ? GET_MODE (value)
1124 : word_mode, value));
1125 }
1126
1127 while (bitsdone < bitsize)
1128 {
1129 unsigned HOST_WIDE_INT thissize;
1130 rtx part, word;
1131 unsigned HOST_WIDE_INT thispos;
1132 unsigned HOST_WIDE_INT offset;
1133
1134 offset = (bitpos + bitsdone) / unit;
1135 thispos = (bitpos + bitsdone) % unit;
1136
 1137       /* When the region of bytes we can touch is restricted, decrease
 1138          UNIT close to the end of the region as needed.  */
1139 if (bitregion_end
1140 && unit > BITS_PER_UNIT
1141 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1142 {
1143 unit = unit / 2;
1144 continue;
1145 }
1146
1147 /* THISSIZE must not overrun a word boundary. Otherwise,
1148 store_fixed_bit_field will call us again, and we will mutually
1149 recurse forever. */
1150 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1151 thissize = MIN (thissize, unit - thispos);
1152
1153 if (BYTES_BIG_ENDIAN)
1154 {
1155 /* Fetch successively less significant portions. */
1156 if (CONST_INT_P (value))
1157 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1158 >> (bitsize - bitsdone - thissize))
1159 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1160 else
1161 {
1162 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1163 /* The args are chosen so that the last part includes the
1164 lsb. Give extract_bit_field the value it needs (with
1165 endianness compensation) to fetch the piece we want. */
1166 part = extract_fixed_bit_field (word_mode, value, thissize,
1167 total_bits - bitsize + bitsdone,
1168 NULL_RTX, 1, false);
1169 }
1170 }
1171 else
1172 {
1173 /* Fetch successively more significant portions. */
1174 if (CONST_INT_P (value))
1175 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1176 >> bitsdone)
1177 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1178 else
1179 part = extract_fixed_bit_field (word_mode, value, thissize,
1180 bitsdone, NULL_RTX, 1, false);
1181 }
1182
1183 /* If OP0 is a register, then handle OFFSET here.
1184
1185 When handling multiword bitfields, extract_bit_field may pass
1186 down a word_mode SUBREG of a larger REG for a bitfield that actually
1187 crosses a word boundary. Thus, for a SUBREG, we must find
1188 the current word starting from the base register. */
1189 if (GET_CODE (op0) == SUBREG)
1190 {
1191 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1192 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1193 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1194 word = word_offset ? const0_rtx : op0;
1195 else
1196 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1197 GET_MODE (SUBREG_REG (op0)));
1198 offset = 0;
1199 }
1200 else if (REG_P (op0))
1201 {
1202 enum machine_mode op0_mode = GET_MODE (op0);
1203 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1204 word = offset ? const0_rtx : op0;
1205 else
1206 word = operand_subword_force (op0, offset, GET_MODE (op0));
1207 offset = 0;
1208 }
1209 else
1210 word = op0;
1211
1212 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1213 it is just an out-of-bounds access. Ignore it. */
1214 if (word != const0_rtx)
1215 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1216 bitregion_start, bitregion_end, part);
1217 bitsdone += thissize;
1218 }
1219 }
1220 \f
1221 /* A subroutine of extract_bit_field_1 that converts return value X
1222 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1223 to extract_bit_field. */
1224
1225 static rtx
1226 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1227 enum machine_mode tmode, bool unsignedp)
1228 {
1229 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1230 return x;
1231
1232 /* If the x mode is not a scalar integral, first convert to the
1233 integer mode of that size and then access it as a floating-point
1234 value via a SUBREG. */
1235 if (!SCALAR_INT_MODE_P (tmode))
1236 {
1237 enum machine_mode smode;
1238
1239 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1240 x = convert_to_mode (smode, x, unsignedp);
1241 x = force_reg (smode, x);
1242 return gen_lowpart (tmode, x);
1243 }
1244
1245 return convert_to_mode (tmode, x, unsignedp);
1246 }
1247
1248 /* Try to use an ext(z)v pattern to extract a field from OP0.
1249 Return the extracted value on success, otherwise return null.
1250 EXT_MODE is the mode of the extraction and the other arguments
1251 are as for extract_bit_field. */
1252
1253 static rtx
1254 extract_bit_field_using_extv (rtx op0, unsigned HOST_WIDE_INT bitsize,
1255 unsigned HOST_WIDE_INT bitnum,
1256 int unsignedp, rtx target,
1257 enum machine_mode mode, enum machine_mode tmode,
1258 enum machine_mode ext_mode)
1259 {
1260 struct expand_operand ops[4];
1261 rtx spec_target = target;
1262 rtx spec_target_subreg = 0;
1263 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1264
1265 if (bitsize == 0 || unit < bitsize)
1266 return NULL_RTX;
1267
1268 if (MEM_P (op0))
1269 /* Get a reference to the first byte of the field. */
1270 op0 = narrow_bit_field_mem (op0, byte_mode, bitsize, bitnum, &bitnum);
1271 else
1272 {
1273 /* Convert from counting within OP0 to counting in EXT_MODE. */
1274 if (BYTES_BIG_ENDIAN)
1275 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1276
1277 /* If op0 is a register, we need it in EXT_MODE to make it
1278 acceptable to the format of ext(z)v. */
1279 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1280 return NULL_RTX;
1281 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1282 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1283 }
1284
1285 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1286 "backwards" from the size of the unit we are extracting from.
1287 Otherwise, we count bits from the most significant on a
1288 BYTES/BITS_BIG_ENDIAN machine. */
1289
1290 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1291 bitnum = unit - bitsize - bitnum;
1292
1293 if (target == 0)
1294 target = spec_target = gen_reg_rtx (tmode);
1295
1296 if (GET_MODE (target) != ext_mode)
1297 {
1298 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1299 between the mode of the extraction (word_mode) and the target
1300 mode. Instead, create a temporary and use convert_move to set
1301 the target. */
1302 if (REG_P (target)
1303 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1304 {
1305 target = gen_lowpart (ext_mode, target);
1306 if (GET_MODE_PRECISION (ext_mode)
1307 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1308 spec_target_subreg = target;
1309 }
1310 else
1311 target = gen_reg_rtx (ext_mode);
1312 }
1313
1314 create_output_operand (&ops[0], target, ext_mode);
1315 create_fixed_operand (&ops[1], op0);
1316 create_integer_operand (&ops[2], bitsize);
1317 create_integer_operand (&ops[3], bitnum);
1318 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv, 4, ops))
1319 {
1320 target = ops[0].value;
1321 if (target == spec_target)
1322 return target;
1323 if (target == spec_target_subreg)
1324 return spec_target;
1325 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1326 }
1327 return NULL_RTX;
1328 }
1329
1330 /* A subroutine of extract_bit_field, with the same arguments.
1331 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1332 if we can find no other means of implementing the operation.
 1333    If FALLBACK_P is false, return NULL instead.  */
1334
1335 static rtx
1336 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1337 unsigned HOST_WIDE_INT bitnum,
1338 int unsignedp, bool packedp, rtx target,
1339 enum machine_mode mode, enum machine_mode tmode,
1340 bool fallback_p)
1341 {
1342 rtx op0 = str_rtx;
1343 enum machine_mode int_mode;
1344 enum machine_mode ext_mode;
1345 enum machine_mode mode1;
1346
1347 if (tmode == VOIDmode)
1348 tmode = mode;
1349
1350 while (GET_CODE (op0) == SUBREG)
1351 {
1352 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1353 op0 = SUBREG_REG (op0);
1354 }
1355
1356 /* If we have an out-of-bounds access to a register, just return an
1357 uninitialized register of the required mode. This can occur if the
1358 source code contains an out-of-bounds access to a small array. */
1359 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1360 return gen_reg_rtx (tmode);
1361
1362 if (REG_P (op0)
1363 && mode == GET_MODE (op0)
1364 && bitnum == 0
1365 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1366 {
1367 /* We're trying to extract a full register from itself. */
1368 return op0;
1369 }
1370
1371 /* See if we can get a better vector mode before extracting. */
1372 if (VECTOR_MODE_P (GET_MODE (op0))
1373 && !MEM_P (op0)
1374 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1375 {
1376 enum machine_mode new_mode;
1377
1378 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1379 new_mode = MIN_MODE_VECTOR_FLOAT;
1380 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1381 new_mode = MIN_MODE_VECTOR_FRACT;
1382 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1383 new_mode = MIN_MODE_VECTOR_UFRACT;
1384 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1385 new_mode = MIN_MODE_VECTOR_ACCUM;
1386 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1387 new_mode = MIN_MODE_VECTOR_UACCUM;
1388 else
1389 new_mode = MIN_MODE_VECTOR_INT;
1390
1391 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1392 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1393 && targetm.vector_mode_supported_p (new_mode))
1394 break;
1395 if (new_mode != VOIDmode)
1396 op0 = gen_lowpart (new_mode, op0);
1397 }
1398
1399 /* Use vec_extract patterns for extracting parts of vectors whenever
1400 available. */
1401 if (VECTOR_MODE_P (GET_MODE (op0))
1402 && !MEM_P (op0)
1403 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1404 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1405 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1406 {
1407 struct expand_operand ops[3];
1408 enum machine_mode outermode = GET_MODE (op0);
1409 enum machine_mode innermode = GET_MODE_INNER (outermode);
1410 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1411 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1412
1413 create_output_operand (&ops[0], target, innermode);
1414 create_input_operand (&ops[1], op0, outermode);
1415 create_integer_operand (&ops[2], pos);
1416 if (maybe_expand_insn (icode, 3, ops))
1417 {
1418 target = ops[0].value;
1419 if (GET_MODE (target) != mode)
1420 return gen_lowpart (tmode, target);
1421 return target;
1422 }
1423 }
1424
1425 /* Make sure we are playing with integral modes. Pun with subregs
1426 if we aren't. */
1427 {
1428 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1429 if (imode != GET_MODE (op0))
1430 {
1431 if (MEM_P (op0))
1432 op0 = adjust_bitfield_address (op0, imode, 0);
1433 else if (imode != BLKmode)
1434 {
1435 op0 = gen_lowpart (imode, op0);
1436
1437 /* If we got a SUBREG, force it into a register since we
1438 aren't going to be able to do another SUBREG on it. */
1439 if (GET_CODE (op0) == SUBREG)
1440 op0 = force_reg (imode, op0);
1441 }
1442 else if (REG_P (op0))
1443 {
1444 rtx reg, subreg;
1445 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1446 MODE_INT);
1447 reg = gen_reg_rtx (imode);
1448 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1449 emit_move_insn (subreg, op0);
1450 op0 = reg;
1451 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1452 }
1453 else
1454 {
1455 rtx mem = assign_stack_temp (GET_MODE (op0),
1456 GET_MODE_SIZE (GET_MODE (op0)));
1457 emit_move_insn (mem, op0);
1458 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1459 }
1460 }
1461 }
1462
1463 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1464 If that's wrong, the solution is to test for it and set TARGET to 0
1465 if needed. */
1466
1467 /* If the bitfield is volatile, we need to make sure the access
1468 remains on a type-aligned boundary. */
1469 if (GET_CODE (op0) == MEM
1470 && MEM_VOLATILE_P (op0)
1471 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1472 && flag_strict_volatile_bitfields > 0)
1473 goto no_subreg_mode_swap;
1474
1475 /* Only scalar integer modes can be converted via subregs. There is an
1476 additional problem for FP modes here in that they can have a precision
1477 which is different from the size. mode_for_size uses precision, but
1478 we want a mode based on the size, so we must avoid calling it for FP
1479 modes. */
1480 mode1 = mode;
1481 if (SCALAR_INT_MODE_P (tmode))
1482 {
1483 enum machine_mode try_mode = mode_for_size (bitsize,
1484 GET_MODE_CLASS (tmode), 0);
1485 if (try_mode != BLKmode)
1486 mode1 = try_mode;
1487 }
1488 gcc_assert (mode1 != BLKmode);
1489
1490 /* Extraction of a full MODE1 value can be done with a subreg as long
1491 as the least significant bit of the value is the least significant
1492 bit of either OP0 or a word of OP0. */
1493 if (!MEM_P (op0)
1494 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1495 && bitsize == GET_MODE_BITSIZE (mode1)
1496 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1497 {
1498 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1499 bitnum / BITS_PER_UNIT);
1500 if (sub)
1501 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1502 }
1503
1504 /* Extraction of a full MODE1 value can be done with a load as long as
1505 the field is on a byte boundary and is sufficiently aligned. */
1506 if (MEM_P (op0)
1507 && bitnum % BITS_PER_UNIT == 0
1508 && bitsize == GET_MODE_BITSIZE (mode1)
1509 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1510 || (bitnum % bitsize == 0
1511 && MEM_ALIGN (op0) % bitsize == 0)))
1512 {
1513 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1514 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1515 }
1516
1517 no_subreg_mode_swap:
1518
1519 /* Handle fields bigger than a word. */
1520
1521 if (bitsize > BITS_PER_WORD)
1522 {
1523 /* Here we transfer the words of the field
1524 in the order least significant first.
1525 This is because the most significant word is the one which may
1526 be less than full. */
1527
1528 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1529 unsigned int i;
1530 rtx last;
1531
1532 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1533 target = gen_reg_rtx (mode);
1534
1535 /* Indicate for flow that the entire target reg is being set. */
1536 emit_clobber (target);
1537
1538 last = get_last_insn ();
1539 for (i = 0; i < nwords; i++)
1540 {
1541 /* If I is 0, use the low-order word in both field and target;
1542 if I is 1, use the next to lowest word; and so on. */
1543 /* Word number in TARGET to use. */
1544 unsigned int wordnum
1545 = (WORDS_BIG_ENDIAN
1546 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1547 : i);
1548 /* Offset from start of field in OP0. */
1549 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1550 ? MAX (0, ((int) bitsize - ((int) i + 1)
1551 * (int) BITS_PER_WORD))
1552 : (int) i * BITS_PER_WORD);
1553 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1554 rtx result_part
1555 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1556 bitsize - i * BITS_PER_WORD),
1557 bitnum + bit_offset, 1, false, target_part,
1558 mode, word_mode, fallback_p);
1559
1560 gcc_assert (target_part);
1561 if (!result_part)
1562 {
1563 delete_insns_since (last);
1564 return NULL;
1565 }
1566
1567 if (result_part != target_part)
1568 emit_move_insn (target_part, result_part);
1569 }
1570
1571 if (unsignedp)
1572 {
1573 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1574 need to be zero'd out. */
1575 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1576 {
1577 unsigned int i, total_words;
1578
1579 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1580 for (i = nwords; i < total_words; i++)
1581 emit_move_insn
1582 (operand_subword (target,
1583 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1584 1, VOIDmode),
1585 const0_rtx);
1586 }
1587 return target;
1588 }
1589
1590 /* Signed bit field: sign-extend with two arithmetic shifts. */
1591 target = expand_shift (LSHIFT_EXPR, mode, target,
1592 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1593 return expand_shift (RSHIFT_EXPR, mode, target,
1594 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1595 }
1596
1597 /* If OP0 is a multi-word register, narrow it to the affected word.
1598 If the region spans two words, defer to extract_split_bit_field. */
1599 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1600 {
1601 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1602 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1603 bitnum %= BITS_PER_WORD;
1604 if (bitnum + bitsize > BITS_PER_WORD)
1605 {
1606 if (!fallback_p)
1607 return NULL_RTX;
1608 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1609 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1610 }
1611 }
1612
1613 /* From here on we know the desired field is smaller than a word.
1614 If OP0 is a register, it too fits within a word. */
1615
1616 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1617 if (ext_mode != MAX_MACHINE_MODE && !MEM_P (op0))
1618 {
1619 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1620 unsignedp, target, mode,
1621 tmode, ext_mode);
1622 if (result)
1623 return result;
1624 }
1625
1626 /* If OP0 is a memory, try copying it to a register and seeing if a
1627 cheap register alternative is available. */
1628 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1629 {
1630 enum machine_mode bestmode;
1631
1632 /* Do not use extv/extzv for volatile bitfields when
1633 -fstrict-volatile-bitfields is in effect. */
1634 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0))
1635 {
1636 rtx result = extract_bit_field_using_extv (op0, bitsize, bitnum,
1637 unsignedp, target, mode,
1638 tmode, ext_mode);
1639 if (result)
1640 return result;
1641 }
1642
 1643      /* Get the mode to use for extracting this field.  If
1644 OP0 is BLKmode, get the smallest mode consistent with the
1645 alignment. If OP0 is a non-BLKmode object that is no
1646 wider than EXT_MODE, use its mode. Otherwise, use the
1647 smallest mode containing the field. */
1648
1649 if (GET_MODE (op0) == BLKmode
1650 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))
1651 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1652 ext_mode, MEM_VOLATILE_P (op0));
1653 else
1654 bestmode = GET_MODE (op0);
1655
1656 if (bestmode != VOIDmode
1657 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1658 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1659 {
1660 unsigned HOST_WIDE_INT bitpos;
1661 rtx xop0 = narrow_bit_field_mem (op0, bestmode, bitsize, bitnum,
1662 &bitpos);
1663
1664 /* Make sure the register is big enough for the whole field.
1665 (It might not be if bestmode == GET_MODE (op0) and the input
1666 code was invalid.) */
1667 if (bitpos + bitsize <= GET_MODE_BITSIZE (bestmode))
1668 {
1669 rtx last, result;
1670
1671 last = get_last_insn ();
1672
1673 /* Fetch it to a register in that size. */
1674 xop0 = force_reg (bestmode, xop0);
1675 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1676 unsignedp, packedp, target,
1677 mode, tmode, false);
1678 if (result)
1679 return result;
1680
1681 delete_insns_since (last);
1682 }
1683 }
1684 }
1685
1686 if (!fallback_p)
1687 return NULL;
1688
1689 /* Find a correspondingly-sized integer field, so we can apply
1690 shifts and masks to it. */
1691 int_mode = int_mode_for_mode (tmode);
1692 if (int_mode == BLKmode)
1693 int_mode = int_mode_for_mode (mode);
1694 /* Should probably push op0 out to memory and then do a load. */
1695 gcc_assert (int_mode != BLKmode);
1696
1697 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1698 target, unsignedp, packedp);
1699 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1700 }
1701
1702 /* Generate code to extract a bit-field from STR_RTX
1703 containing BITSIZE bits, starting at BITNUM,
1704 and put it in TARGET if possible (if TARGET is nonzero).
1705 Regardless of TARGET, we return the rtx for where the value is placed.
1706
1707 STR_RTX is the structure containing the field (a REG or MEM).
1708 UNSIGNEDP is nonzero if this is an unsigned bit field.
1709 PACKEDP is nonzero if the field has the packed attribute.
1710 MODE is the natural mode of the field value once extracted.
1711 TMODE is the mode the caller would like the value to have;
1712 but the value may be returned with type MODE instead.
1713
1714 If a TARGET is specified and we can store in it at no extra cost,
1715 we do so, and return TARGET.
1716 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1717 if they are equally easy. */
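/* For example, a caller wanting the unsigned 8-bit field that starts at
   bit 3 of a SImode value X might make the (hypothetical) call

     extract_bit_field (x, 8, 3, 1, false, NULL_RTX, QImode, QImode);

   and get back either TARGET or a new pseudo holding (X >> 3) & 0xff.  */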
1718
1719 rtx
1720 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1721 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1722 rtx target, enum machine_mode mode, enum machine_mode tmode)
1723 {
1724 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1725 target, mode, tmode, true);
1726 }
1727 \f
1728 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1729 from bit BITNUM of OP0.
1730
1731 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1732 PACKEDP is true if the field has the packed attribute.
1733
1734 If TARGET is nonzero, attempts to store the value there
1735 and return TARGET, but this is not guaranteed.
1736 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
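/* In the common single-word case this amounts to, for an unsigned field,
   (OP0 >> BITNUM) & ((1 << BITSIZE) - 1), and for a signed field, a left
   shift by W - BITSIZE - BITNUM followed by an arithmetic right shift by
   W - BITSIZE, where W is the width of the integer mode finally used
   (an illustrative summary for the little-endian bit-numbering case).  */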
1737
1738 static rtx
1739 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1740 unsigned HOST_WIDE_INT bitsize,
1741 unsigned HOST_WIDE_INT bitnum, rtx target,
1742 int unsignedp, bool packedp)
1743 {
1744 enum machine_mode mode;
1745
1746 if (MEM_P (op0))
1747 {
1748 /* Get the proper mode to use for this field. We want a mode that
1749 includes the entire field. If such a mode would be larger than
1750 a word, we won't be doing the extraction the normal way. */
1751
1752 if (MEM_VOLATILE_P (op0)
1753 && flag_strict_volatile_bitfields > 0)
1754 {
1755 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1756 mode = GET_MODE (op0);
1757 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1758 mode = GET_MODE (target);
1759 else
1760 mode = tmode;
1761 }
1762 else
1763 mode = get_best_mode (bitsize, bitnum, 0, 0,
1764 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1765
1766 if (mode == VOIDmode)
1767 /* The only way this should occur is if the field spans word
1768 boundaries. */
1769 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1770
1771 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1772 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1773
1774 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1775 if it results in a multi-word access where we otherwise wouldn't
1776 have one. So, check for that case here. */
1777 if (MEM_P (op0)
1778 && MEM_VOLATILE_P (op0)
1779 && flag_strict_volatile_bitfields > 0
1780 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1781 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1782 {
1783 if (STRICT_ALIGNMENT)
1784 {
1785 static bool informed_about_misalignment = false;
1786
1787 if (packedp)
1788 {
1789 if (bitsize == total_bits)
1790 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1791 "multiple accesses to volatile structure"
1792 " member because of packed attribute");
1793 else
1794 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1795 "multiple accesses to volatile structure"
1796 " bitfield because of packed attribute");
1797
1798 return extract_split_bit_field (op0, bitsize, bitnum,
1799 unsignedp);
1800 }
1801
1802 if (bitsize == total_bits)
1803 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1804 "mis-aligned access used for structure member");
1805 else
1806 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1807 "mis-aligned access used for structure bitfield");
1808
1809 if (! informed_about_misalignment)
1810 {
1811 informed_about_misalignment = true;
1812 inform (input_location,
1813 "when a volatile object spans multiple type-sized"
1814 " locations, the compiler must choose between using"
1815 " a single mis-aligned access to preserve the"
1816 " volatility, or using multiple aligned accesses"
1817 " to avoid runtime faults; this code may fail at"
1818 " runtime if the hardware does not allow this"
1819 " access");
1820 }
1821 }
1822 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1823 }
1824 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1825 bitnum -= bit_offset;
1826 }
1827
1828 mode = GET_MODE (op0);
1829 gcc_assert (SCALAR_INT_MODE_P (mode));
1830
1831 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1832 for invalid input, such as extract equivalent of f5 from
1833 gcc.dg/pr48335-2.c. */
1834
1835 if (BYTES_BIG_ENDIAN)
1836 /* BITNUM is the distance between our msb and that of OP0.
1837 Convert it to the distance from the lsb. */
1838 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1839
1840 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1841 We have reduced the big-endian case to the little-endian case. */
1842
1843 if (unsignedp)
1844 {
1845 if (bitnum)
1846 {
1847 /* If the field does not already start at the lsb,
1848 shift it so it does. */
1849 /* Maybe propagate the target for the shift. */
1850 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1851 if (tmode != mode)
1852 subtarget = 0;
1853 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1854 }
1855 /* Convert the value to the desired mode. */
1856 if (mode != tmode)
1857 op0 = convert_to_mode (tmode, op0, 1);
1858
1859 /* Unless the msb of the field used to be the msb when we shifted,
1860 mask out the upper bits. */
1861
1862 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1863 return expand_binop (GET_MODE (op0), and_optab, op0,
1864 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1865 target, 1, OPTAB_LIB_WIDEN);
1866 return op0;
1867 }
1868
1869 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1870 then arithmetic-shift its lsb to the lsb of the word. */
1871 op0 = force_reg (mode, op0);
1872
1873 /* Find the narrowest integer mode that contains the field. */
1874
1875 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1876 mode = GET_MODE_WIDER_MODE (mode))
1877 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1878 {
1879 op0 = convert_to_mode (mode, op0, 0);
1880 break;
1881 }
1882
1883 if (mode != tmode)
1884 target = 0;
1885
1886 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1887 {
1888 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1889 /* Maybe propagate the target for the shift. */
1890 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1891 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1892 }
1893
1894 return expand_shift (RSHIFT_EXPR, mode, op0,
1895 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1896 }
1897 \f
1898 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1899 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1900 complement of that if COMPLEMENT. The mask is truncated if
1901 necessary to the width of mode MODE. The mask is zero-extended if
1902 BITSIZE+BITPOS is too small for MODE. */
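/* For example, in SImode, BITPOS 3 and BITSIZE 4 give the mask 0x78
   (binary 1111000); with COMPLEMENT nonzero the result would be
   0xffffff87 instead.  */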
1903
1904 static rtx
1905 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1906 {
1907 double_int mask;
1908
1909 mask = double_int::mask (bitsize);
1910 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1911
1912 if (complement)
1913 mask = ~mask;
1914
1915 return immed_double_int_const (mask, mode);
1916 }
1917
1918 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1919 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
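/* For example, VALUE 0x1f5 with BITSIZE 4 and BITPOS 8 yields 0x500:
   the value is first truncated to 0x5 and then shifted into place.  */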
1920
1921 static rtx
1922 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1923 {
1924 double_int val;
1925
1926 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1927 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1928
1929 return immed_double_int_const (val, mode);
1930 }
1931 \f
1932 /* Extract a bit field that is split across two words
1933 and return an RTX for the result.
1934
1935 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1936 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1937 UNSIGNEDP is 1 if the contents should be zero-extended; else sign-extended. */
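/* For example, with 32-bit words, a 10-bit field whose first bit is bit 28
   of word 0 is assembled from the 4 bits at positions 28..31 of word 0 and
   the 6 bits at positions 0..5 of word 1 (an illustration for the
   little-endian bit-numbering case).  */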
1938
1939 static rtx
1940 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1941 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1942 {
1943 unsigned int unit;
1944 unsigned int bitsdone = 0;
1945 rtx result = NULL_RTX;
1946 int first = 1;
1947
1948 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1949 much at a time. */
1950 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1951 unit = BITS_PER_WORD;
1952 else
1953 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1954
1955 while (bitsdone < bitsize)
1956 {
1957 unsigned HOST_WIDE_INT thissize;
1958 rtx part, word;
1959 unsigned HOST_WIDE_INT thispos;
1960 unsigned HOST_WIDE_INT offset;
1961
1962 offset = (bitpos + bitsdone) / unit;
1963 thispos = (bitpos + bitsdone) % unit;
1964
1965 /* THISSIZE must not overrun a word boundary. Otherwise,
1966 extract_fixed_bit_field will call us again, and we will mutually
1967 recurse forever. */
1968 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1969 thissize = MIN (thissize, unit - thispos);
1970
1971 /* If OP0 is a register, then handle OFFSET here.
1972
1973 When handling multiword bitfields, extract_bit_field may pass
1974 down a word_mode SUBREG of a larger REG for a bitfield that actually
1975 crosses a word boundary. Thus, for a SUBREG, we must find
1976 the current word starting from the base register. */
1977 if (GET_CODE (op0) == SUBREG)
1978 {
1979 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1980 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1981 GET_MODE (SUBREG_REG (op0)));
1982 offset = 0;
1983 }
1984 else if (REG_P (op0))
1985 {
1986 word = operand_subword_force (op0, offset, GET_MODE (op0));
1987 offset = 0;
1988 }
1989 else
1990 word = op0;
1991
1992 /* Extract the parts in bit-counting order; how those bit counts map
1993 onto the underlying bytes is determined by the target's endianness.
1994 OFFSET is in UNITs, and UNIT is in bits. */
1995 part = extract_fixed_bit_field (word_mode, word, thissize,
1996 offset * unit + thispos, 0, 1, false);
1997 bitsdone += thissize;
1998
1999 /* Shift this part into place for the result. */
2000 if (BYTES_BIG_ENDIAN)
2001 {
2002 if (bitsize != bitsdone)
2003 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004 bitsize - bitsdone, 0, 1);
2005 }
2006 else
2007 {
2008 if (bitsdone != thissize)
2009 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010 bitsdone - thissize, 0, 1);
2011 }
2012
2013 if (first)
2014 result = part;
2015 else
2016 /* Combine the parts with bitwise or. This works
2017 because we extracted each part as an unsigned bit field. */
2018 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019 OPTAB_LIB_WIDEN);
2020
2021 first = 0;
2022 }
2023
2024 /* Unsigned bit field: we are done. */
2025 if (unsignedp)
2026 return result;
2027 /* Signed bit field: sign-extend with two arithmetic shifts. */
2028 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2029 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2030 return expand_shift (RSHIFT_EXPR, word_mode, result,
2031 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2032 }
2033 \f
2034 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2035 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2036 MODE, fill the upper bits with zeros. Fail if the layout of either
2037 mode is unknown (as for CC modes) or if the extraction would involve
2038 unprofitable mode punning. Return the value on success, otherwise
2039 return null.
2040
2041 This is different from gen_lowpart* in these respects:
2042
2043 - the returned value must always be considered an rvalue
2044
2045 - when MODE is wider than SRC_MODE, the extraction involves
2046 a zero extension
2047
2048 - when MODE is smaller than SRC_MODE, the extraction involves
2049 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2050
2051 In other words, this routine performs a computation, whereas the
2052 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2053 operations. */
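/* For example, extracting the SImode low bits of an SFmode value
   reinterprets the float's bit pattern as a 32-bit integer rvalue, while
   extracting SImode from a DImode value also drops the upper 32 bits;
   both illustrations assume the modes involved are tieable on the
   target.  */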
2054
2055 rtx
2056 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2057 {
2058 enum machine_mode int_mode, src_int_mode;
2059
2060 if (mode == src_mode)
2061 return src;
2062
2063 if (CONSTANT_P (src))
2064 {
2065 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2066 fails, it will happily create (subreg (symbol_ref)) or similar
2067 invalid SUBREGs. */
2068 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2069 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2070 if (ret)
2071 return ret;
2072
2073 if (GET_MODE (src) == VOIDmode
2074 || !validate_subreg (mode, src_mode, src, byte))
2075 return NULL_RTX;
2076
2077 src = force_reg (GET_MODE (src), src);
2078 return gen_rtx_SUBREG (mode, src, byte);
2079 }
2080
2081 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2082 return NULL_RTX;
2083
2084 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2085 && MODES_TIEABLE_P (mode, src_mode))
2086 {
2087 rtx x = gen_lowpart_common (mode, src);
2088 if (x)
2089 return x;
2090 }
2091
2092 src_int_mode = int_mode_for_mode (src_mode);
2093 int_mode = int_mode_for_mode (mode);
2094 if (src_int_mode == BLKmode || int_mode == BLKmode)
2095 return NULL_RTX;
2096
2097 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2098 return NULL_RTX;
2099 if (!MODES_TIEABLE_P (int_mode, mode))
2100 return NULL_RTX;
2101
2102 src = gen_lowpart (src_int_mode, src);
2103 src = convert_modes (int_mode, src_int_mode, src, true);
2104 src = gen_lowpart (mode, src);
2105 return src;
2106 }
2107 \f
2108 /* Add INC into TARGET. */
2109
2110 void
2111 expand_inc (rtx target, rtx inc)
2112 {
2113 rtx value = expand_binop (GET_MODE (target), add_optab,
2114 target, inc,
2115 target, 0, OPTAB_LIB_WIDEN);
2116 if (value != target)
2117 emit_move_insn (target, value);
2118 }
2119
2120 /* Subtract DEC from TARGET. */
2121
2122 void
2123 expand_dec (rtx target, rtx dec)
2124 {
2125 rtx value = expand_binop (GET_MODE (target), sub_optab,
2126 target, dec,
2127 target, 0, OPTAB_LIB_WIDEN);
2128 if (value != target)
2129 emit_move_insn (target, value);
2130 }
2131 \f
2132 /* Output a shift instruction for expression code CODE,
2133 with SHIFTED being the rtx for the value to shift,
2134 and AMOUNT the rtx for the amount to shift by.
2135 Store the result in the rtx TARGET, if that is convenient.
2136 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2137 Return the rtx for where the value is. */
2138
2139 static rtx
2140 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2141 rtx amount, rtx target, int unsignedp)
2142 {
2143 rtx op1, temp = 0;
2144 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2146 optab lshift_optab = ashl_optab;
2147 optab rshift_arith_optab = ashr_optab;
2148 optab rshift_uns_optab = lshr_optab;
2149 optab lrotate_optab = rotl_optab;
2150 optab rrotate_optab = rotr_optab;
2151 enum machine_mode op1_mode;
2152 int attempt;
2153 bool speed = optimize_insn_for_speed_p ();
2154
2155 op1 = amount;
2156 op1_mode = GET_MODE (op1);
2157
2158 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2159 shift amount is a vector, use the vector/vector shift patterns. */
2160 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2161 {
2162 lshift_optab = vashl_optab;
2163 rshift_arith_optab = vashr_optab;
2164 rshift_uns_optab = vlshr_optab;
2165 lrotate_optab = vrotl_optab;
2166 rrotate_optab = vrotr_optab;
2167 }
2168
2169 /* We used to detect shift counts computed by NEGATE_EXPR
2170 and shift in the other direction, but that does not work
2171 on all machines. */
2172
2173 if (SHIFT_COUNT_TRUNCATED)
2174 {
2175 if (CONST_INT_P (op1)
2176 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2177 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2178 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2179 % GET_MODE_BITSIZE (mode));
2180 else if (GET_CODE (op1) == SUBREG
2181 && subreg_lowpart_p (op1)
2182 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2183 && INTEGRAL_MODE_P (GET_MODE (op1)))
2184 op1 = SUBREG_REG (op1);
2185 }
2186
2187 if (op1 == const0_rtx)
2188 return shifted;
2189
2190 /* Check whether it's cheaper to implement a left shift by a constant
2191 bit count as a sequence of additions. */
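/* For example, a shift left by 2 can be done as two self-additions:
   x + x gives 2*x and adding that to itself gives 4*x, which is what
   the loop below emits when the additions are cheaper than the shift.  */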
2192 if (code == LSHIFT_EXPR
2193 && CONST_INT_P (op1)
2194 && INTVAL (op1) > 0
2195 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2196 && INTVAL (op1) < MAX_BITS_PER_WORD
2197 && (shift_cost (speed, mode, INTVAL (op1))
2198 > INTVAL (op1) * add_cost (speed, mode))
2199 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2200 {
2201 int i;
2202 for (i = 0; i < INTVAL (op1); i++)
2203 {
2204 temp = force_reg (mode, shifted);
2205 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2206 unsignedp, OPTAB_LIB_WIDEN);
2207 }
2208 return shifted;
2209 }
2210
2211 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2212 {
2213 enum optab_methods methods;
2214
2215 if (attempt == 0)
2216 methods = OPTAB_DIRECT;
2217 else if (attempt == 1)
2218 methods = OPTAB_WIDEN;
2219 else
2220 methods = OPTAB_LIB_WIDEN;
2221
2222 if (rotate)
2223 {
2224 /* Widening does not work for rotation. */
2225 if (methods == OPTAB_WIDEN)
2226 continue;
2227 else if (methods == OPTAB_LIB_WIDEN)
2228 {
2229 /* If we have been unable to open-code this by a rotation,
2230 do it as the IOR of two shifts. I.e., to rotate A
2231 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2232 where C is the bitsize of A.
2233
2234 It is theoretically possible that the target machine might
2235 not be able to perform either shift and hence we would
2236 be making two libcalls rather than just the one for the
2237 shift (similarly if IOR could not be done). We will allow
2238 this extremely unlikely lossage to avoid complicating the
2239 code below. */
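/* For instance, rotating the 8-bit value 0xb4 left by 3 gives
   ((0xb4 << 3) | (0xb4 >> 5)) & 0xff = 0xa0 | 0x05 = 0xa5.  */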
2240
2241 rtx subtarget = target == shifted ? 0 : target;
2242 rtx new_amount, other_amount;
2243 rtx temp1;
2244
2245 new_amount = op1;
2246 if (CONST_INT_P (op1))
2247 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2248 - INTVAL (op1));
2249 else
2250 other_amount
2251 = simplify_gen_binary (MINUS, GET_MODE (op1),
2252 GEN_INT (GET_MODE_PRECISION (mode)),
2253 op1);
2254
2255 shifted = force_reg (mode, shifted);
2256
2257 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2258 mode, shifted, new_amount, 0, 1);
2259 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2260 mode, shifted, other_amount,
2261 subtarget, 1);
2262 return expand_binop (mode, ior_optab, temp, temp1, target,
2263 unsignedp, methods);
2264 }
2265
2266 temp = expand_binop (mode,
2267 left ? lrotate_optab : rrotate_optab,
2268 shifted, op1, target, unsignedp, methods);
2269 }
2270 else if (unsignedp)
2271 temp = expand_binop (mode,
2272 left ? lshift_optab : rshift_uns_optab,
2273 shifted, op1, target, unsignedp, methods);
2274
2275 /* Do arithmetic shifts.
2276 Also, if we are going to widen the operand, we can just as well
2277 use an arithmetic right-shift instead of a logical one. */
2278 if (temp == 0 && ! rotate
2279 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2280 {
2281 enum optab_methods methods1 = methods;
2282
2283 /* If trying to widen a log shift to an arithmetic shift,
2284 don't accept an arithmetic shift of the same size. */
2285 if (unsignedp)
2286 methods1 = OPTAB_MUST_WIDEN;
2287
2288 /* Arithmetic shift */
2289
2290 temp = expand_binop (mode,
2291 left ? lshift_optab : rshift_arith_optab,
2292 shifted, op1, target, unsignedp, methods1);
2293 }
2294
2295 /* We used to try extzv here for logical right shifts, but that was
2296 only useful for one machine, the VAX, and caused poor code
2297 generation there for lshrdi3, so the code was deleted and a
2298 define_expand for lshrsi3 was added to vax.md. */
2299 }
2300
2301 gcc_assert (temp);
2302 return temp;
2303 }
2304
2305 /* Output a shift instruction for expression code CODE,
2306 with SHIFTED being the rtx for the value to shift,
2307 and AMOUNT the amount to shift by.
2308 Store the result in the rtx TARGET, if that is convenient.
2309 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2310 Return the rtx for where the value is. */
2311
2312 rtx
2313 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2314 int amount, rtx target, int unsignedp)
2315 {
2316 return expand_shift_1 (code, mode,
2317 shifted, GEN_INT (amount), target, unsignedp);
2318 }
2319
2320 /* Output a shift instruction for expression code CODE,
2321 with SHIFTED being the rtx for the value to shift,
2322 and AMOUNT the tree for the amount to shift by.
2323 Store the result in the rtx TARGET, if that is convenient.
2324 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325 Return the rtx for where the value is. */
2326
2327 rtx
2328 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2329 tree amount, rtx target, int unsignedp)
2330 {
2331 return expand_shift_1 (code, mode,
2332 shifted, expand_normal (amount), target, unsignedp);
2333 }
2334
2335 \f
2336 /* Indicates the type of fixup needed after a constant multiplication.
2337 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2338 the result should be negated, and ADD_VARIANT means that the
2339 multiplicand should be added to the result. */
2340 enum mult_variant {basic_variant, negate_variant, add_variant};
2341
2342 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2343 const struct mult_cost *, enum machine_mode mode);
2344 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2345 struct algorithm *, enum mult_variant *, int);
2346 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2347 const struct algorithm *, enum mult_variant);
2348 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2349 static rtx extract_high_half (enum machine_mode, rtx);
2350 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2351 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2352 int, int);
2353 /* Compute and return the best algorithm for multiplying by T.
2354 The algorithm must cost less than COST_LIMIT.
2355 If retval.cost >= COST_LIMIT, no algorithm was found and all
2356 other fields of the returned struct are undefined.
2357 MODE is the machine mode of the multiplication. */
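/* As an illustration, for T = 10 one decomposition this search can find
   (costs permitting) is: 10 is even, so record a shift by 1 and synthesize
   5; 5 = 4 + 1 is handled by alg_add_t2_m with a log of 2; the resulting
   sequence computes x * 10 as ((x << 2) + x) << 1.  */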
2358
2359 static void
2360 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2361 const struct mult_cost *cost_limit, enum machine_mode mode)
2362 {
2363 int m;
2364 struct algorithm *alg_in, *best_alg;
2365 struct mult_cost best_cost;
2366 struct mult_cost new_limit;
2367 int op_cost, op_latency;
2368 unsigned HOST_WIDE_INT orig_t = t;
2369 unsigned HOST_WIDE_INT q;
2370 int maxm, hash_index;
2371 bool cache_hit = false;
2372 enum alg_code cache_alg = alg_zero;
2373 bool speed = optimize_insn_for_speed_p ();
2374 enum machine_mode imode;
2375 struct alg_hash_entry *entry_ptr;
2376
2377 /* Indicate that no algorithm is yet found. If no algorithm
2378 is found, this value will be returned and indicate failure. */
2379 alg_out->cost.cost = cost_limit->cost + 1;
2380 alg_out->cost.latency = cost_limit->latency + 1;
2381
2382 if (cost_limit->cost < 0
2383 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384 return;
2385
2386 /* Be prepared for vector modes. */
2387 imode = GET_MODE_INNER (mode);
2388 if (imode == VOIDmode)
2389 imode = mode;
2390
2391 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2392
2393 /* Restrict the bits of "t" to the multiplication's mode. */
2394 t &= GET_MODE_MASK (imode);
2395
2396 /* t == 1 can be done at zero cost. */
2397 if (t == 1)
2398 {
2399 alg_out->ops = 1;
2400 alg_out->cost.cost = 0;
2401 alg_out->cost.latency = 0;
2402 alg_out->op[0] = alg_m;
2403 return;
2404 }
2405
2406 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2407 fail now. */
2408 if (t == 0)
2409 {
2410 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2411 return;
2412 else
2413 {
2414 alg_out->ops = 1;
2415 alg_out->cost.cost = zero_cost (speed);
2416 alg_out->cost.latency = zero_cost (speed);
2417 alg_out->op[0] = alg_zero;
2418 return;
2419 }
2420 }
2421
2422 /* We'll be needing a couple extra algorithm structures now. */
2423
2424 alg_in = XALLOCA (struct algorithm);
2425 best_alg = XALLOCA (struct algorithm);
2426 best_cost = *cost_limit;
2427
2428 /* Compute the hash index. */
2429 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2430
2431 /* See if we already know what to do for T. */
2432 entry_ptr = alg_hash_entry_ptr (hash_index);
2433 if (entry_ptr->t == t
2434 && entry_ptr->mode == mode
2436 && entry_ptr->speed == speed
2437 && entry_ptr->alg != alg_unknown)
2438 {
2439 cache_alg = entry_ptr->alg;
2440
2441 if (cache_alg == alg_impossible)
2442 {
2443 /* The cache tells us that it's impossible to synthesize
2444 multiplication by T within entry_ptr->cost. */
2445 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2446 /* COST_LIMIT is at least as restrictive as the one
2447 recorded in the hash table, in which case we have no
2448 hope of synthesizing a multiplication. Just
2449 return. */
2450 return;
2451
2452 /* If we get here, COST_LIMIT is less restrictive than the
2453 one recorded in the hash table, so we may be able to
2454 synthesize a multiplication. Proceed as if we didn't
2455 have the cache entry. */
2456 }
2457 else
2458 {
2459 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2460 /* The cached algorithm shows that this multiplication
2461 requires more cost than COST_LIMIT. Just return. This
2462 way, we don't clobber this cache entry with
2463 alg_impossible but retain useful information. */
2464 return;
2465
2466 cache_hit = true;
2467
2468 switch (cache_alg)
2469 {
2470 case alg_shift:
2471 goto do_alg_shift;
2472
2473 case alg_add_t_m2:
2474 case alg_sub_t_m2:
2475 goto do_alg_addsub_t_m2;
2476
2477 case alg_add_factor:
2478 case alg_sub_factor:
2479 goto do_alg_addsub_factor;
2480
2481 case alg_add_t2_m:
2482 goto do_alg_add_t2_m;
2483
2484 case alg_sub_t2_m:
2485 goto do_alg_sub_t2_m;
2486
2487 default:
2488 gcc_unreachable ();
2489 }
2490 }
2491 }
2492
2493 /* If we have a group of zero bits at the low-order part of T, try
2494 multiplying by the remaining bits and then doing a shift. */
2495
2496 if ((t & 1) == 0)
2497 {
2498 do_alg_shift:
2499 m = floor_log2 (t & -t); /* m = number of low zero bits */
2500 if (m < maxm)
2501 {
2502 q = t >> m;
2503 /* The function expand_shift will choose between a shift and
2504 a sequence of additions, so the observed cost is given as
2505 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2506 op_cost = m * add_cost (speed, mode);
2507 if (shift_cost (speed, mode, m) < op_cost)
2508 op_cost = shift_cost (speed, mode, m);
2509 new_limit.cost = best_cost.cost - op_cost;
2510 new_limit.latency = best_cost.latency - op_cost;
2511 synth_mult (alg_in, q, &new_limit, mode);
2512
2513 alg_in->cost.cost += op_cost;
2514 alg_in->cost.latency += op_cost;
2515 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516 {
2517 struct algorithm *x;
2518 best_cost = alg_in->cost;
2519 x = alg_in, alg_in = best_alg, best_alg = x;
2520 best_alg->log[best_alg->ops] = m;
2521 best_alg->op[best_alg->ops] = alg_shift;
2522 }
2523
2524 /* See if treating ORIG_T as a signed number yields a better
2525 sequence. Try this sequence only for a negative ORIG_T
2526 as it would be useless for a non-negative ORIG_T. */
2527 if ((HOST_WIDE_INT) orig_t < 0)
2528 {
2529 /* Shift ORIG_T as follows because a right shift of a
2530 negative-valued signed type is implementation
2531 defined. */
2532 q = ~(~orig_t >> m);
2533 /* The function expand_shift will choose between a shift
2534 and a sequence of additions, so the observed cost is
2535 given as MIN (m * add_cost(speed, mode),
2536 shift_cost(speed, mode, m)). */
2537 op_cost = m * add_cost (speed, mode);
2538 if (shift_cost (speed, mode, m) < op_cost)
2539 op_cost = shift_cost (speed, mode, m);
2540 new_limit.cost = best_cost.cost - op_cost;
2541 new_limit.latency = best_cost.latency - op_cost;
2542 synth_mult (alg_in, q, &new_limit, mode);
2543
2544 alg_in->cost.cost += op_cost;
2545 alg_in->cost.latency += op_cost;
2546 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2547 {
2548 struct algorithm *x;
2549 best_cost = alg_in->cost;
2550 x = alg_in, alg_in = best_alg, best_alg = x;
2551 best_alg->log[best_alg->ops] = m;
2552 best_alg->op[best_alg->ops] = alg_shift;
2553 }
2554 }
2555 }
2556 if (cache_hit)
2557 goto done;
2558 }
2559
2560 /* If we have an odd number, add or subtract one. */
2561 if ((t & 1) != 0)
2562 {
2563 unsigned HOST_WIDE_INT w;
2564
2565 do_alg_addsub_t_m2:
2566 for (w = 1; (w & t) != 0; w <<= 1)
2567 ;
2568 /* If T was -1, then W will be zero after the loop. This is another
2569 case where T ends with ...111. Handling this by multiplying by
2570 (T + 1) and subtracting 1 produces slightly better code and makes
2571 algorithm selection much faster than treating it like the ...0111
2572 case below. */
2573 if (w == 0
2574 || (w > 2
2575 /* Reject the case where t is 3.
2576 Thus we prefer addition in that case. */
2577 && t != 3))
2578 {
2579 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2580
2581 op_cost = add_cost (speed, mode);
2582 new_limit.cost = best_cost.cost - op_cost;
2583 new_limit.latency = best_cost.latency - op_cost;
2584 synth_mult (alg_in, t + 1, &new_limit, mode);
2585
2586 alg_in->cost.cost += op_cost;
2587 alg_in->cost.latency += op_cost;
2588 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589 {
2590 struct algorithm *x;
2591 best_cost = alg_in->cost;
2592 x = alg_in, alg_in = best_alg, best_alg = x;
2593 best_alg->log[best_alg->ops] = 0;
2594 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2595 }
2596 }
2597 else
2598 {
2599 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2600
2601 op_cost = add_cost (speed, mode);
2602 new_limit.cost = best_cost.cost - op_cost;
2603 new_limit.latency = best_cost.latency - op_cost;
2604 synth_mult (alg_in, t - 1, &new_limit, mode);
2605
2606 alg_in->cost.cost += op_cost;
2607 alg_in->cost.latency += op_cost;
2608 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2609 {
2610 struct algorithm *x;
2611 best_cost = alg_in->cost;
2612 x = alg_in, alg_in = best_alg, best_alg = x;
2613 best_alg->log[best_alg->ops] = 0;
2614 best_alg->op[best_alg->ops] = alg_add_t_m2;
2615 }
2616 }
2617
2618 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2619 quickly with a - a * n for some appropriate constant n. */
2620 m = exact_log2 (-orig_t + 1);
2621 if (m >= 0 && m < maxm)
2622 {
2623 op_cost = shiftsub1_cost (speed, mode, m);
2624 new_limit.cost = best_cost.cost - op_cost;
2625 new_limit.latency = best_cost.latency - op_cost;
2626 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2627 &new_limit, mode);
2628
2629 alg_in->cost.cost += op_cost;
2630 alg_in->cost.latency += op_cost;
2631 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2632 {
2633 struct algorithm *x;
2634 best_cost = alg_in->cost;
2635 x = alg_in, alg_in = best_alg, best_alg = x;
2636 best_alg->log[best_alg->ops] = m;
2637 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2638 }
2639 }
2640
2641 if (cache_hit)
2642 goto done;
2643 }
2644
2645 /* Look for factors of t of the form
2646 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2647 If we find such a factor, we can multiply by t using an algorithm that
2648 multiplies by q, shift the result by m and add/subtract it to itself.
2649
2650 We search for large factors first and loop down, even if large factors
2651 are less probable than small; if we find a large factor we will find a
2652 good sequence quickly, and therefore be able to prune (by decreasing
2653 COST_LIMIT) the search. */
2654
2655 do_alg_addsub_factor:
2656 for (m = floor_log2 (t - 1); m >= 2; m--)
2657 {
2658 unsigned HOST_WIDE_INT d;
2659
2660 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2661 if (t % d == 0 && t > d && m < maxm
2662 && (!cache_hit || cache_alg == alg_add_factor))
2663 {
2664 /* If the target has a cheap shift-and-add instruction use
2665 that in preference to a shift insn followed by an add insn.
2666 Assume that the shift-and-add is "atomic" with a latency
2667 equal to its cost, otherwise assume that on superscalar
2668 hardware the shift may be executed concurrently with the
2669 earlier steps in the algorithm. */
2670 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2671 if (shiftadd_cost (speed, mode, m) < op_cost)
2672 {
2673 op_cost = shiftadd_cost (speed, mode, m);
2674 op_latency = op_cost;
2675 }
2676 else
2677 op_latency = add_cost (speed, mode);
2678
2679 new_limit.cost = best_cost.cost - op_cost;
2680 new_limit.latency = best_cost.latency - op_latency;
2681 synth_mult (alg_in, t / d, &new_limit, mode);
2682
2683 alg_in->cost.cost += op_cost;
2684 alg_in->cost.latency += op_latency;
2685 if (alg_in->cost.latency < op_cost)
2686 alg_in->cost.latency = op_cost;
2687 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688 {
2689 struct algorithm *x;
2690 best_cost = alg_in->cost;
2691 x = alg_in, alg_in = best_alg, best_alg = x;
2692 best_alg->log[best_alg->ops] = m;
2693 best_alg->op[best_alg->ops] = alg_add_factor;
2694 }
2695 /* Other factors will have been taken care of in the recursion. */
2696 break;
2697 }
2698
2699 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2700 if (t % d == 0 && t > d && m < maxm
2701 && (!cache_hit || cache_alg == alg_sub_factor))
2702 {
2703 /* If the target has a cheap shift-and-subtract insn use
2704 that in preference to a shift insn followed by a sub insn.
2705 Assume that the shift-and-sub is "atomic" with a latency
2706 equal to its cost, otherwise assume that on superscalar
2707 hardware the shift may be executed concurrently with the
2708 earlier steps in the algorithm. */
2709 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2710 if (shiftsub0_cost (speed, mode, m) < op_cost)
2711 {
2712 op_cost = shiftsub0_cost (speed, mode, m);
2713 op_latency = op_cost;
2714 }
2715 else
2716 op_latency = add_cost (speed, mode);
2717
2718 new_limit.cost = best_cost.cost - op_cost;
2719 new_limit.latency = best_cost.latency - op_latency;
2720 synth_mult (alg_in, t / d, &new_limit, mode);
2721
2722 alg_in->cost.cost += op_cost;
2723 alg_in->cost.latency += op_latency;
2724 if (alg_in->cost.latency < op_cost)
2725 alg_in->cost.latency = op_cost;
2726 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2727 {
2728 struct algorithm *x;
2729 best_cost = alg_in->cost;
2730 x = alg_in, alg_in = best_alg, best_alg = x;
2731 best_alg->log[best_alg->ops] = m;
2732 best_alg->op[best_alg->ops] = alg_sub_factor;
2733 }
2734 break;
2735 }
2736 }
2737 if (cache_hit)
2738 goto done;
2739
2740 /* Try shift-and-add (load effective address) instructions,
2741 i.e. do a*3, a*5, a*9. */
2742 if ((t & 1) != 0)
2743 {
2744 do_alg_add_t2_m:
2745 q = t - 1;
2746 q = q & -q;
2747 m = exact_log2 (q);
2748 if (m >= 0 && m < maxm)
2749 {
2750 op_cost = shiftadd_cost (speed, mode, m);
2751 new_limit.cost = best_cost.cost - op_cost;
2752 new_limit.latency = best_cost.latency - op_cost;
2753 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2754
2755 alg_in->cost.cost += op_cost;
2756 alg_in->cost.latency += op_cost;
2757 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2758 {
2759 struct algorithm *x;
2760 best_cost = alg_in->cost;
2761 x = alg_in, alg_in = best_alg, best_alg = x;
2762 best_alg->log[best_alg->ops] = m;
2763 best_alg->op[best_alg->ops] = alg_add_t2_m;
2764 }
2765 }
2766 if (cache_hit)
2767 goto done;
2768
2769 do_alg_sub_t2_m:
2770 q = t + 1;
2771 q = q & -q;
2772 m = exact_log2 (q);
2773 if (m >= 0 && m < maxm)
2774 {
2775 op_cost = shiftsub0_cost (speed, mode, m);
2776 new_limit.cost = best_cost.cost - op_cost;
2777 new_limit.latency = best_cost.latency - op_cost;
2778 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2779
2780 alg_in->cost.cost += op_cost;
2781 alg_in->cost.latency += op_cost;
2782 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2783 {
2784 struct algorithm *x;
2785 best_cost = alg_in->cost;
2786 x = alg_in, alg_in = best_alg, best_alg = x;
2787 best_alg->log[best_alg->ops] = m;
2788 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2789 }
2790 }
2791 if (cache_hit)
2792 goto done;
2793 }
2794
2795 done:
2796 /* If best_cost has not decreased, we have not found any algorithm. */
2797 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2798 {
2799 /* We failed to find an algorithm. Record alg_impossible for
2800 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2801 we are asked to find an algorithm for T within the same or
2802 lower COST_LIMIT, we can immediately return to the
2803 caller. */
2804 entry_ptr->t = t;
2805 entry_ptr->mode = mode;
2806 entry_ptr->speed = speed;
2807 entry_ptr->alg = alg_impossible;
2808 entry_ptr->cost = *cost_limit;
2809 return;
2810 }
2811
2812 /* Cache the result. */
2813 if (!cache_hit)
2814 {
2815 entry_ptr->t = t;
2816 entry_ptr->mode = mode;
2817 entry_ptr->speed = speed;
2818 entry_ptr->alg = best_alg->op[best_alg->ops];
2819 entry_ptr->cost.cost = best_cost.cost;
2820 entry_ptr->cost.latency = best_cost.latency;
2821 }
2822
2823 /* If the sequence we found is too long for `struct algorithm'
2824 to record, make this search fail. */
2825 if (best_alg->ops == MAX_BITS_PER_WORD)
2826 return;
2827
2828 /* Copy the algorithm from temporary space to the space at alg_out.
2829 We avoid using structure assignment because the majority of
2830 best_alg is normally undefined, and this is a critical function. */
2831 alg_out->ops = best_alg->ops + 1;
2832 alg_out->cost = best_cost;
2833 memcpy (alg_out->op, best_alg->op,
2834 alg_out->ops * sizeof *alg_out->op);
2835 memcpy (alg_out->log, best_alg->log,
2836 alg_out->ops * sizeof *alg_out->log);
2837 }
2838 \f
2839 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2840 Try three variations:
2841
2842 - a shift/add sequence based on VAL itself
2843 - a shift/add sequence based on -VAL, followed by a negation
2844 - a shift/add sequence based on VAL - 1, followed by an addition.
2845
2846 Return true if the cheapest of these cost less than MULT_COST,
2847 describing the algorithm in *ALG and final fixup in *VARIANT. */
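/* For instance, multiplying by -3 may come out cheapest as the shift/add
   sequence for 3 (x + (x << 1)) followed by a negation (negate_variant),
   and multiplying by 7 as the sequence for 6 followed by one further
   addition of the multiplicand (add_variant), whenever those beat the
   direct sequences.  */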
2848
2849 static bool
2850 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2851 struct algorithm *alg, enum mult_variant *variant,
2852 int mult_cost)
2853 {
2854 struct algorithm alg2;
2855 struct mult_cost limit;
2856 int op_cost;
2857 bool speed = optimize_insn_for_speed_p ();
2858
2859 /* Fail quickly for impossible bounds. */
2860 if (mult_cost < 0)
2861 return false;
2862
2863 /* Ensure that mult_cost provides a reasonable upper bound.
2864 Any constant multiplication can be performed with fewer
2865 than 2 * bits additions. */
2866 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2867 if (mult_cost > op_cost)
2868 mult_cost = op_cost;
2869
2870 *variant = basic_variant;
2871 limit.cost = mult_cost;
2872 limit.latency = mult_cost;
2873 synth_mult (alg, val, &limit, mode);
2874
2875 /* This works only if the negated value actually fits in an
2876 `unsigned int'. */
2877 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2878 {
2879 op_cost = neg_cost(speed, mode);
2880 if (MULT_COST_LESS (&alg->cost, mult_cost))
2881 {
2882 limit.cost = alg->cost.cost - op_cost;
2883 limit.latency = alg->cost.latency - op_cost;
2884 }
2885 else
2886 {
2887 limit.cost = mult_cost - op_cost;
2888 limit.latency = mult_cost - op_cost;
2889 }
2890
2891 synth_mult (&alg2, -val, &limit, mode);
2892 alg2.cost.cost += op_cost;
2893 alg2.cost.latency += op_cost;
2894 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2895 *alg = alg2, *variant = negate_variant;
2896 }
2897
2898 /* This proves very useful for division-by-constant. */
2899 op_cost = add_cost (speed, mode);
2900 if (MULT_COST_LESS (&alg->cost, mult_cost))
2901 {
2902 limit.cost = alg->cost.cost - op_cost;
2903 limit.latency = alg->cost.latency - op_cost;
2904 }
2905 else
2906 {
2907 limit.cost = mult_cost - op_cost;
2908 limit.latency = mult_cost - op_cost;
2909 }
2910
2911 synth_mult (&alg2, val - 1, &limit, mode);
2912 alg2.cost.cost += op_cost;
2913 alg2.cost.latency += op_cost;
2914 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915 *alg = alg2, *variant = add_variant;
2916
2917 return MULT_COST_LESS (&alg->cost, mult_cost);
2918 }
2919
2920 /* A subroutine of expand_mult, used for constant multiplications.
2921 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2922 convenient. Use the shift/add sequence described by ALG and apply
2923 the final fixup specified by VARIANT. */
2924
2925 static rtx
2926 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2927 rtx target, const struct algorithm *alg,
2928 enum mult_variant variant)
2929 {
2930 HOST_WIDE_INT val_so_far;
2931 rtx insn, accum, tem;
2932 int opno;
2933 enum machine_mode nmode;
2934
2935 /* Avoid referencing memory over and over and invalid sharing
2936 on SUBREGs. */
2937 op0 = force_reg (mode, op0);
2938
2939 /* ACCUM starts out either as OP0 or as a zero, depending on
2940 the first operation. */
2941
2942 if (alg->op[0] == alg_zero)
2943 {
2944 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2945 val_so_far = 0;
2946 }
2947 else if (alg->op[0] == alg_m)
2948 {
2949 accum = copy_to_mode_reg (mode, op0);
2950 val_so_far = 1;
2951 }
2952 else
2953 gcc_unreachable ();
2954
2955 for (opno = 1; opno < alg->ops; opno++)
2956 {
2957 int log = alg->log[opno];
2958 rtx shift_subtarget = optimize ? 0 : accum;
2959 rtx add_target
2960 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2961 && !optimize)
2962 ? target : 0;
2963 rtx accum_target = optimize ? 0 : accum;
2964 rtx accum_inner;
2965
2966 switch (alg->op[opno])
2967 {
2968 case alg_shift:
2969 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2970 /* REG_EQUAL note will be attached to the following insn. */
2971 emit_move_insn (accum, tem);
2972 val_so_far <<= log;
2973 break;
2974
2975 case alg_add_t_m2:
2976 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2977 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2978 add_target ? add_target : accum_target);
2979 val_so_far += (HOST_WIDE_INT) 1 << log;
2980 break;
2981
2982 case alg_sub_t_m2:
2983 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2984 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2985 add_target ? add_target : accum_target);
2986 val_so_far -= (HOST_WIDE_INT) 1 << log;
2987 break;
2988
2989 case alg_add_t2_m:
2990 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2991 log, shift_subtarget, 0);
2992 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2993 add_target ? add_target : accum_target);
2994 val_so_far = (val_so_far << log) + 1;
2995 break;
2996
2997 case alg_sub_t2_m:
2998 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2999 log, shift_subtarget, 0);
3000 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3001 add_target ? add_target : accum_target);
3002 val_so_far = (val_so_far << log) - 1;
3003 break;
3004
3005 case alg_add_factor:
3006 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3007 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3008 add_target ? add_target : accum_target);
3009 val_so_far += val_so_far << log;
3010 break;
3011
3012 case alg_sub_factor:
3013 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3014 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3015 (add_target
3016 ? add_target : (optimize ? 0 : tem)));
3017 val_so_far = (val_so_far << log) - val_so_far;
3018 break;
3019
3020 default:
3021 gcc_unreachable ();
3022 }
3023
3024 if (SCALAR_INT_MODE_P (mode))
3025 {
3026 /* Write a REG_EQUAL note on the last insn so that we can cse
3027 multiplication sequences. Note that if ACCUM is a SUBREG,
3028 we've set the inner register and must properly indicate that. */
3029 tem = op0, nmode = mode;
3030 accum_inner = accum;
3031 if (GET_CODE (accum) == SUBREG)
3032 {
3033 accum_inner = SUBREG_REG (accum);
3034 nmode = GET_MODE (accum_inner);
3035 tem = gen_lowpart (nmode, op0);
3036 }
3037
3038 insn = get_last_insn ();
3039 set_dst_reg_note (insn, REG_EQUAL,
3040 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3041 accum_inner);
3042 }
3043 }
3044
3045 if (variant == negate_variant)
3046 {
3047 val_so_far = -val_so_far;
3048 accum = expand_unop (mode, neg_optab, accum, target, 0);
3049 }
3050 else if (variant == add_variant)
3051 {
3052 val_so_far = val_so_far + 1;
3053 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3054 }
3055
3056 /* Compare only the bits of val and val_so_far that are significant
3057 in the result mode, to avoid sign-/zero-extension confusion. */
3058 nmode = GET_MODE_INNER (mode);
3059 if (nmode == VOIDmode)
3060 nmode = mode;
3061 val &= GET_MODE_MASK (nmode);
3062 val_so_far &= GET_MODE_MASK (nmode);
3063 gcc_assert (val == val_so_far);
3064
3065 return accum;
3066 }
3067
3068 /* Perform a multiplication and return an rtx for the result.
3069 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3070 TARGET is a suggestion for where to store the result (an rtx).
3071
3072 We check specially for a constant integer as OP1.
3073 If you want this check for OP0 as well, then before calling
3074 you should swap the two operands if OP0 would be constant. */
3075
3076 rtx
3077 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3078 int unsignedp)
3079 {
3080 enum mult_variant variant;
3081 struct algorithm algorithm;
3082 rtx scalar_op1;
3083 int max_cost;
3084 bool speed = optimize_insn_for_speed_p ();
3085 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3086
3087 if (CONSTANT_P (op0))
3088 {
3089 rtx temp = op0;
3090 op0 = op1;
3091 op1 = temp;
3092 }
3093
3094 /* For vectors, there are several simplifications that can be made if
3095 all elements of the vector constant are identical. */
3096 scalar_op1 = op1;
3097 if (GET_CODE (op1) == CONST_VECTOR)
3098 {
3099 int i, n = CONST_VECTOR_NUNITS (op1);
3100 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3101 for (i = 1; i < n; ++i)
3102 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3103 goto skip_scalar;
3104 }
3105
3106 if (INTEGRAL_MODE_P (mode))
3107 {
3108 rtx fake_reg;
3109 HOST_WIDE_INT coeff;
3110 bool is_neg;
3111 int mode_bitsize;
3112
3113 if (op1 == CONST0_RTX (mode))
3114 return op1;
3115 if (op1 == CONST1_RTX (mode))
3116 return op0;
3117 if (op1 == CONSTM1_RTX (mode))
3118 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3119 op0, target, 0);
3120
3121 if (do_trapv)
3122 goto skip_synth;
3123
3124 /* These are the operations that are potentially turned into
3125 a sequence of shifts and additions. */
3126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3127
3128 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3129 less than or equal in size to `unsigned int' this doesn't matter.
3130 If the mode is larger than `unsigned int', then synth_mult works
3131 only if the constant value exactly fits in an `unsigned int' without
3132 any truncation. This means that multiplying by negative values does
3133 not work; results are off by 2^32 on a 32-bit machine. */
3134
3135 if (CONST_INT_P (scalar_op1))
3136 {
3137 coeff = INTVAL (scalar_op1);
3138 is_neg = coeff < 0;
3139 }
3140 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3141 {
3142 /* If we are multiplying in DImode, it may still be a win
3143 to try to work with shifts and adds. */
3144 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3145 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3146 {
3147 coeff = CONST_DOUBLE_LOW (scalar_op1);
3148 is_neg = false;
3149 }
3150 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3151 {
3152 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3153 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3154 {
3155 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3156 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3157 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3158 return expand_shift (LSHIFT_EXPR, mode, op0,
3159 shift, target, unsignedp);
3160 }
3161 goto skip_synth;
3162 }
3163 else
3164 goto skip_synth;
3165 }
3166 else
3167 goto skip_synth;
3168
3169 /* We used to test optimize here, on the grounds that it's better to
3170 produce a smaller program when -O is not used. But this causes
3171 such a terrible slowdown sometimes that it seems better to always
3172 use synth_mult. */
3173
3174 /* Special case powers of two. */
3175 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3176 return expand_shift (LSHIFT_EXPR, mode, op0,
3177 floor_log2 (coeff), target, unsignedp);
3178
3179 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3180
3181 /* Attempt to handle multiplication of DImode values by negative
3182 coefficients, by performing the multiplication by a positive
3183 multiplier and then negating the result. */
3184 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3185 {
3186 /* It's safe to use -coeff even for INT_MIN, as the
3187 result is interpreted as an unsigned coefficient.
3188 Exclude cost of op0 from max_cost to match the cost
3189 calculation of the synth_mult. */
3190 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3191 - neg_cost(speed, mode));
3192 if (max_cost > 0
3193 && choose_mult_variant (mode, -coeff, &algorithm,
3194 &variant, max_cost))
3195 {
3196 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3197 &algorithm, variant);
3198 return expand_unop (mode, neg_optab, temp, target, 0);
3199 }
3200 goto skip_synth;
3201 }
3202
3203 /* Exclude cost of op0 from max_cost to match the cost
3204 calculation of the synth_mult. */
3205 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3206 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3207 return expand_mult_const (mode, op0, coeff, target,
3208 &algorithm, variant);
3209 }
3210 skip_synth:
3211
3212 /* Expand x*2.0 as x+x. */
3213 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3214 {
3215 REAL_VALUE_TYPE d;
3216 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3217
3218 if (REAL_VALUES_EQUAL (d, dconst2))
3219 {
3220 op0 = force_reg (GET_MODE (op0), op0);
3221 return expand_binop (mode, add_optab, op0, op0,
3222 target, unsignedp, OPTAB_LIB_WIDEN);
3223 }
3224 }
3225 skip_scalar:
3226
3227 /* This used to use umul_optab if unsigned, but for non-widening multiply
3228 there is no difference between signed and unsigned. */
3229 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3230 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3231 gcc_assert (op0);
3232 return op0;
3233 }
3234
3235 /* Return a cost estimate for multiplying a register by the given
3236 COEFFicient in the given MODE and SPEED. */
3237
3238 int
3239 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3240 {
3241 int max_cost;
3242 struct algorithm algorithm;
3243 enum mult_variant variant;
3244
3245 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3246 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3247 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3248 return algorithm.cost.cost;
3249 else
3250 return max_cost;
3251 }
3252
3253 /* Perform a widening multiplication and return an rtx for the result.
3254 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3255 TARGET is a suggestion for where to store the result (an rtx).
3256 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3257 or smul_widen_optab.
3258
3259 We check specially for a constant integer as OP1, comparing the
3260 cost of a widening multiply against the cost of a sequence of shifts
3261 and adds. */
3262
3263 rtx
3264 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3265 int unsignedp, optab this_optab)
3266 {
3267 bool speed = optimize_insn_for_speed_p ();
3268 rtx cop1;
3269
3270 if (CONST_INT_P (op1)
3271 && GET_MODE (op0) != VOIDmode
3272 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3273 this_optab == umul_widen_optab))
3274 && CONST_INT_P (cop1)
3275 && (INTVAL (cop1) >= 0
3276 || HWI_COMPUTABLE_MODE_P (mode)))
3277 {
3278 HOST_WIDE_INT coeff = INTVAL (cop1);
3279 int max_cost;
3280 enum mult_variant variant;
3281 struct algorithm algorithm;
3282
3283 /* Special case powers of two. */
3284 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3285 {
3286 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3287 return expand_shift (LSHIFT_EXPR, mode, op0,
3288 floor_log2 (coeff), target, unsignedp);
3289 }
3290
3291 /* Exclude cost of op0 from max_cost to match the cost
3292 calculation of the synth_mult. */
3293 max_cost = mul_widen_cost (speed, mode);
3294 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3295 max_cost))
3296 {
3297 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3298 return expand_mult_const (mode, op0, coeff, target,
3299 &algorithm, variant);
3300 }
3301 }
3302 return expand_binop (mode, this_optab, op0, op1, target,
3303 unsignedp, OPTAB_LIB_WIDEN);
3304 }
3305 \f
3306 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3307 replace division by D, and put the least significant N bits of the result
3308 in *MULTIPLIER_PTR and return the most significant bit.
3309
3310 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3311 needed precision is in PRECISION (should be <= N).
3312
3313 PRECISION should be as small as possible so this function can choose
3314 multiplier more freely.
3315
3316 The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that
3317 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3318
3319 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3320 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
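/* For example, with D = 3 and N = PRECISION = 32 this computes
   *MULTIPLIER_PTR = 0xaaaaaaab, *POST_SHIFT_PTR = 1 and *LGUP_PTR = 2,
   and returns 0, giving the familiar x / 3 = (x * 0xaaaaaaab) >> 33 for
   unsigned 32-bit x.  */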
3321
3322 unsigned HOST_WIDE_INT
3323 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3324 unsigned HOST_WIDE_INT *multiplier_ptr,
3325 int *post_shift_ptr, int *lgup_ptr)
3326 {
3327 double_int mhigh, mlow;
3328 int lgup, post_shift;
3329 int pow, pow2;
3330
3331 /* lgup = ceil(log2(divisor)); */
3332 lgup = ceil_log2 (d);
3333
3334 gcc_assert (lgup <= n);
3335
3336 pow = n + lgup;
3337 pow2 = n + lgup - precision;
3338
3339 /* We could handle this with some effort, but this case is much
3340 better handled directly with a scc insn, so rely on caller using
3341 that. */
3342 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3343
3344 /* mlow = 2^(N + lgup)/d */
3345 double_int val = double_int_zero.set_bit (pow);
3346 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3347
3348 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3349 val |= double_int_zero.set_bit (pow2);
3350 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3351
3352 gcc_assert (!mhigh.high || val.high - d < d);
3353 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3354 /* Assert that mlow < mhigh. */
3355 gcc_assert (mlow.ult (mhigh));
3356
3357 /* If precision == N, then mlow, mhigh exceed 2^N
3358 (but they do not exceed 2^(N+1)). */
3359
3360 /* Reduce to lowest terms. */
3361 for (post_shift = lgup; post_shift > 0; post_shift--)
3362 {
3363 int shft = HOST_BITS_PER_WIDE_INT - 1;
3364 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3365 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3366 if (ml_lo >= mh_lo)
3367 break;
3368
3369 mlow = double_int::from_uhwi (ml_lo);
3370 mhigh = double_int::from_uhwi (mh_lo);
3371 }
3372
3373 *post_shift_ptr = post_shift;
3374 *lgup_ptr = lgup;
3375 if (n < HOST_BITS_PER_WIDE_INT)
3376 {
3377 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3378 *multiplier_ptr = mhigh.low & mask;
3379 return mhigh.low >= mask;
3380 }
3381 else
3382 {
3383 *multiplier_ptr = mhigh.low;
3384 return mhigh.high;
3385 }
3386 }
3387
3388 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3389 congruent to 1 (mod 2**N). */
3390
3391 static unsigned HOST_WIDE_INT
3392 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3393 {
3394 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3395
3396 /* The algorithm notes that the choice y = x satisfies
3397 x*y == 1 mod 2^3, since x is assumed odd.
3398 Each iteration doubles the number of bits of significance in y. */
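/* For example, with x == 7 and n == 32 the iteration converges to
   y == 0xb6db6db7; indeed 7 * 0xb6db6db7 == 5 * 2^32 + 1, which is
   1 modulo 2^32.  */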
3399
3400 unsigned HOST_WIDE_INT mask;
3401 unsigned HOST_WIDE_INT y = x;
3402 int nbit = 3;
3403
3404 mask = (n == HOST_BITS_PER_WIDE_INT
3405 ? ~(unsigned HOST_WIDE_INT) 0
3406 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3407
3408 while (nbit < n)
3409 {
3410 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3411 nbit *= 2;
3412 }
3413 return y;
3414 }
3415
3416 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3417 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3418 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3419 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3420 become signed.
3421
3422 The result is put in TARGET if that is convenient.
3423
3424 MODE is the mode of operation. */
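/* The adjustment relies on the identity X_unsigned == X_signed
   + 2^N * (X_signed < 0) for N-bit operands, so the unsigned and
   signed high parts of the product differ, modulo 2^N, by exactly
   (OP0 < 0 ? OP1 : 0) + (OP1 < 0 ? OP0 : 0).  Each correction term
   is built below as an arithmetic right shift by N - 1 bits (an
   all-ones mask for a negative operand) ANDed with the other
   operand, and is added when UNSIGNEDP and subtracted otherwise.  */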
3425
3426 rtx
3427 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3428 rtx op1, rtx target, int unsignedp)
3429 {
3430 rtx tem;
3431 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3432
3433 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3434 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3435 tem = expand_and (mode, tem, op1, NULL_RTX);
3436 adj_operand
3437 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3438 adj_operand);
3439
3440 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3441 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3442 tem = expand_and (mode, tem, op0, NULL_RTX);
3443 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3444 target);
3445
3446 return target;
3447 }
3448
3449 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3450
3451 static rtx
3452 extract_high_half (enum machine_mode mode, rtx op)
3453 {
3454 enum machine_mode wider_mode;
3455
3456 if (mode == word_mode)
3457 return gen_highpart (mode, op);
3458
3459 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3460
3461 wider_mode = GET_MODE_WIDER_MODE (mode);
3462 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3463 GET_MODE_BITSIZE (mode), 0, 1);
3464 return convert_modes (mode, wider_mode, op, 0);
3465 }
3466
3467 /* Like expmed_mult_highpart, but only consider using a multiplication
3468 optab. OP1 is an rtx for the constant operand. */
3469
3470 static rtx
3471 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3472 rtx target, int unsignedp, int max_cost)
3473 {
3474 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3475 enum machine_mode wider_mode;
3476 optab moptab;
3477 rtx tem;
3478 int size;
3479 bool speed = optimize_insn_for_speed_p ();
3480
3481 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3482
3483 wider_mode = GET_MODE_WIDER_MODE (mode);
3484 size = GET_MODE_BITSIZE (mode);
3485
3486 /* Firstly, try using a multiplication insn that only generates the needed
3487 high part of the product, and in the sign flavor of unsignedp. */
3488 if (mul_highpart_cost (speed, mode) < max_cost)
3489 {
3490 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3491 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3492 unsignedp, OPTAB_DIRECT);
3493 if (tem)
3494 return tem;
3495 }
3496
3497 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3498 Need to adjust the result after the multiplication. */
3499 if (size - 1 < BITS_PER_WORD
3500 && (mul_highpart_cost (speed, mode)
3501 + 2 * shift_cost (speed, mode, size-1)
3502 + 4 * add_cost (speed, mode) < max_cost))
3503 {
3504 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3505 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3506 unsignedp, OPTAB_DIRECT);
3507 if (tem)
3508 /* We used the wrong signedness. Adjust the result. */
3509 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3510 tem, unsignedp);
3511 }
3512
3513 /* Try widening multiplication. */
3514 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3515 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3516 && mul_widen_cost (speed, wider_mode) < max_cost)
3517 {
3518 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3519 unsignedp, OPTAB_WIDEN);
3520 if (tem)
3521 return extract_high_half (mode, tem);
3522 }
3523
3524 /* Try widening the mode and performing a non-widening multiplication. */
3525 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3526 && size - 1 < BITS_PER_WORD
3527 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3528 < max_cost))
3529 {
3530 rtx insns, wop0, wop1;
3531
3532 /* We need to widen the operands, for example to ensure the
3533 constant multiplier is correctly sign or zero extended.
3534 Use a sequence to clean up any instructions emitted by
3535 the conversions if things don't work out. */
3536 start_sequence ();
3537 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3538 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3539 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3540 unsignedp, OPTAB_WIDEN);
3541 insns = get_insns ();
3542 end_sequence ();
3543
3544 if (tem)
3545 {
3546 emit_insn (insns);
3547 return extract_high_half (mode, tem);
3548 }
3549 }
3550
3551 /* Try widening multiplication of opposite signedness, and adjust. */
3552 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3553 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3554 && size - 1 < BITS_PER_WORD
3555 && (mul_widen_cost (speed, wider_mode)
3556 + 2 * shift_cost (speed, mode, size-1)
3557 + 4 * add_cost (speed, mode) < max_cost))
3558 {
3559 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3560 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3561 if (tem != 0)
3562 {
3563 tem = extract_high_half (mode, tem);
3564 /* We used the wrong signedness. Adjust the result. */
3565 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3566 target, unsignedp);
3567 }
3568 }
3569
3570 return 0;
3571 }
3572
3573 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3574 putting the high half of the result in TARGET if that is convenient,
3575 and return where the result is. If the operation can not be performed,
3576 0 is returned.
3577
3578 MODE is the mode of operation and result.
3579
3580 UNSIGNEDP nonzero means unsigned multiply.
3581
3582 MAX_COST is the total allowed cost for the expanded RTL. */
3583
3584 static rtx
3585 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3586 rtx target, int unsignedp, int max_cost)
3587 {
3588 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3589 unsigned HOST_WIDE_INT cnst1;
3590 int extra_cost;
3591 bool sign_adjust = false;
3592 enum mult_variant variant;
3593 struct algorithm alg;
3594 rtx tem;
3595 bool speed = optimize_insn_for_speed_p ();
3596
3597 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3598 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3599 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3600
3601 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3602
3603 /* We can't optimize modes wider than BITS_PER_WORD.
3604 ??? We might be able to perform double-word arithmetic if
3605 mode == word_mode; however, all the cost calculations in
3606 synth_mult etc. assume single-word operations. */
3607 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3608 return expmed_mult_highpart_optab (mode, op0, op1, target,
3609 unsignedp, max_cost);
3610
3611 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3612
3613 /* Check whether we try to multiply by a negative constant. */
3614 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3615 {
3616 sign_adjust = true;
3617 extra_cost += add_cost (speed, mode);
3618 }
3619
3620 /* See whether shift/add multiplication is cheap enough. */
3621 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3622 max_cost - extra_cost))
3623 {
3624 /* See whether the specialized multiplication optabs are
3625 cheaper than the shift/add version. */
3626 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3627 alg.cost.cost + extra_cost);
3628 if (tem)
3629 return tem;
3630
3631 tem = convert_to_mode (wider_mode, op0, unsignedp);
3632 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3633 tem = extract_high_half (mode, tem);
3634
3635 /* Adjust result for signedness. */
3636 if (sign_adjust)
3637 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3638
3639 return tem;
3640 }
3641 return expmed_mult_highpart_optab (mode, op0, op1, target,
3642 unsignedp, max_cost);
3643 }
3644
3645
3646 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3647
3648 static rtx
3649 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3650 {
3651 unsigned HOST_WIDE_INT masklow, maskhigh;
3652 rtx result, temp, shift, label;
3653 int logd;
3654
3655 logd = floor_log2 (d);
3656 result = gen_reg_rtx (mode);
3657
3658 /* Avoid conditional branches when they're expensive. */
3659 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3660 && optimize_insn_for_speed_p ())
3661 {
3662 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3663 mode, 0, -1);
3664 if (signmask)
3665 {
3666 signmask = force_reg (mode, signmask);
3667 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3668 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3669
3670 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3671 which instruction sequence to use. If logical right shifts
3672 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3673 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
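/* For instance, with d == 4 (logd == 2) in a 32-bit mode, where
   signmask is -1 for negative OP0 and 0 otherwise:
     shift form:  r = ((x + (signmask >> 30)) & 3) - (signmask >> 30)
                  (logical shift)
     xor form:    r = ((((x ^ signmask) - signmask) & 3) ^ signmask)
                      - signmask
   e.g. x == -5 gives r == -1 in both forms, the truncating C
   remainder.  */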
3674
3675 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3676 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3677 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3678 > COSTS_N_INSNS (2)))
3679 {
3680 temp = expand_binop (mode, xor_optab, op0, signmask,
3681 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3682 temp = expand_binop (mode, sub_optab, temp, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3685 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3686 temp = expand_binop (mode, xor_optab, temp, signmask,
3687 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688 temp = expand_binop (mode, sub_optab, temp, signmask,
3689 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3690 }
3691 else
3692 {
3693 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3694 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3695 signmask = force_reg (mode, signmask);
3696
3697 temp = expand_binop (mode, add_optab, op0, signmask,
3698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3699 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3700 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3701 temp = expand_binop (mode, sub_optab, temp, signmask,
3702 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3703 }
3704 return temp;
3705 }
3706 }
3707
3708 /* Mask contains the mode's signbit and the significant bits of the
3709 modulus. By including the signbit in the operation, many targets
3710 can avoid an explicit compare operation in the following comparison
3711 against zero. */
3712
3713 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3714 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3715 {
3716 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3717 maskhigh = -1;
3718 }
3719 else
3720 maskhigh = (HOST_WIDE_INT) -1
3721 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3722
3723 temp = expand_binop (mode, and_optab, op0,
3724 immed_double_const (masklow, maskhigh, mode),
3725 result, 1, OPTAB_LIB_WIDEN);
3726 if (temp != result)
3727 emit_move_insn (result, temp);
3728
3729 label = gen_label_rtx ();
3730 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3731
3732 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3733 0, OPTAB_LIB_WIDEN);
3734 masklow = (HOST_WIDE_INT) -1 << logd;
3735 maskhigh = -1;
3736 temp = expand_binop (mode, ior_optab, temp,
3737 immed_double_const (masklow, maskhigh, mode),
3738 result, 1, OPTAB_LIB_WIDEN);
3739 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3740 0, OPTAB_LIB_WIDEN);
3741 if (temp != result)
3742 emit_move_insn (result, temp);
3743 emit_label (label);
3744 return result;
3745 }
3746
3747 /* Expand signed division of OP0 by a power of two D in mode MODE.
3748 This routine is only called for positive values of D. */
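/* All of the expansions below compute the truncating quotient as
     (OP0 + (OP0 < 0 ? D - 1 : 0)) >> logd
   i.e. a negative dividend is biased upwards before the arithmetic
   shift; for instance (-7) / 4 becomes (-7 + 3) >> 2 == -1.  */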
3749
3750 static rtx
3751 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3752 {
3753 rtx temp, label;
3754 int logd;
3755
3756 logd = floor_log2 (d);
3757
3758 if (d == 2
3759 && BRANCH_COST (optimize_insn_for_speed_p (),
3760 false) >= 1)
3761 {
3762 temp = gen_reg_rtx (mode);
3763 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3764 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3765 0, OPTAB_LIB_WIDEN);
3766 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3767 }
3768
3769 #ifdef HAVE_conditional_move
3770 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3771 >= 2)
3772 {
3773 rtx temp2;
3774
3775 /* ??? emit_conditional_move forces a stack adjustment via
3776 compare_from_rtx so, if the sequence is discarded, it will
3777 be lost. Do it now instead. */
3778 do_pending_stack_adjust ();
3779
3780 start_sequence ();
3781 temp2 = copy_to_mode_reg (mode, op0);
3782 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3783 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3784 temp = force_reg (mode, temp);
3785
3786 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3787 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3788 mode, temp, temp2, mode, 0);
3789 if (temp2)
3790 {
3791 rtx seq = get_insns ();
3792 end_sequence ();
3793 emit_insn (seq);
3794 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3795 }
3796 end_sequence ();
3797 }
3798 #endif
3799
3800 if (BRANCH_COST (optimize_insn_for_speed_p (),
3801 false) >= 2)
3802 {
3803 int ushift = GET_MODE_BITSIZE (mode) - logd;
3804
3805 temp = gen_reg_rtx (mode);
3806 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3807 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3808 > COSTS_N_INSNS (1))
3809 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3810 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3811 else
3812 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3813 ushift, NULL_RTX, 1);
3814 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3815 0, OPTAB_LIB_WIDEN);
3816 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3817 }
3818
3819 label = gen_label_rtx ();
3820 temp = copy_to_mode_reg (mode, op0);
3821 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3822 expand_inc (temp, GEN_INT (d - 1));
3823 emit_label (label);
3824 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3825 }
3826 \f
3827 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3828 if that is convenient, and returning where the result is.
3829 You may request either the quotient or the remainder as the result;
3830 specify REM_FLAG nonzero to get the remainder.
3831
3832 CODE is the expression code for which kind of division this is;
3833 it controls how rounding is done. MODE is the machine mode to use.
3834 UNSIGNEDP nonzero means do unsigned division. */
3835
3836 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3837 and then correct it by or'ing in missing high bits
3838 if result of ANDI is nonzero.
3839 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3840 This could optimize to a bfexts instruction.
3841 But C doesn't use these operations, so their optimizations are
3842 left for later. */
3843 /* ??? For modulo, we don't actually need the highpart of the first product,
3844 the low part will do nicely. And for small divisors, the second multiply
3845 can also be a low-part only multiply or even be completely left out.
3846 E.g. to calculate the remainder of a division by 3 with a 32 bit
3847 multiply, multiply with 0x55555556 and extract the upper two bits;
3848 the result is exact for inputs up to 0x1fffffff.
3849 The input range can be reduced by using cross-sum rules.
3850 For odd divisors >= 3, the following table gives right shift counts
3851 so that if a number is shifted by an integer multiple of the given
3852 amount, the remainder stays the same:
3853 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3854 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3855 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3856 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3857 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3858
3859 Cross-sum rules for even numbers can be derived by leaving as many bits
3860 to the right alone as the divisor has zeros to the right.
3861 E.g. if x is an unsigned 32 bit number:
3862 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3863 */
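/* Checking the division-by-3 example above with x == 5: the low 32
   bits of 5 * 0x55555556 are 0xaaaaaaae, and their top two bits are
   2, which is indeed 5 mod 3.  */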
3864
3865 rtx
3866 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3867 rtx op0, rtx op1, rtx target, int unsignedp)
3868 {
3869 enum machine_mode compute_mode;
3870 rtx tquotient;
3871 rtx quotient = 0, remainder = 0;
3872 rtx last;
3873 int size;
3874 rtx insn;
3875 optab optab1, optab2;
3876 int op1_is_constant, op1_is_pow2 = 0;
3877 int max_cost, extra_cost;
3878 static HOST_WIDE_INT last_div_const = 0;
3879 static HOST_WIDE_INT ext_op1;
3880 bool speed = optimize_insn_for_speed_p ();
3881
3882 op1_is_constant = CONST_INT_P (op1);
3883 if (op1_is_constant)
3884 {
3885 ext_op1 = INTVAL (op1);
3886 if (unsignedp)
3887 ext_op1 &= GET_MODE_MASK (mode);
3888 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3889 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3890 }
3891
3892 /*
3893 This is the structure of expand_divmod:
3894
3895 First comes code to fix up the operands so we can perform the operations
3896 correctly and efficiently.
3897
3898 Second comes a switch statement with code specific for each rounding mode.
3899 For some special operands this code emits all RTL for the desired
3900 operation, for other cases, it generates only a quotient and stores it in
3901 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3902 to indicate that it has not done anything.
3903
3904 Last comes code that finishes the operation. If QUOTIENT is set and
3905 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3906 QUOTIENT is not set, it is computed using trunc rounding.
3907
3908 We try to generate special code for division and remainder when OP1 is a
3909 constant. If |OP1| = 2**n we can use shifts and some other fast
3910 operations. For other values of OP1, we compute a carefully selected
3911 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3912 by m.
3913
3914 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3915 half of the product. Different strategies for generating the product are
3916 implemented in expmed_mult_highpart.
3917
3918 If what we actually want is the remainder, we generate that by another
3919 by-constant multiplication and a subtraction. */
3920
3921 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3922 code below will malfunction if we are, so check here and handle
3923 the special case if so. */
3924 if (op1 == const1_rtx)
3925 return rem_flag ? const0_rtx : op0;
3926
3927 /* When dividing by -1, we could get an overflow.
3928 negv_optab can handle overflows. */
3929 if (! unsignedp && op1 == constm1_rtx)
3930 {
3931 if (rem_flag)
3932 return const0_rtx;
3933 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3934 ? negv_optab : neg_optab, op0, target, 0);
3935 }
3936
3937 if (target
3938 /* Don't use the function value register as a target
3939 since we have to read it as well as write it,
3940 and function-inlining gets confused by this. */
3941 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3942 /* Don't clobber an operand while doing a multi-step calculation. */
3943 || ((rem_flag || op1_is_constant)
3944 && (reg_mentioned_p (target, op0)
3945 || (MEM_P (op0) && MEM_P (target))))
3946 || reg_mentioned_p (target, op1)
3947 || (MEM_P (op1) && MEM_P (target))))
3948 target = 0;
3949
3950 /* Get the mode in which to perform this computation. Normally it will
3951 be MODE, but sometimes we can't do the desired operation in MODE.
3952 If so, pick a wider mode in which we can do the operation. Convert
3953 to that mode at the start to avoid repeated conversions.
3954
3955 First see what operations we need. These depend on the expression
3956 we are evaluating. (We assume that divxx3 insns exist under the
3957 same conditions as modxx3 insns do, and that these insns don't normally
3958 fail. If these assumptions are not correct, we may generate less
3959 efficient code in some cases.)
3960
3961 Then see if we find a mode in which we can open-code that operation
3962 (either a division, modulus, or shift). Finally, check for the smallest
3963 mode for which we can do the operation with a library call. */
3964
3965 /* We might want to refine this now that we have division-by-constant
3966 optimization. Since expmed_mult_highpart tries so many variants, it is
3967 not straightforward to generalize this. Maybe we should make an array
3968 of possible modes in init_expmed? Save this for GCC 2.7. */
3969
3970 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3971 ? (unsignedp ? lshr_optab : ashr_optab)
3972 : (unsignedp ? udiv_optab : sdiv_optab));
3973 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3974 ? optab1
3975 : (unsignedp ? udivmod_optab : sdivmod_optab));
3976
3977 for (compute_mode = mode; compute_mode != VOIDmode;
3978 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3979 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3980 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3981 break;
3982
3983 if (compute_mode == VOIDmode)
3984 for (compute_mode = mode; compute_mode != VOIDmode;
3985 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3986 if (optab_libfunc (optab1, compute_mode)
3987 || optab_libfunc (optab2, compute_mode))
3988 break;
3989
3990 /* If we still couldn't find a mode, use MODE, but expand_binop will
3991 probably die. */
3992 if (compute_mode == VOIDmode)
3993 compute_mode = mode;
3994
3995 if (target && GET_MODE (target) == compute_mode)
3996 tquotient = target;
3997 else
3998 tquotient = gen_reg_rtx (compute_mode);
3999
4000 size = GET_MODE_BITSIZE (compute_mode);
4001 #if 0
4002 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4003 (mode), and thereby get better code when OP1 is a constant. Do that
4004 later. It will require going over all usages of SIZE below. */
4005 size = GET_MODE_BITSIZE (mode);
4006 #endif
4007
4008 /* Only deduct something for a REM if the last divide done was
4009 for a different constant. Then set the constant of the last
4010 divide. */
4011 max_cost = (unsignedp
4012 ? udiv_cost (speed, compute_mode)
4013 : sdiv_cost (speed, compute_mode));
4014 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4015 && INTVAL (op1) == last_div_const))
4016 max_cost -= (mul_cost (speed, compute_mode)
4017 + add_cost (speed, compute_mode));
4018
4019 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4020
4021 /* Now convert to the best mode to use. */
4022 if (compute_mode != mode)
4023 {
4024 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4025 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4026
4027 /* convert_modes may have placed op1 into a register, so we
4028 must recompute the following. */
4029 op1_is_constant = CONST_INT_P (op1);
4030 op1_is_pow2 = (op1_is_constant
4031 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4032 || (! unsignedp
4033 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4034 }
4035
4036 /* If one of the operands is a volatile MEM, copy it into a register. */
4037
4038 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4039 op0 = force_reg (compute_mode, op0);
4040 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4041 op1 = force_reg (compute_mode, op1);
4042
4043 /* If we need the remainder or if OP1 is constant, we need to
4044 put OP0 in a register in case it has any queued subexpressions. */
4045 if (rem_flag || op1_is_constant)
4046 op0 = force_reg (compute_mode, op0);
4047
4048 last = get_last_insn ();
4049
4050 /* Promote floor rounding to trunc rounding for unsigned operations. */
4051 if (unsignedp)
4052 {
4053 if (code == FLOOR_DIV_EXPR)
4054 code = TRUNC_DIV_EXPR;
4055 if (code == FLOOR_MOD_EXPR)
4056 code = TRUNC_MOD_EXPR;
4057 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4058 code = TRUNC_DIV_EXPR;
4059 }
4060
4061 if (op1 != const0_rtx)
4062 switch (code)
4063 {
4064 case TRUNC_MOD_EXPR:
4065 case TRUNC_DIV_EXPR:
4066 if (op1_is_constant)
4067 {
4068 if (unsignedp)
4069 {
4070 unsigned HOST_WIDE_INT mh, ml;
4071 int pre_shift, post_shift;
4072 int dummy;
4073 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4074 & GET_MODE_MASK (compute_mode));
4075
4076 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4077 {
4078 pre_shift = floor_log2 (d);
4079 if (rem_flag)
4080 {
4081 remainder
4082 = expand_binop (compute_mode, and_optab, op0,
4083 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4084 remainder, 1,
4085 OPTAB_LIB_WIDEN);
4086 if (remainder)
4087 return gen_lowpart (mode, remainder);
4088 }
4089 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4090 pre_shift, tquotient, 1);
4091 }
4092 else if (size <= HOST_BITS_PER_WIDE_INT)
4093 {
4094 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4095 {
4096 /* Most significant bit of divisor is set; emit an scc
4097 insn. */
4098 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4099 compute_mode, 1, 1);
4100 }
4101 else
4102 {
4103 /* Find a suitable multiplier and right shift count
4104 instead of multiplying with D. */
4105
4106 mh = choose_multiplier (d, size, size,
4107 &ml, &post_shift, &dummy);
4108
4109 /* If the suggested multiplier is more than SIZE bits,
4110 we can do better for even divisors, using an
4111 initial right shift. */
4112 if (mh != 0 && (d & 1) == 0)
4113 {
4114 pre_shift = floor_log2 (d & -d);
4115 mh = choose_multiplier (d >> pre_shift, size,
4116 size - pre_shift,
4117 &ml, &post_shift, &dummy);
4118 gcc_assert (!mh);
4119 }
4120 else
4121 pre_shift = 0;
4122
4123 if (mh != 0)
4124 {
4125 rtx t1, t2, t3, t4;
4126
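/* For a 32-bit unsigned divide by 7, ml == 0x24924925 with the
   extra high bit set (mh != 0) and post_shift == 3, so this emits
   t1 = high part of op0 * ml, t2 = op0 - t1, t3 = t2 >> 1,
   t4 = t1 + t3 and quotient = t4 >> (post_shift - 1); e.g.
   op0 == 100 gives t1 == 14, t2 == 86, t3 == 43, t4 == 57 and
   quotient 14.  */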
4127 if (post_shift - 1 >= BITS_PER_WORD)
4128 goto fail1;
4129
4130 extra_cost
4131 = (shift_cost (speed, compute_mode, post_shift - 1)
4132 + shift_cost (speed, compute_mode, 1)
4133 + 2 * add_cost (speed, compute_mode));
4134 t1 = expmed_mult_highpart (compute_mode, op0,
4135 GEN_INT (ml),
4136 NULL_RTX, 1,
4137 max_cost - extra_cost);
4138 if (t1 == 0)
4139 goto fail1;
4140 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4141 op0, t1),
4142 NULL_RTX);
4143 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4144 t2, 1, NULL_RTX, 1);
4145 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4146 t1, t3),
4147 NULL_RTX);
4148 quotient = expand_shift
4149 (RSHIFT_EXPR, compute_mode, t4,
4150 post_shift - 1, tquotient, 1);
4151 }
4152 else
4153 {
4154 rtx t1, t2;
4155
4156 if (pre_shift >= BITS_PER_WORD
4157 || post_shift >= BITS_PER_WORD)
4158 goto fail1;
4159
4160 t1 = expand_shift
4161 (RSHIFT_EXPR, compute_mode, op0,
4162 pre_shift, NULL_RTX, 1);
4163 extra_cost
4164 = (shift_cost (speed, compute_mode, pre_shift)
4165 + shift_cost (speed, compute_mode, post_shift));
4166 t2 = expmed_mult_highpart (compute_mode, t1,
4167 GEN_INT (ml),
4168 NULL_RTX, 1,
4169 max_cost - extra_cost);
4170 if (t2 == 0)
4171 goto fail1;
4172 quotient = expand_shift
4173 (RSHIFT_EXPR, compute_mode, t2,
4174 post_shift, tquotient, 1);
4175 }
4176 }
4177 }
4178 else /* Mode too wide for the tricky code */
4179 break;
4180
4181 insn = get_last_insn ();
4182 if (insn != last)
4183 set_dst_reg_note (insn, REG_EQUAL,
4184 gen_rtx_UDIV (compute_mode, op0, op1),
4185 quotient);
4186 }
4187 else /* TRUNC_DIV, signed */
4188 {
4189 unsigned HOST_WIDE_INT ml;
4190 int lgup, post_shift;
4191 rtx mlr;
4192 HOST_WIDE_INT d = INTVAL (op1);
4193 unsigned HOST_WIDE_INT abs_d;
4194
4195 /* Since d might be INT_MIN, we have to cast to
4196 unsigned HOST_WIDE_INT before negating to avoid
4197 undefined signed overflow. */
4198 abs_d = (d >= 0
4199 ? (unsigned HOST_WIDE_INT) d
4200 : - (unsigned HOST_WIDE_INT) d);
4201
4202 /* n rem d = n rem -d */
4203 if (rem_flag && d < 0)
4204 {
4205 d = abs_d;
4206 op1 = gen_int_mode (abs_d, compute_mode);
4207 }
4208
4209 if (d == 1)
4210 quotient = op0;
4211 else if (d == -1)
4212 quotient = expand_unop (compute_mode, neg_optab, op0,
4213 tquotient, 0);
4214 else if (HOST_BITS_PER_WIDE_INT >= size
4215 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4216 {
4217 /* This case is not handled correctly below. */
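/* ABS_D is 2^(SIZE-1) here, i.e. OP1 is the most negative value of
   the mode.  The truncating quotient is 1 exactly when OP0 == OP1
   and 0 for every other OP0 (whose magnitude is smaller), so a
   single EQ store-flag computes it directly.  */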
4218 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4219 compute_mode, 1, 1);
4220 if (quotient == 0)
4221 goto fail1;
4222 }
4223 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4224 && (rem_flag
4225 ? smod_pow2_cheap (speed, compute_mode)
4226 : sdiv_pow2_cheap (speed, compute_mode))
4227 /* We assume that the cheap metric is true if the
4228 optab has an expander for this mode. */
4229 && ((optab_handler ((rem_flag ? smod_optab
4230 : sdiv_optab),
4231 compute_mode)
4232 != CODE_FOR_nothing)
4233 || (optab_handler (sdivmod_optab,
4234 compute_mode)
4235 != CODE_FOR_nothing)))
4236 ;
4237 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4238 {
4239 if (rem_flag)
4240 {
4241 remainder = expand_smod_pow2 (compute_mode, op0, d);
4242 if (remainder)
4243 return gen_lowpart (mode, remainder);
4244 }
4245
4246 if (sdiv_pow2_cheap (speed, compute_mode)
4247 && ((optab_handler (sdiv_optab, compute_mode)
4248 != CODE_FOR_nothing)
4249 || (optab_handler (sdivmod_optab, compute_mode)
4250 != CODE_FOR_nothing)))
4251 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4252 compute_mode, op0,
4253 gen_int_mode (abs_d,
4254 compute_mode),
4255 NULL_RTX, 0);
4256 else
4257 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4258
4259 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4260 negate the quotient. */
4261 if (d < 0)
4262 {
4263 insn = get_last_insn ();
4264 if (insn != last
4265 && abs_d < ((unsigned HOST_WIDE_INT) 1
4266 << (HOST_BITS_PER_WIDE_INT - 1)))
4267 set_dst_reg_note (insn, REG_EQUAL,
4268 gen_rtx_DIV (compute_mode, op0,
4269 gen_int_mode
4270 (abs_d,
4271 compute_mode)),
4272 quotient);
4273
4274 quotient = expand_unop (compute_mode, neg_optab,
4275 quotient, quotient, 0);
4276 }
4277 }
4278 else if (size <= HOST_BITS_PER_WIDE_INT)
4279 {
4280 choose_multiplier (abs_d, size, size - 1,
4281 &ml, &post_shift, &lgup);
4282 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4283 {
4284 rtx t1, t2, t3;
4285
4286 if (post_shift >= BITS_PER_WORD
4287 || size - 1 >= BITS_PER_WORD)
4288 goto fail1;
4289
4290 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4291 + shift_cost (speed, compute_mode, size - 1)
4292 + add_cost (speed, compute_mode));
4293 t1 = expmed_mult_highpart (compute_mode, op0,
4294 GEN_INT (ml), NULL_RTX, 0,
4295 max_cost - extra_cost);
4296 if (t1 == 0)
4297 goto fail1;
4298 t2 = expand_shift
4299 (RSHIFT_EXPR, compute_mode, t1,
4300 post_shift, NULL_RTX, 0);
4301 t3 = expand_shift
4302 (RSHIFT_EXPR, compute_mode, op0,
4303 size - 1, NULL_RTX, 0);
4304 if (d < 0)
4305 quotient
4306 = force_operand (gen_rtx_MINUS (compute_mode,
4307 t3, t2),
4308 tquotient);
4309 else
4310 quotient
4311 = force_operand (gen_rtx_MINUS (compute_mode,
4312 t2, t3),
4313 tquotient);
4314 }
4315 else
4316 {
4317 rtx t1, t2, t3, t4;
4318
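/* Here the multiplier needs all SIZE bits, so it is used as a
   negative value and OP0 is added back to the high product.  For a
   32-bit signed divide by 7, ml becomes 0x92492493 with
   post_shift == 2, giving t2 = high (op0 * ml) + op0, t3 = t2 >> 2,
   t4 = op0 >> 31 and, for positive D, quotient = t3 - t4;
   op0 == -100 yields t2 == -58, t3 == -15, t4 == -1 and
   quotient -14.  */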
4319 if (post_shift >= BITS_PER_WORD
4320 || size - 1 >= BITS_PER_WORD)
4321 goto fail1;
4322
4323 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4324 mlr = gen_int_mode (ml, compute_mode);
4325 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4326 + shift_cost (speed, compute_mode, size - 1)
4327 + 2 * add_cost (speed, compute_mode));
4328 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4329 NULL_RTX, 0,
4330 max_cost - extra_cost);
4331 if (t1 == 0)
4332 goto fail1;
4333 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4334 t1, op0),
4335 NULL_RTX);
4336 t3 = expand_shift
4337 (RSHIFT_EXPR, compute_mode, t2,
4338 post_shift, NULL_RTX, 0);
4339 t4 = expand_shift
4340 (RSHIFT_EXPR, compute_mode, op0,
4341 size - 1, NULL_RTX, 0);
4342 if (d < 0)
4343 quotient
4344 = force_operand (gen_rtx_MINUS (compute_mode,
4345 t4, t3),
4346 tquotient);
4347 else
4348 quotient
4349 = force_operand (gen_rtx_MINUS (compute_mode,
4350 t3, t4),
4351 tquotient);
4352 }
4353 }
4354 else /* Mode too wide for the tricky code */
4355 break;
4356
4357 insn = get_last_insn ();
4358 if (insn != last)
4359 set_dst_reg_note (insn, REG_EQUAL,
4360 gen_rtx_DIV (compute_mode, op0, op1),
4361 quotient);
4362 }
4363 break;
4364 }
4365 fail1:
4366 delete_insns_since (last);
4367 break;
4368
4369 case FLOOR_DIV_EXPR:
4370 case FLOOR_MOD_EXPR:
4371 /* We will come here only for signed operations. */
4372 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4373 {
4374 unsigned HOST_WIDE_INT mh, ml;
4375 int pre_shift, lgup, post_shift;
4376 HOST_WIDE_INT d = INTVAL (op1);
4377
4378 if (d > 0)
4379 {
4380 /* We could just as easily deal with negative constants here,
4381 but it does not seem worth the trouble for GCC 2.6. */
4382 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4383 {
4384 pre_shift = floor_log2 (d);
4385 if (rem_flag)
4386 {
4387 remainder = expand_binop (compute_mode, and_optab, op0,
4388 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4389 remainder, 0, OPTAB_LIB_WIDEN);
4390 if (remainder)
4391 return gen_lowpart (mode, remainder);
4392 }
4393 quotient = expand_shift
4394 (RSHIFT_EXPR, compute_mode, op0,
4395 pre_shift, tquotient, 0);
4396 }
4397 else
4398 {
4399 rtx t1, t2, t3, t4;
4400
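/* Floor division by a positive D uses a one's complement trick:
   t1 = op0 >> (size - 1) is the sign mask, t2 = op0 ^ t1, and after
   an unsigned high multiply by ml and a right shift the result is
   XORed with t1 again.  For D == 7, ml == 0x92492493 and
   post_shift == 2; op0 == -100 gives t2 == 99, t4 == 14 and
   quotient == 14 ^ -1 == -15 == floor (-100 / 7).  */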
4401 mh = choose_multiplier (d, size, size - 1,
4402 &ml, &post_shift, &lgup);
4403 gcc_assert (!mh);
4404
4405 if (post_shift < BITS_PER_WORD
4406 && size - 1 < BITS_PER_WORD)
4407 {
4408 t1 = expand_shift
4409 (RSHIFT_EXPR, compute_mode, op0,
4410 size - 1, NULL_RTX, 0);
4411 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4412 NULL_RTX, 0, OPTAB_WIDEN);
4413 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4414 + shift_cost (speed, compute_mode, size - 1)
4415 + 2 * add_cost (speed, compute_mode));
4416 t3 = expmed_mult_highpart (compute_mode, t2,
4417 GEN_INT (ml), NULL_RTX, 1,
4418 max_cost - extra_cost);
4419 if (t3 != 0)
4420 {
4421 t4 = expand_shift
4422 (RSHIFT_EXPR, compute_mode, t3,
4423 post_shift, NULL_RTX, 1);
4424 quotient = expand_binop (compute_mode, xor_optab,
4425 t4, t1, tquotient, 0,
4426 OPTAB_WIDEN);
4427 }
4428 }
4429 }
4430 }
4431 else
4432 {
4433 rtx nsign, t1, t2, t3, t4;
4434 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4435 op0, constm1_rtx), NULL_RTX);
4436 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4437 0, OPTAB_WIDEN);
4438 nsign = expand_shift
4439 (RSHIFT_EXPR, compute_mode, t2,
4440 size - 1, NULL_RTX, 0);
4441 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4442 NULL_RTX);
4443 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4444 NULL_RTX, 0);
4445 if (t4)
4446 {
4447 rtx t5;
4448 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4449 NULL_RTX, 0);
4450 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4451 t4, t5),
4452 tquotient);
4453 }
4454 }
4455 }
4456
4457 if (quotient != 0)
4458 break;
4459 delete_insns_since (last);
4460
4461 /* Try using an instruction that produces both the quotient and
4462 remainder, using truncation. We can easily compensate the quotient
4463 or remainder to get floor rounding, once we have the remainder.
4464 Notice that we also compute the final remainder value here,
4465 and return the result right away. */
4466 if (target == 0 || GET_MODE (target) != compute_mode)
4467 target = gen_reg_rtx (compute_mode);
4468
4469 if (rem_flag)
4470 {
4471 remainder
4472 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4473 quotient = gen_reg_rtx (compute_mode);
4474 }
4475 else
4476 {
4477 quotient
4478 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4479 remainder = gen_reg_rtx (compute_mode);
4480 }
4481
4482 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4483 quotient, remainder, 0))
4484 {
4485 /* This could be computed with a branch-less sequence.
4486 Save that for later. */
4487 rtx tem;
4488 rtx label = gen_label_rtx ();
4489 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4490 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4491 NULL_RTX, 0, OPTAB_WIDEN);
4492 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4493 expand_dec (quotient, const1_rtx);
4494 expand_inc (remainder, op1);
4495 emit_label (label);
4496 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4497 }
4498
4499 /* No luck with division elimination or divmod. Have to do it
4500 by conditionally adjusting op0 *and* the result. */
4501 {
4502 rtx label1, label2, label3, label4, label5;
4503 rtx adjusted_op0;
4504 rtx tem;
4505
4506 quotient = gen_reg_rtx (compute_mode);
4507 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4508 label1 = gen_label_rtx ();
4509 label2 = gen_label_rtx ();
4510 label3 = gen_label_rtx ();
4511 label4 = gen_label_rtx ();
4512 label5 = gen_label_rtx ();
4513 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4514 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4515 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4516 quotient, 0, OPTAB_LIB_WIDEN);
4517 if (tem != quotient)
4518 emit_move_insn (quotient, tem);
4519 emit_jump_insn (gen_jump (label5));
4520 emit_barrier ();
4521 emit_label (label1);
4522 expand_inc (adjusted_op0, const1_rtx);
4523 emit_jump_insn (gen_jump (label4));
4524 emit_barrier ();
4525 emit_label (label2);
4526 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4527 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4528 quotient, 0, OPTAB_LIB_WIDEN);
4529 if (tem != quotient)
4530 emit_move_insn (quotient, tem);
4531 emit_jump_insn (gen_jump (label5));
4532 emit_barrier ();
4533 emit_label (label3);
4534 expand_dec (adjusted_op0, const1_rtx);
4535 emit_label (label4);
4536 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4537 quotient, 0, OPTAB_LIB_WIDEN);
4538 if (tem != quotient)
4539 emit_move_insn (quotient, tem);
4540 expand_dec (quotient, const1_rtx);
4541 emit_label (label5);
4542 }
4543 break;
4544
4545 case CEIL_DIV_EXPR:
4546 case CEIL_MOD_EXPR:
4547 if (unsignedp)
4548 {
4549 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4550 {
4551 rtx t1, t2, t3;
4552 unsigned HOST_WIDE_INT d = INTVAL (op1);
4553 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4554 floor_log2 (d), tquotient, 1);
4555 t2 = expand_binop (compute_mode, and_optab, op0,
4556 GEN_INT (d - 1),
4557 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4558 t3 = gen_reg_rtx (compute_mode);
4559 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4560 compute_mode, 1, 1);
4561 if (t3 == 0)
4562 {
4563 rtx lab;
4564 lab = gen_label_rtx ();
4565 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4566 expand_inc (t1, const1_rtx);
4567 emit_label (lab);
4568 quotient = t1;
4569 }
4570 else
4571 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4572 t1, t3),
4573 tquotient);
4574 break;
4575 }
4576
4577 /* Try using an instruction that produces both the quotient and
4578 remainder, using truncation. We can easily compensate the
4579 quotient or remainder to get ceiling rounding, once we have the
4580 remainder. Notice that we also compute the final remainder
4581 value here, and return the result right away. */
4582 if (target == 0 || GET_MODE (target) != compute_mode)
4583 target = gen_reg_rtx (compute_mode);
4584
4585 if (rem_flag)
4586 {
4587 remainder = (REG_P (target)
4588 ? target : gen_reg_rtx (compute_mode));
4589 quotient = gen_reg_rtx (compute_mode);
4590 }
4591 else
4592 {
4593 quotient = (REG_P (target)
4594 ? target : gen_reg_rtx (compute_mode));
4595 remainder = gen_reg_rtx (compute_mode);
4596 }
4597
4598 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4599 remainder, 1))
4600 {
4601 /* This could be computed with a branch-less sequence.
4602 Save that for later. */
4603 rtx label = gen_label_rtx ();
4604 do_cmp_and_jump (remainder, const0_rtx, EQ,
4605 compute_mode, label);
4606 expand_inc (quotient, const1_rtx);
4607 expand_dec (remainder, op1);
4608 emit_label (label);
4609 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4610 }
4611
4612 /* No luck with division elimination or divmod. Have to do it
4613 by conditionally adjusting op0 *and* the result. */
4614 {
4615 rtx label1, label2;
4616 rtx adjusted_op0, tem;
4617
4618 quotient = gen_reg_rtx (compute_mode);
4619 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4620 label1 = gen_label_rtx ();
4621 label2 = gen_label_rtx ();
4622 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4623 compute_mode, label1);
4624 emit_move_insn (quotient, const0_rtx);
4625 emit_jump_insn (gen_jump (label2));
4626 emit_barrier ();
4627 emit_label (label1);
4628 expand_dec (adjusted_op0, const1_rtx);
4629 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4630 quotient, 1, OPTAB_LIB_WIDEN);
4631 if (tem != quotient)
4632 emit_move_insn (quotient, tem);
4633 expand_inc (quotient, const1_rtx);
4634 emit_label (label2);
4635 }
4636 }
4637 else /* signed */
4638 {
4639 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4640 && INTVAL (op1) >= 0)
4641 {
4642 /* This is extremely similar to the code for the unsigned case
4643 above. For 2.7 we should merge these variants, but for
4644 2.6.1 I don't want to touch the code for unsigned since that
4645 gets used in C. The signed case will only be used by other
4646 languages (Ada). */
4647
4648 rtx t1, t2, t3;
4649 unsigned HOST_WIDE_INT d = INTVAL (op1);
4650 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4651 floor_log2 (d), tquotient, 0);
4652 t2 = expand_binop (compute_mode, and_optab, op0,
4653 GEN_INT (d - 1),
4654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4655 t3 = gen_reg_rtx (compute_mode);
4656 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4657 compute_mode, 1, 1);
4658 if (t3 == 0)
4659 {
4660 rtx lab;
4661 lab = gen_label_rtx ();
4662 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4663 expand_inc (t1, const1_rtx);
4664 emit_label (lab);
4665 quotient = t1;
4666 }
4667 else
4668 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4669 t1, t3),
4670 tquotient);
4671 break;
4672 }
4673
4674 /* Try using an instruction that produces both the quotient and
4675 remainder, using truncation. We can easily compensate the
4676 quotient or remainder to get ceiling rounding, once we have the
4677 remainder. Notice that we compute also the final remainder
4678 value here, and return the result right away. */
4679 if (target == 0 || GET_MODE (target) != compute_mode)
4680 target = gen_reg_rtx (compute_mode);
4681 if (rem_flag)
4682 {
4683 remainder = (REG_P (target)
4684 ? target : gen_reg_rtx (compute_mode));
4685 quotient = gen_reg_rtx (compute_mode);
4686 }
4687 else
4688 {
4689 quotient = (REG_P (target)
4690 ? target : gen_reg_rtx (compute_mode));
4691 remainder = gen_reg_rtx (compute_mode);
4692 }
4693
4694 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4695 remainder, 0))
4696 {
4697 /* This could be computed with a branch-less sequence.
4698 Save that for later. */
4699 rtx tem;
4700 rtx label = gen_label_rtx ();
4701 do_cmp_and_jump (remainder, const0_rtx, EQ,
4702 compute_mode, label);
4703 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4704 NULL_RTX, 0, OPTAB_WIDEN);
4705 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4706 expand_inc (quotient, const1_rtx);
4707 expand_dec (remainder, op1);
4708 emit_label (label);
4709 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4710 }
4711
4712 /* No luck with division elimination or divmod. Have to do it
4713 by conditionally adjusting op0 *and* the result. */
4714 {
4715 rtx label1, label2, label3, label4, label5;
4716 rtx adjusted_op0;
4717 rtx tem;
4718
4719 quotient = gen_reg_rtx (compute_mode);
4720 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4721 label1 = gen_label_rtx ();
4722 label2 = gen_label_rtx ();
4723 label3 = gen_label_rtx ();
4724 label4 = gen_label_rtx ();
4725 label5 = gen_label_rtx ();
4726 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4727 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4728 compute_mode, label1);
4729 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4730 quotient, 0, OPTAB_LIB_WIDEN);
4731 if (tem != quotient)
4732 emit_move_insn (quotient, tem);
4733 emit_jump_insn (gen_jump (label5));
4734 emit_barrier ();
4735 emit_label (label1);
4736 expand_dec (adjusted_op0, const1_rtx);
4737 emit_jump_insn (gen_jump (label4));
4738 emit_barrier ();
4739 emit_label (label2);
4740 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4741 compute_mode, label3);
4742 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4743 quotient, 0, OPTAB_LIB_WIDEN);
4744 if (tem != quotient)
4745 emit_move_insn (quotient, tem);
4746 emit_jump_insn (gen_jump (label5));
4747 emit_barrier ();
4748 emit_label (label3);
4749 expand_inc (adjusted_op0, const1_rtx);
4750 emit_label (label4);
4751 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4752 quotient, 0, OPTAB_LIB_WIDEN);
4753 if (tem != quotient)
4754 emit_move_insn (quotient, tem);
4755 expand_inc (quotient, const1_rtx);
4756 emit_label (label5);
4757 }
4758 }
4759 break;
4760
4761 case EXACT_DIV_EXPR:
4762 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4763 {
4764 HOST_WIDE_INT d = INTVAL (op1);
4765 unsigned HOST_WIDE_INT ml;
4766 int pre_shift;
4767 rtx t1;
4768
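/* Exact division: shift out the power-of-two factor of D, then
   multiply by the multiplicative inverse of its odd part modulo
   2^SIZE.  E.g. for D == 10 in 32-bit arithmetic, pre_shift == 1 and
   ml == 0xcccccccd (the inverse of 5), so 70 becomes
   (70 >> 1) * 0xcccccccd == 7 modulo 2^32.  */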
4769 pre_shift = floor_log2 (d & -d);
4770 ml = invert_mod2n (d >> pre_shift, size);
4771 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4772 pre_shift, NULL_RTX, unsignedp);
4773 quotient = expand_mult (compute_mode, t1,
4774 gen_int_mode (ml, compute_mode),
4775 NULL_RTX, 1);
4776
4777 insn = get_last_insn ();
4778 set_dst_reg_note (insn, REG_EQUAL,
4779 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4780 compute_mode, op0, op1),
4781 quotient);
4782 }
4783 break;
4784
4785 case ROUND_DIV_EXPR:
4786 case ROUND_MOD_EXPR:
4787 if (unsignedp)
4788 {
4789 rtx tem;
4790 rtx label;
4791 label = gen_label_rtx ();
4792 quotient = gen_reg_rtx (compute_mode);
4793 remainder = gen_reg_rtx (compute_mode);
4794 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4795 {
4796 rtx tem;
4797 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4798 quotient, 1, OPTAB_LIB_WIDEN);
4799 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4800 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4801 remainder, 1, OPTAB_LIB_WIDEN);
4802 }
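/* Round to nearest: bump the truncating quotient (and reduce the
   remainder by OP1) whenever remainder > (OP1 - 1) / 2, so e.g.
   18 / 5 rounds to 4 while 17 / 5 stays at 3.  */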
4803 tem = plus_constant (compute_mode, op1, -1);
4804 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4805 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4806 expand_inc (quotient, const1_rtx);
4807 expand_dec (remainder, op1);
4808 emit_label (label);
4809 }
4810 else
4811 {
4812 rtx abs_rem, abs_op1, tem, mask;
4813 rtx label;
4814 label = gen_label_rtx ();
4815 quotient = gen_reg_rtx (compute_mode);
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4818 {
4819 rtx tem;
4820 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4821 quotient, 0, OPTAB_LIB_WIDEN);
4822 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4823 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824 remainder, 0, OPTAB_LIB_WIDEN);
4825 }
4826 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4827 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4828 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4829 1, NULL_RTX, 1);
4830 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4831 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4832 NULL_RTX, 0, OPTAB_WIDEN);
4833 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4834 size - 1, NULL_RTX, 0);
4835 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4836 NULL_RTX, 0, OPTAB_WIDEN);
4837 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4838 NULL_RTX, 0, OPTAB_WIDEN);
4839 expand_inc (quotient, tem);
4840 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_dec (remainder, tem);
4845 emit_label (label);
4846 }
4847 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4848
4849 default:
4850 gcc_unreachable ();
4851 }
4852
4853 if (quotient == 0)
4854 {
4855 if (target && GET_MODE (target) != compute_mode)
4856 target = 0;
4857
4858 if (rem_flag)
4859 {
4860 /* Try to produce the remainder without producing the quotient.
4861 If we seem to have a divmod pattern that does not require widening,
4862 don't try widening here. We should really have a WIDEN argument
4863 to expand_twoval_binop, since what we'd really like to do here is
4864 1) try a mod insn in compute_mode
4865 2) try a divmod insn in compute_mode
4866 3) try a div insn in compute_mode and multiply-subtract to get
4867 remainder
4868 4) try the same things with widening allowed. */
4869 remainder
4870 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4871 op0, op1, target,
4872 unsignedp,
4873 ((optab_handler (optab2, compute_mode)
4874 != CODE_FOR_nothing)
4875 ? OPTAB_DIRECT : OPTAB_WIDEN));
4876 if (remainder == 0)
4877 {
4878 /* No luck there. Can we do remainder and divide at once
4879 without a library call? */
4880 remainder = gen_reg_rtx (compute_mode);
4881 if (! expand_twoval_binop ((unsignedp
4882 ? udivmod_optab
4883 : sdivmod_optab),
4884 op0, op1,
4885 NULL_RTX, remainder, unsignedp))
4886 remainder = 0;
4887 }
4888
4889 if (remainder)
4890 return gen_lowpart (mode, remainder);
4891 }
4892
4893 /* Produce the quotient. Try a quotient insn, but not a library call.
4894 If we have a divmod in this mode, use it in preference to widening
4895 the div (for this test we assume it will not fail). Note that optab2
4896 is set to the one of the two optabs that the call below will use. */
4897 quotient
4898 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4899 op0, op1, rem_flag ? NULL_RTX : target,
4900 unsignedp,
4901 ((optab_handler (optab2, compute_mode)
4902 != CODE_FOR_nothing)
4903 ? OPTAB_DIRECT : OPTAB_WIDEN));
4904
4905 if (quotient == 0)
4906 {
4907 /* No luck there. Try a quotient-and-remainder insn,
4908 keeping the quotient alone. */
4909 quotient = gen_reg_rtx (compute_mode);
4910 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4911 op0, op1,
4912 quotient, NULL_RTX, unsignedp))
4913 {
4914 quotient = 0;
4915 if (! rem_flag)
4916 /* Still no luck. If we are not computing the remainder,
4917 use a library call for the quotient. */
4918 quotient = sign_expand_binop (compute_mode,
4919 udiv_optab, sdiv_optab,
4920 op0, op1, target,
4921 unsignedp, OPTAB_LIB_WIDEN);
4922 }
4923 }
4924 }
4925
4926 if (rem_flag)
4927 {
4928 if (target && GET_MODE (target) != compute_mode)
4929 target = 0;
4930
4931 if (quotient == 0)
4932 {
4933 /* No divide instruction either. Use library for remainder. */
4934 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4935 op0, op1, target,
4936 unsignedp, OPTAB_LIB_WIDEN);
4937 /* No remainder function. Try a quotient-and-remainder
4938 function, keeping the remainder. */
4939 if (!remainder)
4940 {
4941 remainder = gen_reg_rtx (compute_mode);
4942 if (!expand_twoval_binop_libfunc
4943 (unsignedp ? udivmod_optab : sdivmod_optab,
4944 op0, op1,
4945 NULL_RTX, remainder,
4946 unsignedp ? UMOD : MOD))
4947 remainder = NULL_RTX;
4948 }
4949 }
4950 else
4951 {
4952 /* We divided. Now finish doing X - Y * (X / Y). */
4953 remainder = expand_mult (compute_mode, quotient, op1,
4954 NULL_RTX, unsignedp);
4955 remainder = expand_binop (compute_mode, sub_optab, op0,
4956 remainder, target, unsignedp,
4957 OPTAB_LIB_WIDEN);
4958 }
4959 }
4960
4961 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4962 }
4963 \f
4964 /* Return a tree node with data type TYPE, describing the value of X.
4965 Usually this is a VAR_DECL, if there is no obvious better choice.
4966 X may be an expression; however, we only support those expressions
4967 generated by loop.c. */
4968
4969 tree
4970 make_tree (tree type, rtx x)
4971 {
4972 tree t;
4973
4974 switch (GET_CODE (x))
4975 {
4976 case CONST_INT:
4977 {
4978 HOST_WIDE_INT hi = 0;
4979
4980 if (INTVAL (x) < 0
4981 && !(TYPE_UNSIGNED (type)
4982 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4983 < HOST_BITS_PER_WIDE_INT)))
4984 hi = -1;
4985
4986 t = build_int_cst_wide (type, INTVAL (x), hi);
4987
4988 return t;
4989 }
4990
4991 case CONST_DOUBLE:
4992 if (GET_MODE (x) == VOIDmode)
4993 t = build_int_cst_wide (type,
4994 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4995 else
4996 {
4997 REAL_VALUE_TYPE d;
4998
4999 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5000 t = build_real (type, d);
5001 }
5002
5003 return t;
5004
5005 case CONST_VECTOR:
5006 {
5007 int units = CONST_VECTOR_NUNITS (x);
5008 tree itype = TREE_TYPE (type);
5009 tree *elts;
5010 int i;
5011
5012 /* Build a tree with vector elements. */
5013 elts = XALLOCAVEC (tree, units);
5014 for (i = units - 1; i >= 0; --i)
5015 {
5016 rtx elt = CONST_VECTOR_ELT (x, i);
5017 elts[i] = make_tree (itype, elt);
5018 }
5019
5020 return build_vector (type, elts);
5021 }
5022
5023 case PLUS:
5024 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 make_tree (type, XEXP (x, 1)));
5026
5027 case MINUS:
5028 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5029 make_tree (type, XEXP (x, 1)));
5030
5031 case NEG:
5032 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5033
5034 case MULT:
5035 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5036 make_tree (type, XEXP (x, 1)));
5037
5038 case ASHIFT:
5039 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5040 make_tree (type, XEXP (x, 1)));
5041
5042 case LSHIFTRT:
5043 t = unsigned_type_for (type);
5044 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5045 make_tree (t, XEXP (x, 0)),
5046 make_tree (type, XEXP (x, 1))));
5047
5048 case ASHIFTRT:
5049 t = signed_type_for (type);
5050 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5051 make_tree (t, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1))));
5053
5054 case DIV:
5055 if (TREE_CODE (type) != REAL_TYPE)
5056 t = signed_type_for (type);
5057 else
5058 t = type;
5059
5060 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5061 make_tree (t, XEXP (x, 0)),
5062 make_tree (t, XEXP (x, 1))));
5063 case UDIV:
5064 t = unsigned_type_for (type);
5065 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5066 make_tree (t, XEXP (x, 0)),
5067 make_tree (t, XEXP (x, 1))));
5068
5069 case SIGN_EXTEND:
5070 case ZERO_EXTEND:
5071 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5072 GET_CODE (x) == ZERO_EXTEND);
5073 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5074
5075 case CONST:
5076 return make_tree (type, XEXP (x, 0));
5077
5078 case SYMBOL_REF:
5079 t = SYMBOL_REF_DECL (x);
5080 if (t)
5081 return fold_convert (type, build_fold_addr_expr (t));
5082 /* else fall through. */
5083
5084 default:
5085 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5086
5087 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5088 address mode to pointer mode. */
5089 if (POINTER_TYPE_P (type))
5090 x = convert_memory_address_addr_space
5091 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5092
5093 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5094 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5095 t->decl_with_rtl.rtl = x;
5096
5097 return t;
5098 }
5099 }
5100 \f
5101 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5102 and returning TARGET.
5103
5104 If TARGET is 0, a pseudo-register or constant is returned. */
5105
5106 rtx
5107 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5108 {
5109 rtx tem = 0;
5110
5111 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5112 tem = simplify_binary_operation (AND, mode, op0, op1);
5113 if (tem == 0)
5114 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5115
5116 if (target == 0)
5117 target = tem;
5118 else if (tem != target)
5119 emit_move_insn (target, tem);
5120 return target;
5121 }
5122
5123 /* Helper function for emit_store_flag. */
5124 static rtx
5125 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5126 enum machine_mode mode, enum machine_mode compare_mode,
5127 int unsignedp, rtx x, rtx y, int normalizep,
5128 enum machine_mode target_mode)
5129 {
5130 struct expand_operand ops[4];
5131 rtx op0, last, comparison, subtarget;
5132 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5133
5134 last = get_last_insn ();
5135 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5136 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5137 if (!x || !y)
5138 {
5139 delete_insns_since (last);
5140 return NULL_RTX;
5141 }
5142
5143 if (target_mode == VOIDmode)
5144 target_mode = result_mode;
5145 if (!target)
5146 target = gen_reg_rtx (target_mode);
5147
5148 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5149
5150 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5151 create_fixed_operand (&ops[1], comparison);
5152 create_fixed_operand (&ops[2], x);
5153 create_fixed_operand (&ops[3], y);
5154 if (!maybe_expand_insn (icode, 4, ops))
5155 {
5156 delete_insns_since (last);
5157 return NULL_RTX;
5158 }
5159 subtarget = ops[0].value;
5160
5161 /* If we are converting to a wider mode, first convert to
5162 TARGET_MODE, then normalize. This produces better combining
5163 opportunities on machines that have a SIGN_EXTRACT when we are
5164 testing a single bit. This mostly benefits the 68k.
5165
5166 If STORE_FLAG_VALUE does not have the sign bit set when
5167 interpreted in MODE, we can do this conversion as unsigned, which
5168 is usually more efficient. */
5169 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5170 {
5171 convert_move (target, subtarget,
5172 val_signbit_known_clear_p (result_mode,
5173 STORE_FLAG_VALUE));
5174 op0 = target;
5175 result_mode = target_mode;
5176 }
5177 else
5178 op0 = subtarget;
5179
5180 /* If we want to keep subexpressions around, don't reuse our last
5181 target. */
5182 if (optimize)
5183 subtarget = 0;
5184
5185 /* Now normalize to the proper value in MODE. Sometimes we don't
5186 have to do anything. */
5187 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5188 ;
5189 /* STORE_FLAG_VALUE might be the most negative number, so write
5190 the comparison this way to avoid a compile-time warning. */
5191 else if (- normalizep == STORE_FLAG_VALUE)
5192 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5193
5194 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5195 it hard to use a value of just the sign bit due to ANSI integer
5196 constant typing rules. */
5197 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5198 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5199 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5200 normalizep == 1);
5201 else
5202 {
5203 gcc_assert (STORE_FLAG_VALUE & 1);
5204
5205 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5206 if (normalizep == -1)
5207 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5208 }
5209
5210 /* If we were converting to a smaller mode, do the conversion now. */
5211 if (target_mode != result_mode)
5212 {
5213 convert_move (target, op0, 0);
5214 return target;
5215 }
5216 else
5217 return op0;
5218 }
5219
5220
5221 /* A subroutine of emit_store_flag only including "tricks" that do not
5222 need a recursive call. These are kept separate to avoid infinite
5223 loops. */
5224
5225 static rtx
5226 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5227 enum machine_mode mode, int unsignedp, int normalizep,
5228 enum machine_mode target_mode)
5229 {
5230 rtx subtarget;
5231 enum insn_code icode;
5232 enum machine_mode compare_mode;
5233 enum mode_class mclass;
5234 enum rtx_code scode;
5235 rtx tem;
5236
5237 if (unsignedp)
5238 code = unsigned_condition (code);
5239 scode = swap_condition (code);
5240
5241 /* If one operand is constant, make it the second one. Only do this
5242 if the other operand is not constant as well. */
5243
5244 if (swap_commutative_operands_p (op0, op1))
5245 {
5246 tem = op0;
5247 op0 = op1;
5248 op1 = tem;
5249 code = swap_condition (code);
5250 }
5251
5252 if (mode == VOIDmode)
5253 mode = GET_MODE (op0);
5254
5255 /* For some comparisons with 1 and -1, we can convert them to
5256 comparisons with zero. This will often produce more opportunities for
5257 store-flag insns. */
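   /* For instance, with signed operands "op0 < 1" is the same test as
      "op0 <= 0" and "op0 >= 1" the same as "op0 > 0", while the unsigned
      tests "op0 >= 1" and "op0 < 1" are just "op0 != 0" and "op0 == 0".  */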
5258
5259 switch (code)
5260 {
5261 case LT:
5262 if (op1 == const1_rtx)
5263 op1 = const0_rtx, code = LE;
5264 break;
5265 case LE:
5266 if (op1 == constm1_rtx)
5267 op1 = const0_rtx, code = LT;
5268 break;
5269 case GE:
5270 if (op1 == const1_rtx)
5271 op1 = const0_rtx, code = GT;
5272 break;
5273 case GT:
5274 if (op1 == constm1_rtx)
5275 op1 = const0_rtx, code = GE;
5276 break;
5277 case GEU:
5278 if (op1 == const1_rtx)
5279 op1 = const0_rtx, code = NE;
5280 break;
5281 case LTU:
5282 if (op1 == const1_rtx)
5283 op1 = const0_rtx, code = EQ;
5284 break;
5285 default:
5286 break;
5287 }
5288
5289 /* If we are comparing a double-word integer with zero or -1, we can
5290 convert the comparison into one involving a single word. */
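   /* Concretely, assuming 32-bit words and a 64-bit operand split into
      low and high words LO and HI: the operand is zero iff (LO | HI) == 0,
      it is -1 iff (LO & HI) == -1, and for a test against the sign bit
      only HI needs to be examined.  */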
5291 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5292 && GET_MODE_CLASS (mode) == MODE_INT
5293 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5294 {
5295 if ((code == EQ || code == NE)
5296 && (op1 == const0_rtx || op1 == constm1_rtx))
5297 {
5298 rtx op00, op01;
5299
5300 /* Do a logical OR or AND of the two words and compare the
5301 result. */
5302 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5303 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5304 tem = expand_binop (word_mode,
5305 op1 == const0_rtx ? ior_optab : and_optab,
5306 op00, op01, NULL_RTX, unsignedp,
5307 OPTAB_DIRECT);
5308
5309 if (tem != 0)
5310 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5311 unsignedp, normalizep);
5312 }
5313 else if ((code == LT || code == GE) && op1 == const0_rtx)
5314 {
5315 rtx op0h;
5316
5317 /* If testing the sign bit, we can just test the high word. */
5318 op0h = simplify_gen_subreg (word_mode, op0, mode,
5319 subreg_highpart_offset (word_mode,
5320 mode));
5321 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5322 unsignedp, normalizep);
5323 }
5324 else
5325 tem = NULL_RTX;
5326
5327 if (tem)
5328 {
5329 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5330 return tem;
5331 if (!target)
5332 target = gen_reg_rtx (target_mode);
5333
5334 convert_move (target, tem,
5335 !val_signbit_known_set_p (word_mode,
5336 (normalizep ? normalizep
5337 : STORE_FLAG_VALUE)));
5338 return target;
5339 }
5340 }
5341
5342 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5343 complement of A (for GE) and shifting the sign bit to the low bit. */
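   /* As an illustration, assuming 8-bit values: A < 0 is just the sign
      bit of A, so a logical shift right by 7 yields the desired 0/1
      value; for A >= 0 we complement A first so that its sign bit is
      inverted.  When a 0/-1 result is wanted, an arithmetic shift is
      used instead, smearing the sign bit across the whole word.  */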
5344 if (op1 == const0_rtx && (code == LT || code == GE)
5345 && GET_MODE_CLASS (mode) == MODE_INT
5346 && (normalizep || STORE_FLAG_VALUE == 1
5347 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5348 {
5349 subtarget = target;
5350
5351 if (!target)
5352 target_mode = mode;
5353
5354 /* If the result is to be wider than OP0, it is best to convert it
5355 first. If it is to be narrower, it is *incorrect* to convert it
5356 first. */
5357 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5358 {
5359 op0 = convert_modes (target_mode, mode, op0, 0);
5360 mode = target_mode;
5361 }
5362
5363 if (target_mode != mode)
5364 subtarget = 0;
5365
5366 if (code == GE)
5367 op0 = expand_unop (mode, one_cmpl_optab, op0,
5368 ((STORE_FLAG_VALUE == 1 || normalizep)
5369 ? 0 : subtarget), 0);
5370
5371 if (STORE_FLAG_VALUE == 1 || normalizep)
5372 /* If we are supposed to produce a 0/1 value, we want to do
5373 a logical shift from the sign bit to the low-order bit; for
5374 a -1/0 value, we do an arithmetic shift. */
5375 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5376 GET_MODE_BITSIZE (mode) - 1,
5377 subtarget, normalizep != -1);
5378
5379 if (mode != target_mode)
5380 op0 = convert_modes (target_mode, mode, op0, 0);
5381
5382 return op0;
5383 }
5384
5385 mclass = GET_MODE_CLASS (mode);
5386 for (compare_mode = mode; compare_mode != VOIDmode;
5387 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5388 {
5389 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5390 icode = optab_handler (cstore_optab, optab_mode);
5391 if (icode != CODE_FOR_nothing)
5392 {
5393 do_pending_stack_adjust ();
5394 tem = emit_cstore (target, icode, code, mode, compare_mode,
5395 unsignedp, op0, op1, normalizep, target_mode);
5396 if (tem)
5397 return tem;
5398
5399 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5400 {
5401 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5402 unsignedp, op1, op0, normalizep, target_mode);
5403 if (tem)
5404 return tem;
5405 }
5406 break;
5407 }
5408 }
5409
5410 return 0;
5411 }
5412
5413 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5414 and store the result in TARGET. Normally return TARGET.
5415 Return 0 if that cannot be done.
5416
5417 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5418 it is VOIDmode, they cannot both be CONST_INT.
5419
5420 UNSIGNEDP is for the case where we have to widen the operands
5421 to perform the operation. It says to use zero-extension.
5422
5423 NORMALIZEP is 1 if we should convert the result to be either zero
5424 or one. NORMALIZEP is -1 if we should convert the result to be
5425 either zero or -1. If NORMALIZEP is zero, the result will be left
5426 "raw" out of the scc insn. */
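/* For example, emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1)
   tries to leave 1 in TARGET when X is zero and 0 otherwise, while
   passing NORMALIZEP as -1 would request a 0/-1 result instead; either
   call returns 0 if no suitable sequence can be emitted.  */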
5427
5428 rtx
5429 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5430 enum machine_mode mode, int unsignedp, int normalizep)
5431 {
5432 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5433 enum rtx_code rcode;
5434 rtx subtarget;
5435 rtx tem, last, trueval;
5436
5437 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5438 target_mode);
5439 if (tem)
5440 return tem;
5441
5442 /* If we reached here, we can't do this with a scc insn; however, there
5443 are some comparisons that can be done in other ways. Don't do any
5444 of these cases if branches are very cheap. */
5445 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5446 return 0;
5447
5448 /* See what we need to return. We can only return a 1, -1, or the
5449 sign bit. */
5450
5451 if (normalizep == 0)
5452 {
5453 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5454 normalizep = STORE_FLAG_VALUE;
5455
5456 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5457 ;
5458 else
5459 return 0;
5460 }
5461
5462 last = get_last_insn ();
5463
5464 /* If optimizing, use different pseudo registers for each insn, instead
5465 of reusing the same pseudo. This leads to better CSE, but slows
5466 down the compiler, since there are more pseudos. */
5467 subtarget = (!optimize
5468 && (target_mode == mode)) ? target : NULL_RTX;
5469 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5470
5471 /* For floating-point comparisons, try the reverse comparison or try
5472 changing the "orderedness" of the comparison. */
5473 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5474 {
5475 enum rtx_code first_code;
5476 bool and_them;
5477
5478 rcode = reverse_condition_maybe_unordered (code);
5479 if (can_compare_p (rcode, mode, ccp_store_flag)
5480 && (code == ORDERED || code == UNORDERED
5481 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5482 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5483 {
5484 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5485 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5486
5487 /* For the reverse comparison, use either an addition or a XOR. */
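	  /* The trick: compute the reverse condition RCODE normalized to
	     STORE_FLAG_VALUE and add NORMALIZEP (e.g. 1 + -1 = 0 when
	     RCODE holds, 0 + -1 = -1 when it does not), or compute RCODE
	     normalized as requested and XOR it with the true value,
	     which swaps the two possible results.  */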
5488 if (want_add
5489 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5490 optimize_insn_for_speed_p ()) == 0)
5491 {
5492 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5493 STORE_FLAG_VALUE, target_mode);
5494 if (tem)
5495 return expand_binop (target_mode, add_optab, tem,
5496 GEN_INT (normalizep),
5497 target, 0, OPTAB_WIDEN);
5498 }
5499 else if (!want_add
5500 && rtx_cost (trueval, XOR, 1,
5501 optimize_insn_for_speed_p ()) == 0)
5502 {
5503 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5504 normalizep, target_mode);
5505 if (tem)
5506 return expand_binop (target_mode, xor_optab, tem, trueval,
5507 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5508 }
5509 }
5510
5511 delete_insns_since (last);
5512
5513 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5514 if (code == ORDERED || code == UNORDERED)
5515 return 0;
5516
5517 and_them = split_comparison (code, mode, &first_code, &code);
5518
5519 /* If there are no NaNs, the first comparison should always fall through.
5520 Effectively change the comparison to the other one. */
5521 if (!HONOR_NANS (mode))
5522 {
5523 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5524 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5525 target_mode);
5526 }
5527
5528 #ifdef HAVE_conditional_move
5529 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5530 conditional move. */
5531 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5532 normalizep, target_mode);
5533 if (tem == 0)
5534 return 0;
5535
5536 if (and_them)
5537 tem = emit_conditional_move (target, code, op0, op1, mode,
5538 tem, const0_rtx, GET_MODE (tem), 0);
5539 else
5540 tem = emit_conditional_move (target, code, op0, op1, mode,
5541 trueval, tem, GET_MODE (tem), 0);
5542
5543 if (tem == 0)
5544 delete_insns_since (last);
5545 return tem;
5546 #else
5547 return 0;
5548 #endif
5549 }
5550
5551 /* The remaining tricks only apply to integer comparisons. */
5552
5553 if (GET_MODE_CLASS (mode) != MODE_INT)
5554 return 0;
5555
5556 /* If this is an equality comparison of integers, we can try to exclusive-or
5557 (or subtract) the two operands and use a recursive call to try the
5558 comparison with zero. Don't do any of these cases if branches are
5559 very cheap. */
5560
5561 if ((code == EQ || code == NE) && op1 != const0_rtx)
5562 {
5563 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5564 OPTAB_WIDEN);
5565
5566 if (tem == 0)
5567 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5568 OPTAB_WIDEN);
5569 if (tem != 0)
5570 tem = emit_store_flag (target, code, tem, const0_rtx,
5571 mode, unsignedp, normalizep);
5572 if (tem != 0)
5573 return tem;
5574
5575 delete_insns_since (last);
5576 }
5577
5578 /* For integer comparisons, try the reverse comparison. However, for
5579 small X, if we would have to extend it anyway, implementing "X != 0"
5580 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
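   /* Assuming 32-bit int and X zero-extended from a narrower mode:
      -(int) X is negative exactly when X is nonzero, so the arithmetic
      shift "-(int) X >> 31" yields -1 for any nonzero X and 0 for zero,
      i.e. a normalized "X != 0" without a store-flag instruction.  */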
5581 rcode = reverse_condition (code);
5582 if (can_compare_p (rcode, mode, ccp_store_flag)
5583 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5584 && code == NE
5585 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5586 && op1 == const0_rtx))
5587 {
5588 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5589 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5590
5591 /* Again, for the reverse comparison, use either an addition or a XOR. */
5592 if (want_add
5593 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5594 optimize_insn_for_speed_p ()) == 0)
5595 {
5596 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5597 STORE_FLAG_VALUE, target_mode);
5598 if (tem != 0)
5599 tem = expand_binop (target_mode, add_optab, tem,
5600 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5601 }
5602 else if (!want_add
5603 && rtx_cost (trueval, XOR, 1,
5604 optimize_insn_for_speed_p ()) == 0)
5605 {
5606 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5607 normalizep, target_mode);
5608 if (tem != 0)
5609 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5610 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5611 }
5612
5613 if (tem != 0)
5614 return tem;
5615 delete_insns_since (last);
5616 }
5617
5618 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5619 the constant zero. Reject all other comparisons at this point. Only
5620 do LE and GT if branches are expensive since they are expensive on
5621 2-operand machines. */
5622
5623 if (op1 != const0_rtx
5624 || (code != EQ && code != NE
5625 && (BRANCH_COST (optimize_insn_for_speed_p (),
5626 false) <= 1 || (code != LE && code != GT))))
5627 return 0;
5628
5629 /* Try to put the result of the comparison in the sign bit. Assume we can't
5630 do the necessary operation below. */
5631
5632 tem = 0;
5633
5634 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5635 the sign bit set. */
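   /* Worked example in 8 bits: A = 0 gives 0x00 | 0xff = 0xff (sign set);
      A = 5 gives 0x05 | 0x04 = 0x05 (sign clear); A = -3 gives
      0xfd | 0xfc = 0xfd (sign set).  For A > 0 neither A nor A - 1 can
      have the sign bit, so the OR has it set exactly when A <= 0.  */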
5636
5637 if (code == LE)
5638 {
5639 /* This is destructive, so SUBTARGET can't be OP0. */
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5642
5643 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5644 OPTAB_WIDEN);
5645 if (tem)
5646 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5647 OPTAB_WIDEN);
5648 }
5649
5650 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5651 number of bits in the mode of OP0, minus one. */
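   /* In 8 bits BITS is 7: for A = 5 the arithmetic shift yields 0 and
      0 - 5 = -5 has the sign bit set; for A = 0 the result is 0; for
      A = -3 the shift yields -1 and -1 - (-3) = 2 is non-negative.  So
      the sign bit of the result is set exactly when A > 0.  */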
5652
5653 if (code == GT)
5654 {
5655 if (rtx_equal_p (subtarget, op0))
5656 subtarget = 0;
5657
5658 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5659 GET_MODE_BITSIZE (mode) - 1,
5660 subtarget, 0);
5661 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5662 OPTAB_WIDEN);
5663 }
5664
5665 if (code == EQ || code == NE)
5666 {
5667 /* For EQ or NE, one way to do the comparison is to apply an operation
5668 that converts the operand into a positive number if it is nonzero
5669 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5670 for NE we negate. This puts the result in the sign bit. Then we
5671 normalize with a shift, if needed.
5672
5673 Two operations that can do the above actions are ABS and FFS, so try
5674 them. If that doesn't work, and MODE is smaller than a full word,
5675 we can use zero-extension to the wider mode (an unsigned conversion)
5676 as the operation. */
5677
5678 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5679 that is compensated by the subsequent overflow when subtracting
5680 one / negating. */
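      /* As an example with ABS for an EQ test, in 8 bits: A = 0 gives
	 abs 0, and subtracting one leaves -1 (sign set, so A == 0 holds);
	 A = 3 gives abs 3, then 2 (sign clear); A = -128 gives abs -128,
	 and subtracting one wraps around to 127 (sign clear), which is
	 the overflow compensation mentioned above.  */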
5681
5682 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5683 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5684 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5685 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5686 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5687 {
5688 tem = convert_modes (word_mode, mode, op0, 1);
5689 mode = word_mode;
5690 }
5691
5692 if (tem != 0)
5693 {
5694 if (code == EQ)
5695 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5696 0, OPTAB_WIDEN);
5697 else
5698 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5699 }
5700
5701 /* If we couldn't do it that way, for NE we can "or" the two's complement
5702 of the value with itself. For EQ, we take the one's complement of
5703 that "or", which is an extra insn, so we only handle EQ if branches
5704 are expensive. */
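      /* This works because (-A) | A has the sign bit set exactly when
	 A is nonzero: for A > 0 the negation is negative, for A < 0
	 the value itself is, and for A == 0 both terms are zero.  */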
5705
5706 if (tem == 0
5707 && (code == NE
5708 || BRANCH_COST (optimize_insn_for_speed_p (),
5709 false) > 1))
5710 {
5711 if (rtx_equal_p (subtarget, op0))
5712 subtarget = 0;
5713
5714 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5715 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5716 OPTAB_WIDEN);
5717
5718 if (tem && code == EQ)
5719 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5720 }
5721 }
5722
5723 if (tem && normalizep)
5724 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5725 GET_MODE_BITSIZE (mode) - 1,
5726 subtarget, normalizep == 1);
5727
5728 if (tem)
5729 {
5730 if (!target)
5731 ;
5732 else if (GET_MODE (tem) != target_mode)
5733 {
5734 convert_move (target, tem, 0);
5735 tem = target;
5736 }
5737 else if (!subtarget)
5738 {
5739 emit_move_insn (target, tem);
5740 tem = target;
5741 }
5742 }
5743 else
5744 delete_insns_since (last);
5745
5746 return tem;
5747 }
5748
5749 /* Like emit_store_flag, but always succeeds. */
5750
5751 rtx
5752 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5753 enum machine_mode mode, int unsignedp, int normalizep)
5754 {
5755 rtx tem, label;
5756 rtx trueval, falseval;
5757
5758 /* First see if emit_store_flag can do the job. */
5759 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5760 if (tem != 0)
5761 return tem;
5762
5763 if (!target)
5764 target = gen_reg_rtx (word_mode);
5765
5766 /* If this failed, we have to do this with set/compare/jump/set code.
5767 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5768 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5769 if (code == NE
5770 && GET_MODE_CLASS (mode) == MODE_INT
5771 && REG_P (target)
5772 && op0 == target
5773 && op1 == const0_rtx)
5774 {
5775 label = gen_label_rtx ();
5776 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5777 mode, NULL_RTX, NULL_RTX, label, -1);
5778 emit_move_insn (target, trueval);
5779 emit_label (label);
5780 return target;
5781 }
5782
5783 if (!REG_P (target)
5784 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5785 target = gen_reg_rtx (GET_MODE (target));
5786
5787 /* Jump in the right direction if the target cannot implement CODE
5788 but can jump on its reverse condition. */
5789 falseval = const0_rtx;
5790 if (! can_compare_p (code, mode, ccp_jump)
5791 && (! FLOAT_MODE_P (mode)
5792 || code == ORDERED || code == UNORDERED
5793 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5794 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5795 {
5796 enum rtx_code rcode;
5797 if (FLOAT_MODE_P (mode))
5798 rcode = reverse_condition_maybe_unordered (code);
5799 else
5800 rcode = reverse_condition (code);
5801
5802 /* Canonicalize to UNORDERED for the libcall. */
5803 if (can_compare_p (rcode, mode, ccp_jump)
5804 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5805 {
5806 falseval = trueval;
5807 trueval = const0_rtx;
5808 code = rcode;
5809 }
5810 }
5811
5812 emit_move_insn (target, trueval);
5813 label = gen_label_rtx ();
5814 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5815 NULL_RTX, label, -1);
5816
5817 emit_move_insn (target, falseval);
5818 emit_label (label);
5819
5820 return target;
5821 }
5822 \f
5823 /* Perform possibly multi-word comparison and conditional jump to LABEL
5824 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5825 now a thin wrapper around do_compare_rtx_and_jump. */
5826
5827 static void
5828 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5829 rtx label)
5830 {
5831 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5832 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5833 NULL_RTX, NULL_RTX, label, -1);
5834 }