expmed.c (store_bit_field): Assert that BITREGION_START is a multiple of a unit befor...
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
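/* The test relies on x & (x - 1) clearing the lowest set bit of x:
   a power of two has exactly one bit set, so the result is zero.
   Worked example: 8 & 7 == 0 (power of two), 6 & 5 == 4 (not a power
   of two), and 0 & (0 - 1) == 0, hence the "or zero" in the name.  */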
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
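/* Initialize the cost tables used throughout this file: the cost of
   addition, negation, multiplication, division, shifts and
   shift-add/sub combinations for every integer mode, computed
   separately for the size- and speed-optimized cases by pricing small
   synthetic RTL expressions with set_src_cost.  */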
96 void
97 init_expmed (void)
98 {
99 struct
100 {
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
119
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
125
126
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
128 {
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
131 }
132 memset (&all, 0, sizeof all);
133
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
137
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
141
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
144
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
148
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
152
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
156
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
160
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
164
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
167
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
171
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
174
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
177
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
180
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
183
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
187
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
191
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
195
196 for (speed = 0; speed < 2; speed++)
197 {
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
219
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
225
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
230
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
233 {
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
238
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
243 }
244
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
248
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
251 {
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
254
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
262 }
263 }
264 }
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
270 }
271
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
275
276 rtx
277 negate_rtx (enum machine_mode mode, rtx x)
278 {
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
280
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
283
284 return result;
285 }
286
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
293 {
294 const struct insn_data_d *data;
295
296 switch (pattern)
297 {
298 case EP_insv:
299 if (HAVE_insv)
300 {
301 data = &insn_data[CODE_FOR_insv];
302 break;
303 }
304 return MAX_MACHINE_MODE;
305
306 case EP_extv:
307 if (HAVE_extv)
308 {
309 data = &insn_data[CODE_FOR_extv];
310 break;
311 }
312 return MAX_MACHINE_MODE;
313
314 case EP_extzv:
315 if (HAVE_extzv)
316 {
317 data = &insn_data[CODE_FOR_extzv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 default:
323 gcc_unreachable ();
324 }
325
326 if (opno == -1)
327 return VOIDmode;
328
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
334 }
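/* Typical use (a sketch, mirroring the EP_insv use in store_bit_field_1
   below): query the mode wanted for operand 3 of insv, the value being
   inserted, and skip the pattern entirely when it is unavailable:

     enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
     if (op_mode == MAX_MACHINE_MODE)
       ... fall back to shifts and masks ...  */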
335 \f
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
338
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
342
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
350 {
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
357
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
359
360 while (GET_CODE (op0) == SUBREG)
361 {
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
369
370 byte_offset = 0;
371
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
374 {
375 int difference = inner_mode_size - outer_mode_size;
376
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
381 }
382 else
383 byte_offset = SUBREG_BYTE (op0);
384
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
387 }
388
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
394
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403 {
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
409
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
415 }
416
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
419
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
423
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
428
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
440 {
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
448 }
449
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
454 {
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
457 {
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
461 {
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
464 }
465 }
466 }
467
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
472 {
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
476 }
477
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
485
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
488
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
493 {
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
498
499 if (GET_CODE (arg0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
508 }
509
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
513 {
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
515
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
521 }
522 }
523
524 /* Handle fields bigger than a word. */
525
526 if (bitsize > BITS_PER_WORD)
527 {
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
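/* Worked example: storing a 40-bit field when BITS_PER_WORD is 32
   takes two iterations below, a full 32-bit word followed by the
   remaining 8 bits; that 8-bit chunk holds the most significant part
   of the field and is the one that may need the extra shift for
   big-endian targets.  */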
533
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
538
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
547
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
550 {
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
554 unsigned int bit_offset = (backwards
555 ? MAX ((int) bitsize - ((int) i + 1)
556 * BITS_PER_WORD,
557 0)
558 : (int) i * BITS_PER_WORD);
559 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
560 unsigned HOST_WIDE_INT new_bitsize =
561 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
562
563 /* If the remaining chunk doesn't have full wordsize we have
564 to make sure that for big endian machines the higher order
565 bits are used. */
566 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
567 value_word = simplify_expand_binop (word_mode, lshr_optab,
568 value_word,
569 GEN_INT (BITS_PER_WORD
570 - new_bitsize),
571 NULL_RTX, true,
572 OPTAB_LIB_WIDEN);
573
574 if (!store_bit_field_1 (op0, new_bitsize,
575 bitnum + bit_offset,
576 bitregion_start, bitregion_end,
577 word_mode,
578 value_word, fallback_p))
579 {
580 delete_insns_since (last);
581 return false;
582 }
583 }
584 return true;
585 }
586
587 /* From here on we can assume that the object in which the field is to be
588 stored is a full word (whatever type that is), since the field itself is shorter than a word. */
589
590 /* OFFSET is the number of words or bytes (UNIT says which)
591 from STR_RTX to the first word or byte containing part of the field. */
592
593 if (!MEM_P (op0))
594 {
595 if (offset != 0
596 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
597 {
598 if (!REG_P (op0))
599 {
600 /* Since this is a destination (lvalue), we can't copy
601 it to a pseudo. We can remove a SUBREG that does not
602 change the size of the operand. Such a SUBREG may
603 have been added above. */
604 gcc_assert (GET_CODE (op0) == SUBREG
605 && (GET_MODE_SIZE (GET_MODE (op0))
606 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
607 op0 = SUBREG_REG (op0);
608 }
609 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
610 op0, (offset * UNITS_PER_WORD));
611 }
612 offset = 0;
613 }
614
615 /* If VALUE has a floating-point or complex mode, access it as an
616 integer of the corresponding size. This can occur on a machine
617 with 64 bit registers that uses SFmode for float. It can also
618 occur for unaligned float or complex fields. */
619 orig_value = value;
620 if (GET_MODE (value) != VOIDmode
621 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
622 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
623 {
624 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
625 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
626 }
627
628 /* Now OFFSET is nonzero only if OP0 is memory
629 and is therefore always measured in bytes. */
630
631 if (HAVE_insv
632 && GET_MODE (value) != BLKmode
633 && bitsize > 0
634 && GET_MODE_BITSIZE (op_mode) >= bitsize
635 /* Do not use insv for volatile bitfields when
636 -fstrict-volatile-bitfields is in effect. */
637 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
638 && flag_strict_volatile_bitfields > 0)
639 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
640 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
641 {
642 struct expand_operand ops[4];
643 int xbitpos = bitpos;
644 rtx value1;
645 rtx xop0 = op0;
646 rtx last = get_last_insn ();
647 bool copy_back = false;
648
649 /* Add OFFSET into OP0's address. */
650 if (MEM_P (xop0))
651 xop0 = adjust_address (xop0, byte_mode, offset);
652
653 /* If xop0 is a register, we need it in OP_MODE
654 to make it acceptable to the format of insv. */
655 if (GET_CODE (xop0) == SUBREG)
656 /* We can't just change the mode, because this might clobber op0,
657 and we will need the original value of op0 if insv fails. */
658 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
659 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
660 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
661
662 /* If the destination is a paradoxical subreg such that we need a
663 truncate to the inner mode, perform the insertion on a temporary and
664 truncate the result to the original destination. Note that we can't
665 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
666 X) 0)) is (reg:N X). */
667 if (GET_CODE (xop0) == SUBREG
668 && REG_P (SUBREG_REG (xop0))
669 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
670 op_mode)))
671 {
672 rtx tem = gen_reg_rtx (op_mode);
673 emit_move_insn (tem, xop0);
674 xop0 = tem;
675 copy_back = true;
676 }
677
678 /* We have been counting XBITPOS within UNIT.
679 Count instead within the size of the register. */
680 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
681 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
682
683 unit = GET_MODE_BITSIZE (op_mode);
684
685 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
686 "backwards" from the size of the unit we are inserting into.
687 Otherwise, we count bits from the most significant on a
688 BYTES/BITS_BIG_ENDIAN machine. */
689
690 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
691 xbitpos = unit - bitsize - xbitpos;
692
693 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
694 value1 = value;
695 if (GET_MODE (value) != op_mode)
696 {
697 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
698 {
699 /* Optimization: Don't bother really extending VALUE
700 if it has all the bits we will actually use. However,
701 if we must narrow it, be sure we do it correctly. */
702
703 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
704 {
705 rtx tmp;
706
707 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
708 if (! tmp)
709 tmp = simplify_gen_subreg (op_mode,
710 force_reg (GET_MODE (value),
711 value1),
712 GET_MODE (value), 0);
713 value1 = tmp;
714 }
715 else
716 value1 = gen_lowpart (op_mode, value1);
717 }
718 else if (CONST_INT_P (value))
719 value1 = gen_int_mode (INTVAL (value), op_mode);
720 else
721 /* Parse phase is supposed to make VALUE's data type
722 match that of the component reference, which is a type
723 at least as wide as the field; so VALUE should have
724 a mode that corresponds to that type. */
725 gcc_assert (CONSTANT_P (value));
726 }
727
728 create_fixed_operand (&ops[0], xop0);
729 create_integer_operand (&ops[1], bitsize);
730 create_integer_operand (&ops[2], xbitpos);
731 create_input_operand (&ops[3], value1, op_mode);
732 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
733 {
734 if (copy_back)
735 convert_move (op0, xop0, true);
736 return true;
737 }
738 delete_insns_since (last);
739 }
740
741 /* If OP0 is a memory, try copying it to a register and seeing if a
742 cheap register alternative is available. */
743 if (HAVE_insv && MEM_P (op0))
744 {
745 enum machine_mode bestmode;
746 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
747
748 if (bitregion_end)
749 maxbits = bitregion_end - bitregion_start + 1;
750
751 /* Get the mode to use for inserting into this field. If OP0 is
752 BLKmode, get the smallest mode consistent with the alignment. If
753 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
754 mode. Otherwise, use the smallest mode containing the field. */
755
756 if (GET_MODE (op0) == BLKmode
757 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
758 || (op_mode != MAX_MACHINE_MODE
759 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
760 bestmode = get_best_mode (bitsize, bitnum,
761 bitregion_start, bitregion_end,
762 MEM_ALIGN (op0),
763 (op_mode == MAX_MACHINE_MODE
764 ? VOIDmode : op_mode),
765 MEM_VOLATILE_P (op0));
766 else
767 bestmode = GET_MODE (op0);
768
769 if (bestmode != VOIDmode
770 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
771 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
772 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
773 {
774 rtx last, tempreg, xop0;
775 unsigned HOST_WIDE_INT xoffset, xbitpos;
776
777 last = get_last_insn ();
778
779 /* Adjust address to point to the containing unit of
780 that mode. Compute the offset as a multiple of this unit,
781 counting in bytes. */
782 unit = GET_MODE_BITSIZE (bestmode);
783 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
784 xbitpos = bitnum % unit;
785 xop0 = adjust_address (op0, bestmode, xoffset);
786
787 /* Fetch that unit, store the bitfield in it, then store
788 the unit. */
789 tempreg = copy_to_reg (xop0);
790 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
791 bitregion_start, bitregion_end,
792 fieldmode, orig_value, false))
793 {
794 emit_move_insn (xop0, tempreg);
795 return true;
796 }
797 delete_insns_since (last);
798 }
799 }
800
801 if (!fallback_p)
802 return false;
803
804 store_fixed_bit_field (op0, offset, bitsize, bitpos,
805 bitregion_start, bitregion_end, value);
806 return true;
807 }
808
809 /* Generate code to store value from rtx VALUE
810 into a bit-field within structure STR_RTX
811 containing BITSIZE bits starting at bit BITNUM.
812
813 BITREGION_START is the bitpos of the first bitfield in this region.
814 BITREGION_END is the bitpos of the ending bitfield in this region.
815 Both are zero if the C++ memory model does not apply or if we are not
816 interested in keeping track of bitfield regions.
817
818 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
819
820 void
821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
822 unsigned HOST_WIDE_INT bitnum,
823 unsigned HOST_WIDE_INT bitregion_start,
824 unsigned HOST_WIDE_INT bitregion_end,
825 enum machine_mode fieldmode,
826 rtx value)
827 {
828 /* Under the C++0x memory model, we must not touch bits outside the
829 bit region. Adjust the address to start at the beginning of the
830 bit region. */
831 if (MEM_P (str_rtx) && bitregion_start > 0)
832 {
833 enum machine_mode bestmode;
834 enum machine_mode op_mode;
835 unsigned HOST_WIDE_INT offset;
836
837 op_mode = mode_for_extraction (EP_insv, 3);
838 if (op_mode == MAX_MACHINE_MODE)
839 op_mode = VOIDmode;
840
841 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
842
843 offset = bitregion_start / BITS_PER_UNIT;
844 bitnum -= bitregion_start;
845 bitregion_end -= bitregion_start;
846 bitregion_start = 0;
847 bestmode = get_best_mode (bitsize, bitnum,
848 bitregion_start, bitregion_end,
849 MEM_ALIGN (str_rtx),
850 op_mode,
851 MEM_VOLATILE_P (str_rtx));
852 str_rtx = adjust_address (str_rtx, bestmode, offset);
853 }
854
855 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
856 bitregion_start, bitregion_end,
857 fieldmode, value, true))
858 gcc_unreachable ();
859 }
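/* Illustrative call (a sketch, not taken from an actual caller): to
   store the 8-bit value held in VAL into bits 3..10 of the memory
   reference MEM, with no C++0x bit region being tracked and taking
   QImode as the field's mode, one would write

     store_bit_field (mem, 8, 3, 0, 0, QImode, val);

   Callers that do track a bit region pass nonzero BITREGION_START and
   BITREGION_END instead.  */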
860 \f
861 /* Use shifts and boolean operations to store VALUE
862 into a bit field of width BITSIZE
863 in the memory location specified by OP0, offset by OFFSET bytes.
864 (OFFSET must be 0 if OP0 is a register.)
865 The field starts at position BITPOS within the byte.
866 (If OP0 is a register, it may be a full word or a narrower mode,
867 but BITPOS still counts within a full word,
868 which is significant on bigendian machines.) */
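/* In essence, for a field that fits in a single word, the function
   computes (C sketch, assuming VALUE is already truncated to BITSIZE
   bits):

     mask = ((1 << bitsize) - 1) << bitpos;
     word = (word & ~mask) | (value << bitpos);

   using and_optab, ior_optab and expand_shift; the AND is skipped when
   VALUE is all ones and the IOR when it is all zeros.  */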
869
870 static void
871 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
872 unsigned HOST_WIDE_INT bitsize,
873 unsigned HOST_WIDE_INT bitpos,
874 unsigned HOST_WIDE_INT bitregion_start,
875 unsigned HOST_WIDE_INT bitregion_end,
876 rtx value)
877 {
878 enum machine_mode mode;
879 unsigned int total_bits = BITS_PER_WORD;
880 rtx temp;
881 int all_zero = 0;
882 int all_one = 0;
883
884 /* There is a case not handled here:
885 a structure with a known alignment of just a halfword
886 and a field split across two aligned halfwords within the structure.
887 Or likewise a structure with a known alignment of just a byte
888 and a field split across two bytes.
889 Such cases are not supposed to be able to occur. */
890
891 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
892 {
893 gcc_assert (!offset);
894 /* Special treatment for a bit field split across two registers. */
895 if (bitsize + bitpos > BITS_PER_WORD)
896 {
897 store_split_bit_field (op0, bitsize, bitpos,
898 bitregion_start, bitregion_end,
899 value);
900 return;
901 }
902 }
903 else
904 {
905 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
906
907 if (bitregion_end)
908 maxbits = bitregion_end - bitregion_start + 1;
909
910 /* Get the proper mode to use for this field. We want a mode that
911 includes the entire field. If such a mode would be larger than
912 a word, we won't be doing the extraction the normal way.
913 We don't want a mode bigger than the destination. */
914
915 mode = GET_MODE (op0);
916 if (GET_MODE_BITSIZE (mode) == 0
917 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
918 mode = word_mode;
919
920 if (MEM_VOLATILE_P (op0)
921 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
922 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
923 && flag_strict_volatile_bitfields > 0)
924 mode = GET_MODE (op0);
925 else
926 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
927 bitregion_start, bitregion_end,
928 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
929
930 if (mode == VOIDmode)
931 {
932 /* The only way this should occur is if the field spans word
933 boundaries. */
934 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
935 bitregion_start, bitregion_end, value);
936 return;
937 }
938
939 total_bits = GET_MODE_BITSIZE (mode);
940
941 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
942 be in the range 0 to total_bits-1, and put any excess bytes in
943 OFFSET. */
944 if (bitpos >= total_bits)
945 {
946 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
947 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
948 * BITS_PER_UNIT);
949 }
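/* For example, with total_bits == 32 and BITPOS == 70, the adjustment
   above adds 8 bytes to OFFSET (two full 32-bit units) and leaves
   BITPOS == 6.  */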
950
951 /* Get ref to an aligned byte, halfword, or word containing the field.
952 Adjust BITPOS to be position within a word,
953 and OFFSET to be the offset of that word.
954 Then alter OP0 to refer to that word. */
955 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
956 offset -= (offset % (total_bits / BITS_PER_UNIT));
957 op0 = adjust_address (op0, mode, offset);
958 }
959
960 mode = GET_MODE (op0);
961
962 /* Now MODE is either some integral mode for a MEM as OP0,
963 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
964 The bit field is contained entirely within OP0.
965 BITPOS is the starting bit number within OP0.
966 (OP0's mode may actually be narrower than MODE.) */
967
968 if (BYTES_BIG_ENDIAN)
969 /* BITPOS is the distance between our msb
970 and that of the containing datum.
971 Convert it to the distance from the lsb. */
972 bitpos = total_bits - bitsize - bitpos;
973
974 /* Now BITPOS is always the distance between our lsb
975 and that of OP0. */
976
977 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
978 we must first convert its mode to MODE. */
979
980 if (CONST_INT_P (value))
981 {
982 HOST_WIDE_INT v = INTVAL (value);
983
984 if (bitsize < HOST_BITS_PER_WIDE_INT)
985 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
986
987 if (v == 0)
988 all_zero = 1;
989 else if ((bitsize < HOST_BITS_PER_WIDE_INT
990 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
991 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
992 all_one = 1;
993
994 value = lshift_value (mode, value, bitpos, bitsize);
995 }
996 else
997 {
998 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
999 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1000
1001 if (GET_MODE (value) != mode)
1002 value = convert_to_mode (mode, value, 1);
1003
1004 if (must_and)
1005 value = expand_binop (mode, and_optab, value,
1006 mask_rtx (mode, 0, bitsize, 0),
1007 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1008 if (bitpos > 0)
1009 value = expand_shift (LSHIFT_EXPR, mode, value,
1010 bitpos, NULL_RTX, 1);
1011 }
1012
1013 /* Now clear the chosen bits in OP0,
1014 except that if VALUE is -1 we need not bother. */
1015 /* We keep the intermediates in registers to allow CSE to combine
1016 consecutive bitfield assignments. */
1017
1018 temp = force_reg (mode, op0);
1019
1020 if (! all_one)
1021 {
1022 temp = expand_binop (mode, and_optab, temp,
1023 mask_rtx (mode, bitpos, bitsize, 1),
1024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1025 temp = force_reg (mode, temp);
1026 }
1027
1028 /* Now logical-or VALUE into OP0, unless it is zero. */
1029
1030 if (! all_zero)
1031 {
1032 temp = expand_binop (mode, ior_optab, temp, value,
1033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1034 temp = force_reg (mode, temp);
1035 }
1036
1037 if (op0 != temp)
1038 {
1039 op0 = copy_rtx (op0);
1040 emit_move_insn (op0, temp);
1041 }
1042 }
1043 \f
1044 /* Store a bit field that is split across multiple accessible memory objects.
1045
1046 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1047 BITSIZE is the field width; BITPOS the position of its first bit
1048 (within the word).
1049 VALUE is the value to store.
1050
1051 This does not yet handle fields wider than BITS_PER_WORD. */
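/* Worked example: with BITS_PER_WORD == 32, a 20-bit field starting at
   BITPOS 28 is stored in two pieces, 4 bits into the first word and
   the remaining 16 bits into the following word.  */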
1052
1053 static void
1054 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1055 unsigned HOST_WIDE_INT bitpos,
1056 unsigned HOST_WIDE_INT bitregion_start,
1057 unsigned HOST_WIDE_INT bitregion_end,
1058 rtx value)
1059 {
1060 unsigned int unit;
1061 unsigned int bitsdone = 0;
1062
1063 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1064 much at a time. */
1065 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1066 unit = BITS_PER_WORD;
1067 else
1068 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1069
1070 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1071 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1072 that VALUE might be a floating-point constant. */
1073 if (CONSTANT_P (value) && !CONST_INT_P (value))
1074 {
1075 rtx word = gen_lowpart_common (word_mode, value);
1076
1077 if (word && (value != word))
1078 value = word;
1079 else
1080 value = gen_lowpart_common (word_mode,
1081 force_reg (GET_MODE (value) != VOIDmode
1082 ? GET_MODE (value)
1083 : word_mode, value));
1084 }
1085
1086 while (bitsdone < bitsize)
1087 {
1088 unsigned HOST_WIDE_INT thissize;
1089 rtx part, word;
1090 unsigned HOST_WIDE_INT thispos;
1091 unsigned HOST_WIDE_INT offset;
1092
1093 offset = (bitpos + bitsdone) / unit;
1094 thispos = (bitpos + bitsdone) % unit;
1095
1096 /* THISSIZE must not overrun a word boundary. Otherwise,
1097 store_fixed_bit_field will call us again, and we will mutually
1098 recurse forever. */
1099 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1100 thissize = MIN (thissize, unit - thispos);
1101
1102 if (BYTES_BIG_ENDIAN)
1103 {
1104 int total_bits;
1105
1106 /* We must do an endian conversion exactly the same way as it is
1107 done in extract_bit_field, so that the two calls to
1108 extract_fixed_bit_field will have comparable arguments. */
1109 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1110 total_bits = BITS_PER_WORD;
1111 else
1112 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1113
1114 /* Fetch successively less significant portions. */
1115 if (CONST_INT_P (value))
1116 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1117 >> (bitsize - bitsdone - thissize))
1118 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1119 else
1120 /* The args are chosen so that the last part includes the
1121 lsb. Give extract_bit_field the value it needs (with
1122 endianness compensation) to fetch the piece we want. */
1123 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1124 total_bits - bitsize + bitsdone,
1125 NULL_RTX, 1, false);
1126 }
1127 else
1128 {
1129 /* Fetch successively more significant portions. */
1130 if (CONST_INT_P (value))
1131 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1132 >> bitsdone)
1133 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1134 else
1135 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1136 bitsdone, NULL_RTX, 1, false);
1137 }
1138
1139 /* If OP0 is a register, then handle OFFSET here.
1140
1141 When handling multiword bitfields, extract_bit_field may pass
1142 down a word_mode SUBREG of a larger REG for a bitfield that actually
1143 crosses a word boundary. Thus, for a SUBREG, we must find
1144 the current word starting from the base register. */
1145 if (GET_CODE (op0) == SUBREG)
1146 {
1147 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1148 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1149 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1150 word = word_offset ? const0_rtx : op0;
1151 else
1152 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1153 GET_MODE (SUBREG_REG (op0)));
1154 offset = 0;
1155 }
1156 else if (REG_P (op0))
1157 {
1158 enum machine_mode op0_mode = GET_MODE (op0);
1159 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1160 word = offset ? const0_rtx : op0;
1161 else
1162 word = operand_subword_force (op0, offset, GET_MODE (op0));
1163 offset = 0;
1164 }
1165 else
1166 word = op0;
1167
1168 /* OFFSET is in UNITs, and UNIT is in bits.
1169 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1170 it is just an out-of-bounds access. Ignore it. */
1171 if (word != const0_rtx)
1172 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1173 thispos, bitregion_start, bitregion_end, part);
1174 bitsdone += thissize;
1175 }
1176 }
1177 \f
1178 /* A subroutine of extract_bit_field_1 that converts return value X
1179 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1180 to extract_bit_field. */
1181
1182 static rtx
1183 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1184 enum machine_mode tmode, bool unsignedp)
1185 {
1186 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1187 return x;
1188
1189 /* If the x mode is not a scalar integral, first convert to the
1190 integer mode of that size and then access it as a floating-point
1191 value via a SUBREG. */
1192 if (!SCALAR_INT_MODE_P (tmode))
1193 {
1194 enum machine_mode smode;
1195
1196 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1197 x = convert_to_mode (smode, x, unsignedp);
1198 x = force_reg (smode, x);
1199 return gen_lowpart (tmode, x);
1200 }
1201
1202 return convert_to_mode (tmode, x, unsignedp);
1203 }
1204
1205 /* A subroutine of extract_bit_field, with the same arguments.
1206 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1207 if we can find no other means of implementing the operation.
1208 if FALLBACK_P is false, return NULL instead. */
1209
1210 static rtx
1211 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1212 unsigned HOST_WIDE_INT bitnum,
1213 int unsignedp, bool packedp, rtx target,
1214 enum machine_mode mode, enum machine_mode tmode,
1215 bool fallback_p)
1216 {
1217 unsigned int unit
1218 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1219 unsigned HOST_WIDE_INT offset, bitpos;
1220 rtx op0 = str_rtx;
1221 enum machine_mode int_mode;
1222 enum machine_mode ext_mode;
1223 enum machine_mode mode1;
1224 int byte_offset;
1225
1226 if (tmode == VOIDmode)
1227 tmode = mode;
1228
1229 while (GET_CODE (op0) == SUBREG)
1230 {
1231 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1232 op0 = SUBREG_REG (op0);
1233 }
1234
1235 /* If we have an out-of-bounds access to a register, just return an
1236 uninitialized register of the required mode. This can occur if the
1237 source code contains an out-of-bounds access to a small array. */
1238 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1239 return gen_reg_rtx (tmode);
1240
1241 if (REG_P (op0)
1242 && mode == GET_MODE (op0)
1243 && bitnum == 0
1244 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1245 {
1246 /* We're trying to extract a full register from itself. */
1247 return op0;
1248 }
1249
1250 /* See if we can get a better vector mode before extracting. */
1251 if (VECTOR_MODE_P (GET_MODE (op0))
1252 && !MEM_P (op0)
1253 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1254 {
1255 enum machine_mode new_mode;
1256
1257 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1258 new_mode = MIN_MODE_VECTOR_FLOAT;
1259 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1260 new_mode = MIN_MODE_VECTOR_FRACT;
1261 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1262 new_mode = MIN_MODE_VECTOR_UFRACT;
1263 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1264 new_mode = MIN_MODE_VECTOR_ACCUM;
1265 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1266 new_mode = MIN_MODE_VECTOR_UACCUM;
1267 else
1268 new_mode = MIN_MODE_VECTOR_INT;
1269
1270 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1271 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1272 && targetm.vector_mode_supported_p (new_mode))
1273 break;
1274 if (new_mode != VOIDmode)
1275 op0 = gen_lowpart (new_mode, op0);
1276 }
1277
1278 /* Use vec_extract patterns for extracting parts of vectors whenever
1279 available. */
1280 if (VECTOR_MODE_P (GET_MODE (op0))
1281 && !MEM_P (op0)
1282 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1283 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1284 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1285 {
1286 struct expand_operand ops[3];
1287 enum machine_mode outermode = GET_MODE (op0);
1288 enum machine_mode innermode = GET_MODE_INNER (outermode);
1289 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1290 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1291
1292 create_output_operand (&ops[0], target, innermode);
1293 create_input_operand (&ops[1], op0, outermode);
1294 create_integer_operand (&ops[2], pos);
1295 if (maybe_expand_insn (icode, 3, ops))
1296 {
1297 target = ops[0].value;
1298 if (GET_MODE (target) != mode)
1299 return gen_lowpart (tmode, target);
1300 return target;
1301 }
1302 }
1303
1304 /* Make sure we are playing with integral modes. Pun with subregs
1305 if we aren't. */
1306 {
1307 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1308 if (imode != GET_MODE (op0))
1309 {
1310 if (MEM_P (op0))
1311 op0 = adjust_address (op0, imode, 0);
1312 else if (imode != BLKmode)
1313 {
1314 op0 = gen_lowpart (imode, op0);
1315
1316 /* If we got a SUBREG, force it into a register since we
1317 aren't going to be able to do another SUBREG on it. */
1318 if (GET_CODE (op0) == SUBREG)
1319 op0 = force_reg (imode, op0);
1320 }
1321 else if (REG_P (op0))
1322 {
1323 rtx reg, subreg;
1324 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1325 MODE_INT);
1326 reg = gen_reg_rtx (imode);
1327 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1328 emit_move_insn (subreg, op0);
1329 op0 = reg;
1330 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1331 }
1332 else
1333 {
1334 rtx mem = assign_stack_temp (GET_MODE (op0),
1335 GET_MODE_SIZE (GET_MODE (op0)), 0);
1336 emit_move_insn (mem, op0);
1337 op0 = adjust_address (mem, BLKmode, 0);
1338 }
1339 }
1340 }
1341
1342 /* We may be accessing data outside the field, which means
1343 we can alias adjacent data. */
1344 if (MEM_P (op0))
1345 {
1346 op0 = shallow_copy_rtx (op0);
1347 set_mem_alias_set (op0, 0);
1348 set_mem_expr (op0, 0);
1349 }
1350
1351 /* Extraction of a full-word or multi-word value from a structure
1352 in a register or aligned memory can be done with just a SUBREG.
1353 A subword value in the least significant part of a register
1354 can also be extracted with a SUBREG. For this, we need the
1355 byte offset of the value in op0. */
1356
1357 bitpos = bitnum % unit;
1358 offset = bitnum / unit;
1359 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1360
1361 /* If OP0 is a register, BITPOS must count within a word.
1362 But as we have it, it counts within whatever size OP0 now has.
1363 On a bigendian machine, these are not the same, so convert. */
1364 if (BYTES_BIG_ENDIAN
1365 && !MEM_P (op0)
1366 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1367 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1368
1369 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1370 If that's wrong, the solution is to test for it and set TARGET to 0
1371 if needed. */
1372
1373 /* Only scalar integer modes can be converted via subregs. There is an
1374 additional problem for FP modes here in that they can have a precision
1375 which is different from the size. mode_for_size uses precision, but
1376 we want a mode based on the size, so we must avoid calling it for FP
1377 modes. */
1378 mode1 = (SCALAR_INT_MODE_P (tmode)
1379 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1380 : mode);
1381
1382 /* If the bitfield is volatile, we need to make sure the access
1383 remains on a type-aligned boundary. */
1384 if (GET_CODE (op0) == MEM
1385 && MEM_VOLATILE_P (op0)
1386 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1387 && flag_strict_volatile_bitfields > 0)
1388 goto no_subreg_mode_swap;
1389
1390 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1391 && bitpos % BITS_PER_WORD == 0)
1392 || (mode1 != BLKmode
1393 /* ??? The big endian test here is wrong. This is correct
1394 if the value is in a register, and if mode_for_size is not
1395 the same mode as op0. This causes us to get unnecessarily
1396 inefficient code from the Thumb port when -mbig-endian. */
1397 && (BYTES_BIG_ENDIAN
1398 ? bitpos + bitsize == BITS_PER_WORD
1399 : bitpos == 0)))
1400 && ((!MEM_P (op0)
1401 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1402 && GET_MODE_SIZE (mode1) != 0
1403 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1404 || (MEM_P (op0)
1405 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1406 || (offset * BITS_PER_UNIT % bitsize == 0
1407 && MEM_ALIGN (op0) % bitsize == 0)))))
1408 {
1409 if (MEM_P (op0))
1410 op0 = adjust_address (op0, mode1, offset);
1411 else if (mode1 != GET_MODE (op0))
1412 {
1413 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1414 byte_offset);
1415 if (sub == NULL)
1416 goto no_subreg_mode_swap;
1417 op0 = sub;
1418 }
1419 if (mode1 != mode)
1420 return convert_to_mode (tmode, op0, unsignedp);
1421 return op0;
1422 }
1423 no_subreg_mode_swap:
1424
1425 /* Handle fields bigger than a word. */
1426
1427 if (bitsize > BITS_PER_WORD)
1428 {
1429 /* Here we transfer the words of the field
1430 in the order least significant first.
1431 This is because the most significant word is the one which may
1432 be less than full. */
1433
1434 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1435 unsigned int i;
1436
1437 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1438 target = gen_reg_rtx (mode);
1439
1440 /* Indicate for flow that the entire target reg is being set. */
1441 emit_clobber (target);
1442
1443 for (i = 0; i < nwords; i++)
1444 {
1445 /* If I is 0, use the low-order word in both field and target;
1446 if I is 1, use the next to lowest word; and so on. */
1447 /* Word number in TARGET to use. */
1448 unsigned int wordnum
1449 = (WORDS_BIG_ENDIAN
1450 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1451 : i);
1452 /* Offset from start of field in OP0. */
1453 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1454 ? MAX (0, ((int) bitsize - ((int) i + 1)
1455 * (int) BITS_PER_WORD))
1456 : (int) i * BITS_PER_WORD);
1457 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1458 rtx result_part
1459 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1460 bitsize - i * BITS_PER_WORD),
1461 bitnum + bit_offset, 1, false, target_part, mode,
1462 word_mode);
1463
1464 gcc_assert (target_part);
1465
1466 if (result_part != target_part)
1467 emit_move_insn (target_part, result_part);
1468 }
1469
1470 if (unsignedp)
1471 {
1472 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1473 need to be zero'd out. */
1474 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1475 {
1476 unsigned int i, total_words;
1477
1478 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1479 for (i = nwords; i < total_words; i++)
1480 emit_move_insn
1481 (operand_subword (target,
1482 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1483 1, VOIDmode),
1484 const0_rtx);
1485 }
1486 return target;
1487 }
1488
1489 /* Signed bit field: sign-extend with two arithmetic shifts. */
1490 target = expand_shift (LSHIFT_EXPR, mode, target,
1491 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1492 return expand_shift (RSHIFT_EXPR, mode, target,
1493 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1494 }
1495
1496 /* From here on we know the desired field is smaller than a word. */
1497
1498 /* Check if there is a correspondingly-sized integer field, so we can
1499 safely extract it as one size of integer, if necessary; then
1500 truncate or extend to the size that is wanted; then use SUBREGs or
1501 convert_to_mode to get one of the modes we really wanted. */
1502
1503 int_mode = int_mode_for_mode (tmode);
1504 if (int_mode == BLKmode)
1505 int_mode = int_mode_for_mode (mode);
1506 /* Should probably push op0 out to memory and then do a load. */
1507 gcc_assert (int_mode != BLKmode);
1508
1509 /* OFFSET is the number of words or bytes (UNIT says which)
1510 from STR_RTX to the first word or byte containing part of the field. */
1511 if (!MEM_P (op0))
1512 {
1513 if (offset != 0
1514 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1515 {
1516 if (!REG_P (op0))
1517 op0 = copy_to_reg (op0);
1518 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1519 op0, (offset * UNITS_PER_WORD));
1520 }
1521 offset = 0;
1522 }
1523
1524 /* Now OFFSET is nonzero only for memory operands. */
1525 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1526 if (ext_mode != MAX_MACHINE_MODE
1527 && bitsize > 0
1528 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1529 /* Do not use extv/extzv for volatile bitfields when
1530 -fstrict-volatile-bitfields is in effect. */
1531 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1532 && flag_strict_volatile_bitfields > 0)
1533 /* If op0 is a register, we need it in EXT_MODE to make it
1534 acceptable to the format of ext(z)v. */
1535 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1536 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1537 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1538 {
1539 struct expand_operand ops[4];
1540 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1541 rtx xop0 = op0;
1542 rtx xtarget = target;
1543 rtx xspec_target = target;
1544 rtx xspec_target_subreg = 0;
1545
1546 /* If op0 is a register, we need it in EXT_MODE to make it
1547 acceptable to the format of ext(z)v. */
1548 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1549 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1550 if (MEM_P (xop0))
1551 /* Get ref to first byte containing part of the field. */
1552 xop0 = adjust_address (xop0, byte_mode, xoffset);
1553
1554 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1555 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1556 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1557
1558 unit = GET_MODE_BITSIZE (ext_mode);
1559
1560 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1561 "backwards" from the size of the unit we are extracting from.
1562 Otherwise, we count bits from the most significant on a
1563 BYTES/BITS_BIG_ENDIAN machine. */
1564
1565 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1566 xbitpos = unit - bitsize - xbitpos;
1567
1568 if (xtarget == 0)
1569 xtarget = xspec_target = gen_reg_rtx (tmode);
1570
1571 if (GET_MODE (xtarget) != ext_mode)
1572 {
1573 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1574 between the mode of the extraction (word_mode) and the target
1575 mode. Instead, create a temporary and use convert_move to set
1576 the target. */
1577 if (REG_P (xtarget)
1578 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1579 {
1580 xtarget = gen_lowpart (ext_mode, xtarget);
1581 if (GET_MODE_PRECISION (ext_mode)
1582 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1583 xspec_target_subreg = xtarget;
1584 }
1585 else
1586 xtarget = gen_reg_rtx (ext_mode);
1587 }
1588
1589 create_output_operand (&ops[0], xtarget, ext_mode);
1590 create_fixed_operand (&ops[1], xop0);
1591 create_integer_operand (&ops[2], bitsize);
1592 create_integer_operand (&ops[3], xbitpos);
1593 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1594 4, ops))
1595 {
1596 xtarget = ops[0].value;
1597 if (xtarget == xspec_target)
1598 return xtarget;
1599 if (xtarget == xspec_target_subreg)
1600 return xspec_target;
1601 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602 }
1603 }
1604
1605 /* If OP0 is a memory, try copying it to a register and seeing if a
1606 cheap register alternative is available. */
1607 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1608 {
1609 enum machine_mode bestmode;
1610
1611 /* Get the mode to use for inserting into this field. If
1612 OP0 is BLKmode, get the smallest mode consistent with the
1613 alignment. If OP0 is a non-BLKmode object that is no
1614 wider than EXT_MODE, use its mode. Otherwise, use the
1615 smallest mode containing the field. */
1616
1617 if (GET_MODE (op0) == BLKmode
1618 || (ext_mode != MAX_MACHINE_MODE
1619 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1620 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1621 (ext_mode == MAX_MACHINE_MODE
1622 ? VOIDmode : ext_mode),
1623 MEM_VOLATILE_P (op0));
1624 else
1625 bestmode = GET_MODE (op0);
1626
1627 if (bestmode != VOIDmode
1628 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1629 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1630 {
1631 unsigned HOST_WIDE_INT xoffset, xbitpos;
1632
1633 /* Compute the offset as a multiple of this unit,
1634 counting in bytes. */
1635 unit = GET_MODE_BITSIZE (bestmode);
1636 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1637 xbitpos = bitnum % unit;
1638
1639 /* Make sure the register is big enough for the whole field. */
1640 if (xoffset * BITS_PER_UNIT + unit
1641 >= offset * BITS_PER_UNIT + bitsize)
1642 {
1643 rtx last, result, xop0;
1644
1645 last = get_last_insn ();
1646
1647 /* Fetch it to a register in that size. */
1648 xop0 = adjust_address (op0, bestmode, xoffset);
1649 xop0 = force_reg (bestmode, xop0);
1650 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1651 unsignedp, packedp, target,
1652 mode, tmode, false);
1653 if (result)
1654 return result;
1655
1656 delete_insns_since (last);
1657 }
1658 }
1659 }
1660
1661 if (!fallback_p)
1662 return NULL;
1663
1664 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1665 bitpos, target, unsignedp, packedp);
1666 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1667 }
1668
1669 /* Generate code to extract a bit-field from STR_RTX
1670 containing BITSIZE bits, starting at BITNUM,
1671 and put it in TARGET if possible (if TARGET is nonzero).
1672 Regardless of TARGET, we return the rtx for where the value is placed.
1673
1674 STR_RTX is the structure containing the byte (a REG or MEM).
1675 UNSIGNEDP is nonzero if this is an unsigned bit field.
1676 PACKEDP is nonzero if the field has the packed attribute.
1677 MODE is the natural mode of the field value once extracted.
1678 TMODE is the mode the caller would like the value to have;
1679 but the value may be returned with type MODE instead.
1680
1681 If a TARGET is specified and we can store in it at no extra cost,
1682 we do so, and return TARGET.
1683 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1684 if they are equally easy. */
1685
1686 rtx
1687 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1688 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1689 rtx target, enum machine_mode mode, enum machine_mode tmode)
1690 {
1691 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1692 target, mode, tmode, true);
1693 }
1694 \f
1695 /* Extract a bit field using shifts and boolean operations.
1696 Returns an rtx to represent the value.
1697 OP0 addresses a register (word) or memory (byte).
1698 BITPOS says which bit within the word or byte the bit field starts in.
1699 OFFSET says how many bytes farther the bit field starts;
1700 it is 0 if OP0 is a register.
1701 BITSIZE says how many bits long the bit field is.
1702 (If OP0 is a register, it may be narrower than a full word,
1703 but BITPOS still counts within a full word,
1704 which is significant on bigendian machines.)
1705
1706 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707 PACKEDP is true if the field has the packed attribute.
1708
1709 If TARGET is nonzero, attempts to store the value there
1710 and return TARGET, but this is not guaranteed.
1711 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1712
1713 static rtx
1714 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1715 unsigned HOST_WIDE_INT offset,
1716 unsigned HOST_WIDE_INT bitsize,
1717 unsigned HOST_WIDE_INT bitpos, rtx target,
1718 int unsignedp, bool packedp)
1719 {
1720 unsigned int total_bits = BITS_PER_WORD;
1721 enum machine_mode mode;
1722
1723 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1724 {
1725 /* Special treatment for a bit field split across two registers. */
1726 if (bitsize + bitpos > BITS_PER_WORD)
1727 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1728 }
1729 else
1730 {
1731 /* Get the proper mode to use for this field. We want a mode that
1732 includes the entire field. If such a mode would be larger than
1733 a word, we won't be doing the extraction the normal way. */
1734
1735 if (MEM_VOLATILE_P (op0)
1736 && flag_strict_volatile_bitfields > 0)
1737 {
1738 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1739 mode = GET_MODE (op0);
1740 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1741 mode = GET_MODE (target);
1742 else
1743 mode = tmode;
1744 }
1745 else
1746 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1747 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1748
1749 if (mode == VOIDmode)
1750 /* The only way this should occur is if the field spans word
1751 boundaries. */
1752 return extract_split_bit_field (op0, bitsize,
1753 bitpos + offset * BITS_PER_UNIT,
1754 unsignedp);
1755
1756 total_bits = GET_MODE_BITSIZE (mode);
1757
1758 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1759 be in the range 0 to total_bits-1, and put any excess bytes in
1760 OFFSET. */
1761 if (bitpos >= total_bits)
1762 {
1763 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1764 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1765 * BITS_PER_UNIT);
1766 }
1767
1768 /* If we're accessing a volatile MEM, we can't do the next
1769 alignment step if it results in a multi-word access where we
1770 otherwise wouldn't have one. So, check for that case
1771 here. */
1772 if (MEM_P (op0)
1773 && MEM_VOLATILE_P (op0)
1774 && flag_strict_volatile_bitfields > 0
1775 && bitpos + bitsize <= total_bits
1776 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1777 {
1778 if (STRICT_ALIGNMENT)
1779 {
1780 static bool informed_about_misalignment = false;
1781 bool warned;
1782
1783 if (packedp)
1784 {
1785 if (bitsize == total_bits)
1786 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1787 "multiple accesses to volatile structure member"
1788 " because of packed attribute");
1789 else
1790 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1791 "multiple accesses to volatile structure bitfield"
1792 " because of packed attribute");
1793
1794 return extract_split_bit_field (op0, bitsize,
1795 bitpos + offset * BITS_PER_UNIT,
1796 unsignedp);
1797 }
1798
1799 if (bitsize == total_bits)
1800 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1801 "mis-aligned access used for structure member");
1802 else
1803 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1804 "mis-aligned access used for structure bitfield");
1805
1806 if (! informed_about_misalignment && warned)
1807 {
1808 informed_about_misalignment = true;
1809 inform (input_location,
1810 "when a volatile object spans multiple type-sized locations,"
1811 " the compiler must choose between using a single mis-aligned access to"
1812 " preserve the volatility, or using multiple aligned accesses to avoid"
1813 " runtime faults; this code may fail at runtime if the hardware does"
1814 " not allow this access");
1815 }
1816 }
1817 }
1818 else
1819 {
1820
1821 /* Get ref to an aligned byte, halfword, or word containing the field.
1822 Adjust BITPOS to be position within a word,
1823 and OFFSET to be the offset of that word.
1824 Then alter OP0 to refer to that word. */
1825 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1826 offset -= (offset % (total_bits / BITS_PER_UNIT));
1827 }
1828
1829 op0 = adjust_address (op0, mode, offset);
1830 }
1831
1832 mode = GET_MODE (op0);
1833
1834 if (BYTES_BIG_ENDIAN)
1835 /* BITPOS is the distance between our msb and that of OP0.
1836 Convert it to the distance from the lsb. */
1837 bitpos = total_bits - bitsize - bitpos;
1838
1839 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1840 We have reduced the big-endian case to the little-endian case. */
1841
1842 if (unsignedp)
1843 {
1844 if (bitpos)
1845 {
1846 /* If the field does not already start at the lsb,
1847 shift it so it does. */
1848 /* Maybe propagate the target for the shift. */
1849 /* But not if we will return it--could confuse integrate.c. */
1850 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1851 if (tmode != mode) subtarget = 0;
1852 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1853 }
1854 /* Convert the value to the desired mode. */
1855 if (mode != tmode)
1856 op0 = convert_to_mode (tmode, op0, 1);
1857
1858 /* Unless the msb of the field used to be the msb when we shifted,
1859 mask out the upper bits. */
1860
1861 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1862 return expand_binop (GET_MODE (op0), and_optab, op0,
1863 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1864 target, 1, OPTAB_LIB_WIDEN);
1865 return op0;
1866 }
1867
1868 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1869 then arithmetic-shift its lsb to the lsb of the word. */
1870 op0 = force_reg (mode, op0);
1871
1872 /* Find the narrowest integer mode that contains the field. */
1873
1874 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1875 mode = GET_MODE_WIDER_MODE (mode))
1876 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1877 {
1878 op0 = convert_to_mode (mode, op0, 0);
1879 break;
1880 }
1881
1882 if (mode != tmode)
1883 target = 0;
1884
1885 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1886 {
1887 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1888 /* Maybe propagate the target for the shift. */
1889 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1890 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1891 }
1892
1893 return expand_shift (RSHIFT_EXPR, mode, op0,
1894 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1895 }
1896 \f
1897 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1898 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1899 complement of that if COMPLEMENT. The mask is truncated if
1900 necessary to the width of mode MODE. The mask is zero-extended if
1901 BITSIZE+BITPOS is too small for MODE. */
1902
1903 static rtx
1904 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1905 {
1906 double_int mask;
1907
1908 mask = double_int_mask (bitsize);
1909 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1910
1911 if (complement)
1912 mask = double_int_not (mask);
1913
1914 return immed_double_int_const (mask, mode);
1915 }
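/* Editorial illustration (not part of the original sources): with
   MODE == SImode, BITPOS == 4 and BITSIZE == 8, the mask built above is
   0x00000ff0, i.e. eight one bits sitting above four low-order zero bits;
   with COMPLEMENT nonzero the result would instead be 0xfffff00f.  */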
1916
1917 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1918 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1919
1920 static rtx
1921 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1922 {
1923 double_int val;
1924
1925 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1926 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1927
1928 return immed_double_int_const (val, mode);
1929 }
1930 \f
1931 /* Extract a bit field that is split across two words
1932 and return an RTX for the result.
1933
1934 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1935 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1936 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1937
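/* Editorial illustration (assuming 32-bit words; not part of the original
   comment): a 20-bit field starting at bit 28 of its word spans two words.
   The loop below then fetches 4 bits from the first word and the remaining
   16 bits from the second, each as an unsigned field, and shifts and ORs
   the two parts together.  */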
1938 static rtx
1939 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1940 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1941 {
1942 unsigned int unit;
1943 unsigned int bitsdone = 0;
1944 rtx result = NULL_RTX;
1945 int first = 1;
1946
1947 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1948 much at a time. */
1949 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1950 unit = BITS_PER_WORD;
1951 else
1952 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1953
1954 while (bitsdone < bitsize)
1955 {
1956 unsigned HOST_WIDE_INT thissize;
1957 rtx part, word;
1958 unsigned HOST_WIDE_INT thispos;
1959 unsigned HOST_WIDE_INT offset;
1960
1961 offset = (bitpos + bitsdone) / unit;
1962 thispos = (bitpos + bitsdone) % unit;
1963
1964 /* THISSIZE must not overrun a word boundary. Otherwise,
1965 extract_fixed_bit_field will call us again, and we will mutually
1966 recurse forever. */
1967 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1968 thissize = MIN (thissize, unit - thispos);
1969
1970 /* If OP0 is a register, then handle OFFSET here.
1971
1972 When handling multiword bitfields, extract_bit_field may pass
1973 down a word_mode SUBREG of a larger REG for a bitfield that actually
1974 crosses a word boundary. Thus, for a SUBREG, we must find
1975 the current word starting from the base register. */
1976 if (GET_CODE (op0) == SUBREG)
1977 {
1978 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1979 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1980 GET_MODE (SUBREG_REG (op0)));
1981 offset = 0;
1982 }
1983 else if (REG_P (op0))
1984 {
1985 word = operand_subword_force (op0, offset, GET_MODE (op0));
1986 offset = 0;
1987 }
1988 else
1989 word = op0;
1990
1991 /* Extract the parts in bit-counting order,
1992 whose meaning is determined by BYTES_PER_UNIT.
1993 OFFSET is in UNITs, and UNIT is in bits.
1994 extract_fixed_bit_field wants offset in bytes. */
1995 part = extract_fixed_bit_field (word_mode, word,
1996 offset * unit / BITS_PER_UNIT,
1997 thissize, thispos, 0, 1, false);
1998 bitsdone += thissize;
1999
2000 /* Shift this part into place for the result. */
2001 if (BYTES_BIG_ENDIAN)
2002 {
2003 if (bitsize != bitsdone)
2004 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2005 bitsize - bitsdone, 0, 1);
2006 }
2007 else
2008 {
2009 if (bitsdone != thissize)
2010 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2011 bitsdone - thissize, 0, 1);
2012 }
2013
2014 if (first)
2015 result = part;
2016 else
2017 /* Combine the parts with bitwise or. This works
2018 because we extracted each part as an unsigned bit field. */
2019 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2020 OPTAB_LIB_WIDEN);
2021
2022 first = 0;
2023 }
2024
2025 /* Unsigned bit field: we are done. */
2026 if (unsignedp)
2027 return result;
2028 /* Signed bit field: sign-extend with two arithmetic shifts. */
2029 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2030 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2031 return expand_shift (RSHIFT_EXPR, word_mode, result,
2032 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2033 }
2034 \f
2035 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2036 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2037 MODE, fill the upper bits with zeros. Fail if the layout of either
2038 mode is unknown (as for CC modes) or if the extraction would involve
2039 unprofitable mode punning. Return the value on success, otherwise
2040 return null.
2041
2042 This is different from gen_lowpart* in these respects:
2043
2044 - the returned value must always be considered an rvalue
2045
2046 - when MODE is wider than SRC_MODE, the extraction involves
2047 a zero extension
2048
2049 - when MODE is smaller than SRC_MODE, the extraction involves
2050 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2051
2052 In other words, this routine performs a computation, whereas the
2053 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2054 operations. */
2055
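/* Editorial examples of the intent (not part of the original comment):
   extract_low_bits (SImode, HImode, x) yields the 16-bit pattern of X
   zero-extended to 32 bits, while extract_low_bits (SImode, SFmode, x)
   reinterprets the float's bit pattern as a 32-bit integer, provided the
   modes involved are tieable.  */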
2056 rtx
2057 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2058 {
2059 enum machine_mode int_mode, src_int_mode;
2060
2061 if (mode == src_mode)
2062 return src;
2063
2064 if (CONSTANT_P (src))
2065 {
2066 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2067 fails, it will happily create (subreg (symbol_ref)) or similar
2068 invalid SUBREGs. */
2069 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2070 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2071 if (ret)
2072 return ret;
2073
2074 if (GET_MODE (src) == VOIDmode
2075 || !validate_subreg (mode, src_mode, src, byte))
2076 return NULL_RTX;
2077
2078 src = force_reg (GET_MODE (src), src);
2079 return gen_rtx_SUBREG (mode, src, byte);
2080 }
2081
2082 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2083 return NULL_RTX;
2084
2085 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2086 && MODES_TIEABLE_P (mode, src_mode))
2087 {
2088 rtx x = gen_lowpart_common (mode, src);
2089 if (x)
2090 return x;
2091 }
2092
2093 src_int_mode = int_mode_for_mode (src_mode);
2094 int_mode = int_mode_for_mode (mode);
2095 if (src_int_mode == BLKmode || int_mode == BLKmode)
2096 return NULL_RTX;
2097
2098 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2099 return NULL_RTX;
2100 if (!MODES_TIEABLE_P (int_mode, mode))
2101 return NULL_RTX;
2102
2103 src = gen_lowpart (src_int_mode, src);
2104 src = convert_modes (int_mode, src_int_mode, src, true);
2105 src = gen_lowpart (mode, src);
2106 return src;
2107 }
2108 \f
2109 /* Add INC into TARGET. */
2110
2111 void
2112 expand_inc (rtx target, rtx inc)
2113 {
2114 rtx value = expand_binop (GET_MODE (target), add_optab,
2115 target, inc,
2116 target, 0, OPTAB_LIB_WIDEN);
2117 if (value != target)
2118 emit_move_insn (target, value);
2119 }
2120
2121 /* Subtract DEC from TARGET. */
2122
2123 void
2124 expand_dec (rtx target, rtx dec)
2125 {
2126 rtx value = expand_binop (GET_MODE (target), sub_optab,
2127 target, dec,
2128 target, 0, OPTAB_LIB_WIDEN);
2129 if (value != target)
2130 emit_move_insn (target, value);
2131 }
2132 \f
2133 /* Output a shift instruction for expression code CODE,
2134 with SHIFTED being the rtx for the value to shift,
2135 and AMOUNT the rtx for the amount to shift by.
2136 Store the result in the rtx TARGET, if that is convenient.
2137 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2138 Return the rtx for where the value is. */
2139
2140 static rtx
2141 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2142 rtx amount, rtx target, int unsignedp)
2143 {
2144 rtx op1, temp = 0;
2145 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2146 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2147 optab lshift_optab = ashl_optab;
2148 optab rshift_arith_optab = ashr_optab;
2149 optab rshift_uns_optab = lshr_optab;
2150 optab lrotate_optab = rotl_optab;
2151 optab rrotate_optab = rotr_optab;
2152 enum machine_mode op1_mode;
2153 int attempt;
2154 bool speed = optimize_insn_for_speed_p ();
2155
2156 op1 = amount;
2157 op1_mode = GET_MODE (op1);
2158
2159 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2160 shift amount is a vector, use the vector/vector shift patterns. */
2161 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2162 {
2163 lshift_optab = vashl_optab;
2164 rshift_arith_optab = vashr_optab;
2165 rshift_uns_optab = vlshr_optab;
2166 lrotate_optab = vrotl_optab;
2167 rrotate_optab = vrotr_optab;
2168 }
2169
2170 /* We previously detected shift counts computed by NEGATE_EXPR
2171 and shifted in the other direction, but that does not work
2172 on all machines. */
2173
2174 if (SHIFT_COUNT_TRUNCATED)
2175 {
2176 if (CONST_INT_P (op1)
2177 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2178 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2179 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2180 % GET_MODE_BITSIZE (mode));
2181 else if (GET_CODE (op1) == SUBREG
2182 && subreg_lowpart_p (op1)
2183 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2184 op1 = SUBREG_REG (op1);
2185 }
2186
2187 if (op1 == const0_rtx)
2188 return shifted;
2189
2190 /* Check whether it's cheaper to implement a left shift by a constant
2191 bit count by a sequence of additions. */
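/* Editorial illustration (not in the original comment): a left shift by 2
   is then emitted as two self-additions, in effect
   t = x + x; t = t + t;
   which is what the loop below does when additions are cheaper.  */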
2192 if (code == LSHIFT_EXPR
2193 && CONST_INT_P (op1)
2194 && INTVAL (op1) > 0
2195 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2196 && INTVAL (op1) < MAX_BITS_PER_WORD
2197 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2198 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2199 {
2200 int i;
2201 for (i = 0; i < INTVAL (op1); i++)
2202 {
2203 temp = force_reg (mode, shifted);
2204 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2205 unsignedp, OPTAB_LIB_WIDEN);
2206 }
2207 return shifted;
2208 }
2209
2210 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2211 {
2212 enum optab_methods methods;
2213
2214 if (attempt == 0)
2215 methods = OPTAB_DIRECT;
2216 else if (attempt == 1)
2217 methods = OPTAB_WIDEN;
2218 else
2219 methods = OPTAB_LIB_WIDEN;
2220
2221 if (rotate)
2222 {
2223 /* Widening does not work for rotation. */
2224 if (methods == OPTAB_WIDEN)
2225 continue;
2226 else if (methods == OPTAB_LIB_WIDEN)
2227 {
2228 /* If we have been unable to open-code this by a rotation,
2229 do it as the IOR of two shifts. I.e., to rotate A
2230 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2231 where C is the bitsize of A.
2232
2233 It is theoretically possible that the target machine might
2234 not be able to perform either shift and hence we would
2235 be making two libcalls rather than just the one for the
2236 shift (similarly if IOR could not be done). We will allow
2237 this extremely unlikely lossage to avoid complicating the
2238 code below. */
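/* Editorial illustration (not in the original comment): for a 32-bit mode
   this computes, in effect,
   result = (a << n) | ((unsigned) a >> (32 - n));
   e.g. a = 0x80000001 rotated left by n = 4 gives 0x00000018.  */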
2239
2240 rtx subtarget = target == shifted ? 0 : target;
2241 rtx new_amount, other_amount;
2242 rtx temp1;
2243
2244 new_amount = op1;
2245 if (CONST_INT_P (op1))
2246 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2247 - INTVAL (op1));
2248 else
2249 other_amount
2250 = simplify_gen_binary (MINUS, GET_MODE (op1),
2251 GEN_INT (GET_MODE_PRECISION (mode)),
2252 op1);
2253
2254 shifted = force_reg (mode, shifted);
2255
2256 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2257 mode, shifted, new_amount, 0, 1);
2258 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2259 mode, shifted, other_amount,
2260 subtarget, 1);
2261 return expand_binop (mode, ior_optab, temp, temp1, target,
2262 unsignedp, methods);
2263 }
2264
2265 temp = expand_binop (mode,
2266 left ? lrotate_optab : rrotate_optab,
2267 shifted, op1, target, unsignedp, methods);
2268 }
2269 else if (unsignedp)
2270 temp = expand_binop (mode,
2271 left ? lshift_optab : rshift_uns_optab,
2272 shifted, op1, target, unsignedp, methods);
2273
2274 /* Do arithmetic shifts.
2275 Also, if we are going to widen the operand, we can just as well
2276 use an arithmetic right-shift instead of a logical one. */
2277 if (temp == 0 && ! rotate
2278 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2279 {
2280 enum optab_methods methods1 = methods;
2281
2282 /* If trying to widen a log shift to an arithmetic shift,
2283 don't accept an arithmetic shift of the same size. */
2284 if (unsignedp)
2285 methods1 = OPTAB_MUST_WIDEN;
2286
2287 /* Arithmetic shift */
2288
2289 temp = expand_binop (mode,
2290 left ? lshift_optab : rshift_arith_optab,
2291 shifted, op1, target, unsignedp, methods1);
2292 }
2293
2294 /* We used to try extzv here for logical right shifts, but that was
2295 only useful for one machine, the VAX, and caused poor code
2296 generation there for lshrdi3, so the code was deleted and a
2297 define_expand for lshrsi3 was added to vax.md. */
2298 }
2299
2300 gcc_assert (temp);
2301 return temp;
2302 }
2303
2304 /* Output a shift instruction for expression code CODE,
2305 with SHIFTED being the rtx for the value to shift,
2306 and AMOUNT the amount to shift by.
2307 Store the result in the rtx TARGET, if that is convenient.
2308 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2309 Return the rtx for where the value is. */
2310
2311 rtx
2312 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2313 int amount, rtx target, int unsignedp)
2314 {
2315 return expand_shift_1 (code, mode,
2316 shifted, GEN_INT (amount), target, unsignedp);
2317 }
2318
2319 /* Output a shift instruction for expression code CODE,
2320 with SHIFTED being the rtx for the value to shift,
2321 and AMOUNT the tree for the amount to shift by.
2322 Store the result in the rtx TARGET, if that is convenient.
2323 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2324 Return the rtx for where the value is. */
2325
2326 rtx
2327 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2328 tree amount, rtx target, int unsignedp)
2329 {
2330 return expand_shift_1 (code, mode,
2331 shifted, expand_normal (amount), target, unsignedp);
2332 }
2333
2334 \f
2335 /* Indicates the type of fixup needed after a constant multiplication.
2336 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2337 the result should be negated, and ADD_VARIANT means that the
2338 multiplicand should be added to the result. */
2339 enum mult_variant {basic_variant, negate_variant, add_variant};
2340
2341 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2342 const struct mult_cost *, enum machine_mode mode);
2343 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2344 struct algorithm *, enum mult_variant *, int);
2345 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2346 const struct algorithm *, enum mult_variant);
2347 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2348 int, rtx *, int *, int *);
2349 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2350 static rtx extract_high_half (enum machine_mode, rtx);
2351 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2352 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2353 int, int);
2354 /* Compute and return the best algorithm for multiplying by T.
2355 The algorithm must cost less than COST_LIMIT.
2356 If retval.cost >= COST_LIMIT, no algorithm was found and all
2357 other fields of the returned struct are undefined.
2358 MODE is the machine mode of the multiplication. */
2359
2360 static void
2361 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2362 const struct mult_cost *cost_limit, enum machine_mode mode)
2363 {
2364 int m;
2365 struct algorithm *alg_in, *best_alg;
2366 struct mult_cost best_cost;
2367 struct mult_cost new_limit;
2368 int op_cost, op_latency;
2369 unsigned HOST_WIDE_INT orig_t = t;
2370 unsigned HOST_WIDE_INT q;
2371 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2372 int hash_index;
2373 bool cache_hit = false;
2374 enum alg_code cache_alg = alg_zero;
2375 bool speed = optimize_insn_for_speed_p ();
2376
2377 /* Indicate that no algorithm is yet found. If no algorithm
2378 is found, this value will be returned and indicate failure. */
2379 alg_out->cost.cost = cost_limit->cost + 1;
2380 alg_out->cost.latency = cost_limit->latency + 1;
2381
2382 if (cost_limit->cost < 0
2383 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2384 return;
2385
2386 /* Restrict the bits of "t" to the multiplication's mode. */
2387 t &= GET_MODE_MASK (mode);
2388
2389 /* t == 1 can be done in zero cost. */
2390 if (t == 1)
2391 {
2392 alg_out->ops = 1;
2393 alg_out->cost.cost = 0;
2394 alg_out->cost.latency = 0;
2395 alg_out->op[0] = alg_m;
2396 return;
2397 }
2398
2399 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2400 fail now. */
2401 if (t == 0)
2402 {
2403 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2404 return;
2405 else
2406 {
2407 alg_out->ops = 1;
2408 alg_out->cost.cost = zero_cost[speed];
2409 alg_out->cost.latency = zero_cost[speed];
2410 alg_out->op[0] = alg_zero;
2411 return;
2412 }
2413 }
2414
2415 /* We'll be needing a couple extra algorithm structures now. */
2416
2417 alg_in = XALLOCA (struct algorithm);
2418 best_alg = XALLOCA (struct algorithm);
2419 best_cost = *cost_limit;
2420
2421 /* Compute the hash index. */
2422 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2423
2424 /* See if we already know what to do for T. */
2425 if (alg_hash[hash_index].t == t
2426 && alg_hash[hash_index].mode == mode
2428 && alg_hash[hash_index].speed == speed
2429 && alg_hash[hash_index].alg != alg_unknown)
2430 {
2431 cache_alg = alg_hash[hash_index].alg;
2432
2433 if (cache_alg == alg_impossible)
2434 {
2435 /* The cache tells us that it's impossible to synthesize
2436 multiplication by T within alg_hash[hash_index].cost. */
2437 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2438 /* COST_LIMIT is at least as restrictive as the one
2439 recorded in the hash table, in which case we have no
2440 hope of synthesizing a multiplication. Just
2441 return. */
2442 return;
2443
2444 /* If we get here, COST_LIMIT is less restrictive than the
2445 one recorded in the hash table, so we may be able to
2446 synthesize a multiplication. Proceed as if we didn't
2447 have the cache entry. */
2448 }
2449 else
2450 {
2451 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2452 /* The cached algorithm shows that this multiplication
2453 requires more cost than COST_LIMIT. Just return. This
2454 way, we don't clobber this cache entry with
2455 alg_impossible but retain useful information. */
2456 return;
2457
2458 cache_hit = true;
2459
2460 switch (cache_alg)
2461 {
2462 case alg_shift:
2463 goto do_alg_shift;
2464
2465 case alg_add_t_m2:
2466 case alg_sub_t_m2:
2467 goto do_alg_addsub_t_m2;
2468
2469 case alg_add_factor:
2470 case alg_sub_factor:
2471 goto do_alg_addsub_factor;
2472
2473 case alg_add_t2_m:
2474 goto do_alg_add_t2_m;
2475
2476 case alg_sub_t2_m:
2477 goto do_alg_sub_t2_m;
2478
2479 default:
2480 gcc_unreachable ();
2481 }
2482 }
2483 }
2484
2485 /* If we have a group of zero bits at the low-order part of T, try
2486 multiplying by the remaining bits and then doing a shift. */
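/* Editorial example (not in the original comment): T = 20 ends in two zero
   bits, so m = 2 and q = 5; x * 20 is then synthesized as (x * 5) << 2,
   with the recursive call handling x * 5 (e.g. as (x << 2) + x).  */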
2487
2488 if ((t & 1) == 0)
2489 {
2490 do_alg_shift:
2491 m = floor_log2 (t & -t); /* m = number of low zero bits */
2492 if (m < maxm)
2493 {
2494 q = t >> m;
2495 /* The function expand_shift will choose between a shift and
2496 a sequence of additions, so the observed cost is given as
2497 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2498 op_cost = m * add_cost[speed][mode];
2499 if (shift_cost[speed][mode][m] < op_cost)
2500 op_cost = shift_cost[speed][mode][m];
2501 new_limit.cost = best_cost.cost - op_cost;
2502 new_limit.latency = best_cost.latency - op_cost;
2503 synth_mult (alg_in, q, &new_limit, mode);
2504
2505 alg_in->cost.cost += op_cost;
2506 alg_in->cost.latency += op_cost;
2507 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2508 {
2509 struct algorithm *x;
2510 best_cost = alg_in->cost;
2511 x = alg_in, alg_in = best_alg, best_alg = x;
2512 best_alg->log[best_alg->ops] = m;
2513 best_alg->op[best_alg->ops] = alg_shift;
2514 }
2515
2516 /* See if treating ORIG_T as a signed number yields a better
2517 sequence. Try this sequence only for a negative ORIG_T
2518 as it would be useless for a non-negative ORIG_T. */
2519 if ((HOST_WIDE_INT) orig_t < 0)
2520 {
2521 /* Shift ORIG_T as follows because a right shift of a
2522 negative-valued signed type is implementation
2523 defined. */
2524 q = ~(~orig_t >> m);
2525 /* The function expand_shift will choose between a shift
2526 and a sequence of additions, so the observed cost is
2527 given as MIN (m * add_cost[speed][mode],
2528 shift_cost[speed][mode][m]). */
2529 op_cost = m * add_cost[speed][mode];
2530 if (shift_cost[speed][mode][m] < op_cost)
2531 op_cost = shift_cost[speed][mode][m];
2532 new_limit.cost = best_cost.cost - op_cost;
2533 new_limit.latency = best_cost.latency - op_cost;
2534 synth_mult (alg_in, q, &new_limit, mode);
2535
2536 alg_in->cost.cost += op_cost;
2537 alg_in->cost.latency += op_cost;
2538 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2539 {
2540 struct algorithm *x;
2541 best_cost = alg_in->cost;
2542 x = alg_in, alg_in = best_alg, best_alg = x;
2543 best_alg->log[best_alg->ops] = m;
2544 best_alg->op[best_alg->ops] = alg_shift;
2545 }
2546 }
2547 }
2548 if (cache_hit)
2549 goto done;
2550 }
2551
2552 /* If we have an odd number, add or subtract one. */
2553 if ((t & 1) != 0)
2554 {
2555 unsigned HOST_WIDE_INT w;
2556
2557 do_alg_addsub_t_m2:
2558 for (w = 1; (w & t) != 0; w <<= 1)
2559 ;
2560 /* If T was -1, then W will be zero after the loop. This is another
2561 case where T ends with ...111. Handling it by multiplying by
2562 (T + 1) and subtracting 1 produces slightly better code and makes
2563 algorithm selection much faster than treating it like the ...0111
2564 case below. */
2565 if (w == 0
2566 || (w > 2
2567 /* Reject the case where t is 3.
2568 Thus we prefer addition in that case. */
2569 && t != 3))
2570 {
2571 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2572
2573 op_cost = add_cost[speed][mode];
2574 new_limit.cost = best_cost.cost - op_cost;
2575 new_limit.latency = best_cost.latency - op_cost;
2576 synth_mult (alg_in, t + 1, &new_limit, mode);
2577
2578 alg_in->cost.cost += op_cost;
2579 alg_in->cost.latency += op_cost;
2580 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2581 {
2582 struct algorithm *x;
2583 best_cost = alg_in->cost;
2584 x = alg_in, alg_in = best_alg, best_alg = x;
2585 best_alg->log[best_alg->ops] = 0;
2586 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2587 }
2588 }
2589 else
2590 {
2591 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2592
2593 op_cost = add_cost[speed][mode];
2594 new_limit.cost = best_cost.cost - op_cost;
2595 new_limit.latency = best_cost.latency - op_cost;
2596 synth_mult (alg_in, t - 1, &new_limit, mode);
2597
2598 alg_in->cost.cost += op_cost;
2599 alg_in->cost.latency += op_cost;
2600 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601 {
2602 struct algorithm *x;
2603 best_cost = alg_in->cost;
2604 x = alg_in, alg_in = best_alg, best_alg = x;
2605 best_alg->log[best_alg->ops] = 0;
2606 best_alg->op[best_alg->ops] = alg_add_t_m2;
2607 }
2608 }
2609
2610 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2611 quickly with a - a * n for some appropriate constant n. */
2612 m = exact_log2 (-orig_t + 1);
2613 if (m >= 0 && m < maxm)
2614 {
2615 op_cost = shiftsub1_cost[speed][mode][m];
2616 new_limit.cost = best_cost.cost - op_cost;
2617 new_limit.latency = best_cost.latency - op_cost;
2618 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2619
2620 alg_in->cost.cost += op_cost;
2621 alg_in->cost.latency += op_cost;
2622 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2623 {
2624 struct algorithm *x;
2625 best_cost = alg_in->cost;
2626 x = alg_in, alg_in = best_alg, best_alg = x;
2627 best_alg->log[best_alg->ops] = m;
2628 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629 }
2630 }
2631
2632 if (cache_hit)
2633 goto done;
2634 }
2635
2636 /* Look for factors of t of the form
2637 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2638 If we find such a factor, we can multiply by t using an algorithm that
2639 multiplies by q, shift the result by m and add/subtract it to itself.
2640
2641 We search for large factors first and loop down, even if large factors
2642 are less probable than small; if we find a large factor we will find a
2643 good sequence quickly, and therefore be able to prune (by decreasing
2644 COST_LIMIT) the search. */
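/* Editorial example (not in the original comment): T = 45 is divisible by
   2^2 + 1 = 5, so one possible decomposition is
   x * 45 = ((x * 9) << 2) + (x * 9), with the recursion then handling
   x * 9 (itself (x << 3) + x).  The loop below tries the largest candidate
   factors first.  */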
2645
2646 do_alg_addsub_factor:
2647 for (m = floor_log2 (t - 1); m >= 2; m--)
2648 {
2649 unsigned HOST_WIDE_INT d;
2650
2651 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2652 if (t % d == 0 && t > d && m < maxm
2653 && (!cache_hit || cache_alg == alg_add_factor))
2654 {
2655 /* If the target has a cheap shift-and-add instruction use
2656 that in preference to a shift insn followed by an add insn.
2657 Assume that the shift-and-add is "atomic" with a latency
2658 equal to its cost, otherwise assume that on superscalar
2659 hardware the shift may be executed concurrently with the
2660 earlier steps in the algorithm. */
2661 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2662 if (shiftadd_cost[speed][mode][m] < op_cost)
2663 {
2664 op_cost = shiftadd_cost[speed][mode][m];
2665 op_latency = op_cost;
2666 }
2667 else
2668 op_latency = add_cost[speed][mode];
2669
2670 new_limit.cost = best_cost.cost - op_cost;
2671 new_limit.latency = best_cost.latency - op_latency;
2672 synth_mult (alg_in, t / d, &new_limit, mode);
2673
2674 alg_in->cost.cost += op_cost;
2675 alg_in->cost.latency += op_latency;
2676 if (alg_in->cost.latency < op_cost)
2677 alg_in->cost.latency = op_cost;
2678 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 {
2680 struct algorithm *x;
2681 best_cost = alg_in->cost;
2682 x = alg_in, alg_in = best_alg, best_alg = x;
2683 best_alg->log[best_alg->ops] = m;
2684 best_alg->op[best_alg->ops] = alg_add_factor;
2685 }
2686 /* Other factors will have been taken care of in the recursion. */
2687 break;
2688 }
2689
2690 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2691 if (t % d == 0 && t > d && m < maxm
2692 && (!cache_hit || cache_alg == alg_sub_factor))
2693 {
2694 /* If the target has a cheap shift-and-subtract insn use
2695 that in preference to a shift insn followed by a sub insn.
2696 Assume that the shift-and-sub is "atomic" with a latency
2697 equal to its cost, otherwise assume that on superscalar
2698 hardware the shift may be executed concurrently with the
2699 earlier steps in the algorithm. */
2700 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2701 if (shiftsub0_cost[speed][mode][m] < op_cost)
2702 {
2703 op_cost = shiftsub0_cost[speed][mode][m];
2704 op_latency = op_cost;
2705 }
2706 else
2707 op_latency = add_cost[speed][mode];
2708
2709 new_limit.cost = best_cost.cost - op_cost;
2710 new_limit.latency = best_cost.latency - op_latency;
2711 synth_mult (alg_in, t / d, &new_limit, mode);
2712
2713 alg_in->cost.cost += op_cost;
2714 alg_in->cost.latency += op_latency;
2715 if (alg_in->cost.latency < op_cost)
2716 alg_in->cost.latency = op_cost;
2717 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2718 {
2719 struct algorithm *x;
2720 best_cost = alg_in->cost;
2721 x = alg_in, alg_in = best_alg, best_alg = x;
2722 best_alg->log[best_alg->ops] = m;
2723 best_alg->op[best_alg->ops] = alg_sub_factor;
2724 }
2725 break;
2726 }
2727 }
2728 if (cache_hit)
2729 goto done;
2730
2731 /* Try shift-and-add (load effective address) instructions,
2732 i.e. do a*3, a*5, a*9. */
2733 if ((t & 1) != 0)
2734 {
2735 do_alg_add_t2_m:
2736 q = t - 1;
2737 q = q & -q;
2738 m = exact_log2 (q);
2739 if (m >= 0 && m < maxm)
2740 {
2741 op_cost = shiftadd_cost[speed][mode][m];
2742 new_limit.cost = best_cost.cost - op_cost;
2743 new_limit.latency = best_cost.latency - op_cost;
2744 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2745
2746 alg_in->cost.cost += op_cost;
2747 alg_in->cost.latency += op_cost;
2748 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 {
2750 struct algorithm *x;
2751 best_cost = alg_in->cost;
2752 x = alg_in, alg_in = best_alg, best_alg = x;
2753 best_alg->log[best_alg->ops] = m;
2754 best_alg->op[best_alg->ops] = alg_add_t2_m;
2755 }
2756 }
2757 if (cache_hit)
2758 goto done;
2759
2760 do_alg_sub_t2_m:
2761 q = t + 1;
2762 q = q & -q;
2763 m = exact_log2 (q);
2764 if (m >= 0 && m < maxm)
2765 {
2766 op_cost = shiftsub0_cost[speed][mode][m];
2767 new_limit.cost = best_cost.cost - op_cost;
2768 new_limit.latency = best_cost.latency - op_cost;
2769 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2770
2771 alg_in->cost.cost += op_cost;
2772 alg_in->cost.latency += op_cost;
2773 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2774 {
2775 struct algorithm *x;
2776 best_cost = alg_in->cost;
2777 x = alg_in, alg_in = best_alg, best_alg = x;
2778 best_alg->log[best_alg->ops] = m;
2779 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2780 }
2781 }
2782 if (cache_hit)
2783 goto done;
2784 }
2785
2786 done:
2787 /* If best_cost has not decreased, we have not found any algorithm. */
2788 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2789 {
2790 /* We failed to find an algorithm. Record alg_impossible for
2791 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2792 we are asked to find an algorithm for T within the same or
2793 lower COST_LIMIT, we can immediately return to the
2794 caller. */
2795 alg_hash[hash_index].t = t;
2796 alg_hash[hash_index].mode = mode;
2797 alg_hash[hash_index].speed = speed;
2798 alg_hash[hash_index].alg = alg_impossible;
2799 alg_hash[hash_index].cost = *cost_limit;
2800 return;
2801 }
2802
2803 /* Cache the result. */
2804 if (!cache_hit)
2805 {
2806 alg_hash[hash_index].t = t;
2807 alg_hash[hash_index].mode = mode;
2808 alg_hash[hash_index].speed = speed;
2809 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2810 alg_hash[hash_index].cost.cost = best_cost.cost;
2811 alg_hash[hash_index].cost.latency = best_cost.latency;
2812 }
2813
2814 /* If we are getting a too long sequence for `struct algorithm'
2815 to record, make this search fail. */
2816 if (best_alg->ops == MAX_BITS_PER_WORD)
2817 return;
2818
2819 /* Copy the algorithm from temporary space to the space at alg_out.
2820 We avoid using structure assignment because the majority of
2821 best_alg is normally undefined, and this is a critical function. */
2822 alg_out->ops = best_alg->ops + 1;
2823 alg_out->cost = best_cost;
2824 memcpy (alg_out->op, best_alg->op,
2825 alg_out->ops * sizeof *alg_out->op);
2826 memcpy (alg_out->log, best_alg->log,
2827 alg_out->ops * sizeof *alg_out->log);
2828 }
2829 \f
2830 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2831 Try three variations:
2832
2833 - a shift/add sequence based on VAL itself
2834 - a shift/add sequence based on -VAL, followed by a negation
2835 - a shift/add sequence based on VAL - 1, followed by an addition.
2836
2837 Return true if the cheapest of these cost less than MULT_COST,
2838 describing the algorithm in *ALG and final fixup in *VARIANT. */
2839
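/* Editorial examples (not in the original comment): for VAL = 7 the basic
   variant is (x << 3) - x; for VAL = -7 a shift/add sequence for 7 followed
   by a negation (negate_variant) is typically chosen; the add variant bases
   the sequence on VAL - 1 and finishes with one extra addition of x.  */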
2840 static bool
2841 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2842 struct algorithm *alg, enum mult_variant *variant,
2843 int mult_cost)
2844 {
2845 struct algorithm alg2;
2846 struct mult_cost limit;
2847 int op_cost;
2848 bool speed = optimize_insn_for_speed_p ();
2849
2850 /* Fail quickly for impossible bounds. */
2851 if (mult_cost < 0)
2852 return false;
2853
2854 /* Ensure that mult_cost provides a reasonable upper bound.
2855 Any constant multiplication can be performed with less
2856 than 2 * bits additions. */
2857 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2858 if (mult_cost > op_cost)
2859 mult_cost = op_cost;
2860
2861 *variant = basic_variant;
2862 limit.cost = mult_cost;
2863 limit.latency = mult_cost;
2864 synth_mult (alg, val, &limit, mode);
2865
2866 /* This works only if the inverted value actually fits in an
2867 `unsigned int'. */
2868 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2869 {
2870 op_cost = neg_cost[speed][mode];
2871 if (MULT_COST_LESS (&alg->cost, mult_cost))
2872 {
2873 limit.cost = alg->cost.cost - op_cost;
2874 limit.latency = alg->cost.latency - op_cost;
2875 }
2876 else
2877 {
2878 limit.cost = mult_cost - op_cost;
2879 limit.latency = mult_cost - op_cost;
2880 }
2881
2882 synth_mult (&alg2, -val, &limit, mode);
2883 alg2.cost.cost += op_cost;
2884 alg2.cost.latency += op_cost;
2885 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2886 *alg = alg2, *variant = negate_variant;
2887 }
2888
2889 /* This proves very useful for division-by-constant. */
2890 op_cost = add_cost[speed][mode];
2891 if (MULT_COST_LESS (&alg->cost, mult_cost))
2892 {
2893 limit.cost = alg->cost.cost - op_cost;
2894 limit.latency = alg->cost.latency - op_cost;
2895 }
2896 else
2897 {
2898 limit.cost = mult_cost - op_cost;
2899 limit.latency = mult_cost - op_cost;
2900 }
2901
2902 synth_mult (&alg2, val - 1, &limit, mode);
2903 alg2.cost.cost += op_cost;
2904 alg2.cost.latency += op_cost;
2905 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2906 *alg = alg2, *variant = add_variant;
2907
2908 return MULT_COST_LESS (&alg->cost, mult_cost);
2909 }
2910
2911 /* A subroutine of expand_mult, used for constant multiplications.
2912 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2913 convenient. Use the shift/add sequence described by ALG and apply
2914 the final fixup specified by VARIANT. */
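/* Editorial illustration (not in the original comment): one algorithm for
   multiplying by 10 is alg_m, then alg_add_t2_m with log 2, then alg_shift
   with log 1; the loop below would then compute, in effect,
   accum = x; accum = (accum << 2) + x; accum = accum << 1;
   with VAL_SO_FAR tracking 1, 5 and 10 along the way.  */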
2915
2916 static rtx
2917 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2918 rtx target, const struct algorithm *alg,
2919 enum mult_variant variant)
2920 {
2921 HOST_WIDE_INT val_so_far;
2922 rtx insn, accum, tem;
2923 int opno;
2924 enum machine_mode nmode;
2925
2926 /* Avoid referencing memory over and over and invalid sharing
2927 on SUBREGs. */
2928 op0 = force_reg (mode, op0);
2929
2930 /* ACCUM starts out either as OP0 or as a zero, depending on
2931 the first operation. */
2932
2933 if (alg->op[0] == alg_zero)
2934 {
2935 accum = copy_to_mode_reg (mode, const0_rtx);
2936 val_so_far = 0;
2937 }
2938 else if (alg->op[0] == alg_m)
2939 {
2940 accum = copy_to_mode_reg (mode, op0);
2941 val_so_far = 1;
2942 }
2943 else
2944 gcc_unreachable ();
2945
2946 for (opno = 1; opno < alg->ops; opno++)
2947 {
2948 int log = alg->log[opno];
2949 rtx shift_subtarget = optimize ? 0 : accum;
2950 rtx add_target
2951 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2952 && !optimize)
2953 ? target : 0;
2954 rtx accum_target = optimize ? 0 : accum;
2955 rtx accum_inner;
2956
2957 switch (alg->op[opno])
2958 {
2959 case alg_shift:
2960 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2961 /* REG_EQUAL note will be attached to the following insn. */
2962 emit_move_insn (accum, tem);
2963 val_so_far <<= log;
2964 break;
2965
2966 case alg_add_t_m2:
2967 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2968 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2969 add_target ? add_target : accum_target);
2970 val_so_far += (HOST_WIDE_INT) 1 << log;
2971 break;
2972
2973 case alg_sub_t_m2:
2974 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2975 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2976 add_target ? add_target : accum_target);
2977 val_so_far -= (HOST_WIDE_INT) 1 << log;
2978 break;
2979
2980 case alg_add_t2_m:
2981 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2982 log, shift_subtarget, 0);
2983 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2984 add_target ? add_target : accum_target);
2985 val_so_far = (val_so_far << log) + 1;
2986 break;
2987
2988 case alg_sub_t2_m:
2989 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2990 log, shift_subtarget, 0);
2991 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2992 add_target ? add_target : accum_target);
2993 val_so_far = (val_so_far << log) - 1;
2994 break;
2995
2996 case alg_add_factor:
2997 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2998 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2999 add_target ? add_target : accum_target);
3000 val_so_far += val_so_far << log;
3001 break;
3002
3003 case alg_sub_factor:
3004 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3005 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3006 (add_target
3007 ? add_target : (optimize ? 0 : tem)));
3008 val_so_far = (val_so_far << log) - val_so_far;
3009 break;
3010
3011 default:
3012 gcc_unreachable ();
3013 }
3014
3015 /* Write a REG_EQUAL note on the last insn so that we can cse
3016 multiplication sequences. Note that if ACCUM is a SUBREG,
3017 we've set the inner register and must properly indicate
3018 that. */
3019
3020 tem = op0, nmode = mode;
3021 accum_inner = accum;
3022 if (GET_CODE (accum) == SUBREG)
3023 {
3024 accum_inner = SUBREG_REG (accum);
3025 nmode = GET_MODE (accum_inner);
3026 tem = gen_lowpart (nmode, op0);
3027 }
3028
3029 insn = get_last_insn ();
3030 set_dst_reg_note (insn, REG_EQUAL,
3031 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3032 accum_inner);
3033 }
3034
3035 if (variant == negate_variant)
3036 {
3037 val_so_far = -val_so_far;
3038 accum = expand_unop (mode, neg_optab, accum, target, 0);
3039 }
3040 else if (variant == add_variant)
3041 {
3042 val_so_far = val_so_far + 1;
3043 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3044 }
3045
3046 /* Compare only the bits of val and val_so_far that are significant
3047 in the result mode, to avoid sign-/zero-extension confusion. */
3048 val &= GET_MODE_MASK (mode);
3049 val_so_far &= GET_MODE_MASK (mode);
3050 gcc_assert (val == val_so_far);
3051
3052 return accum;
3053 }
3054
3055 /* Perform a multiplication and return an rtx for the result.
3056 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3057 TARGET is a suggestion for where to store the result (an rtx).
3058
3059 We check specially for a constant integer as OP1.
3060 If you want this check for OP0 as well, then before calling
3061 you should swap the two operands if OP0 would be constant. */
3062
3063 rtx
3064 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3065 int unsignedp)
3066 {
3067 enum mult_variant variant;
3068 struct algorithm algorithm;
3069 int max_cost;
3070 bool speed = optimize_insn_for_speed_p ();
3071
3072 /* Handling const0_rtx here allows us to use zero as a rogue value for
3073 coeff below. */
3074 if (op1 == const0_rtx)
3075 return const0_rtx;
3076 if (op1 == const1_rtx)
3077 return op0;
3078 if (op1 == constm1_rtx)
3079 return expand_unop (mode,
3080 GET_MODE_CLASS (mode) == MODE_INT
3081 && !unsignedp && flag_trapv
3082 ? negv_optab : neg_optab,
3083 op0, target, 0);
3084
3085 /* These are the operations that are potentially turned into a sequence
3086 of shifts and additions. */
3087 if (SCALAR_INT_MODE_P (mode)
3088 && (unsignedp || !flag_trapv))
3089 {
3090 HOST_WIDE_INT coeff = 0;
3091 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3092
3093 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3094 less than or equal in size to `unsigned int' this doesn't matter.
3095 If the mode is larger than `unsigned int', then synth_mult works
3096 only if the constant value exactly fits in an `unsigned int' without
3097 any truncation. This means that multiplying by negative values does
3098 not work; results are off by 2^32 on a 32 bit machine. */
3099
3100 if (CONST_INT_P (op1))
3101 {
3102 /* Attempt to handle multiplication of DImode values by negative
3103 coefficients, by performing the multiplication by a positive
3104 multiplier and then inverting the result. */
3105 if (INTVAL (op1) < 0
3106 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3107 {
3108 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3109 result is interpreted as an unsigned coefficient.
3110 Exclude cost of op0 from max_cost to match the cost
3111 calculation of the synth_mult. */
3112 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3113 speed)
3114 - neg_cost[speed][mode]);
3115 if (max_cost > 0
3116 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3117 &variant, max_cost))
3118 {
3119 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3120 NULL_RTX, &algorithm,
3121 variant);
3122 return expand_unop (mode, neg_optab, temp, target, 0);
3123 }
3124 }
3125 else coeff = INTVAL (op1);
3126 }
3127 else if (GET_CODE (op1) == CONST_DOUBLE)
3128 {
3129 /* If we are multiplying in DImode, it may still be a win
3130 to try to work with shifts and adds. */
3131 if (CONST_DOUBLE_HIGH (op1) == 0
3132 && CONST_DOUBLE_LOW (op1) > 0)
3133 coeff = CONST_DOUBLE_LOW (op1);
3134 else if (CONST_DOUBLE_LOW (op1) == 0
3135 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3136 {
3137 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3138 + HOST_BITS_PER_WIDE_INT;
3139 return expand_shift (LSHIFT_EXPR, mode, op0,
3140 shift, target, unsignedp);
3141 }
3142 }
3143
3144 /* We used to test optimize here, on the grounds that it's better to
3145 produce a smaller program when -O is not used. But this causes
3146 such a terrible slowdown sometimes that it seems better to always
3147 use synth_mult. */
3148 if (coeff != 0)
3149 {
3150 /* Special case powers of two. */
3151 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3152 return expand_shift (LSHIFT_EXPR, mode, op0,
3153 floor_log2 (coeff), target, unsignedp);
3154
3155 /* Exclude cost of op0 from max_cost to match the cost
3156 calculation of the synth_mult. */
3157 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3158 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3159 max_cost))
3160 return expand_mult_const (mode, op0, coeff, target,
3161 &algorithm, variant);
3162 }
3163 }
3164
3165 if (GET_CODE (op0) == CONST_DOUBLE)
3166 {
3167 rtx temp = op0;
3168 op0 = op1;
3169 op1 = temp;
3170 }
3171
3172 /* Expand x*2.0 as x+x. */
3173 if (GET_CODE (op1) == CONST_DOUBLE
3174 && SCALAR_FLOAT_MODE_P (mode))
3175 {
3176 REAL_VALUE_TYPE d;
3177 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3178
3179 if (REAL_VALUES_EQUAL (d, dconst2))
3180 {
3181 op0 = force_reg (GET_MODE (op0), op0);
3182 return expand_binop (mode, add_optab, op0, op0,
3183 target, unsignedp, OPTAB_LIB_WIDEN);
3184 }
3185 }
3186
3187 /* This used to use umul_optab if unsigned, but for non-widening multiply
3188 there is no difference between signed and unsigned. */
3189 op0 = expand_binop (mode,
3190 ! unsignedp
3191 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3192 ? smulv_optab : smul_optab,
3193 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3194 gcc_assert (op0);
3195 return op0;
3196 }
3197
3198 /* Perform a widening multiplication and return an rtx for the result.
3199 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3200 TARGET is a suggestion for where to store the result (an rtx).
3201 THIS_OPTAB is the optab we should use; it must be either umul_widen_optab
3202 or smul_widen_optab.
3203
3204 We check specially for a constant integer as OP1, comparing the
3205 cost of a widening multiply against the cost of a sequence of shifts
3206 and adds. */
3207
3208 rtx
3209 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3210 int unsignedp, optab this_optab)
3211 {
3212 bool speed = optimize_insn_for_speed_p ();
3213 rtx cop1;
3214
3215 if (CONST_INT_P (op1)
3216 && GET_MODE (op0) != VOIDmode
3217 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3218 this_optab == umul_widen_optab))
3219 && CONST_INT_P (cop1)
3220 && (INTVAL (cop1) >= 0
3221 || HWI_COMPUTABLE_MODE_P (mode)))
3222 {
3223 HOST_WIDE_INT coeff = INTVAL (cop1);
3224 int max_cost;
3225 enum mult_variant variant;
3226 struct algorithm algorithm;
3227
3228 /* Special case powers of two. */
3229 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3230 {
3231 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3232 return expand_shift (LSHIFT_EXPR, mode, op0,
3233 floor_log2 (coeff), target, unsignedp);
3234 }
3235
3236 /* Exclude cost of op0 from max_cost to match the cost
3237 calculation of the synth_mult. */
3238 max_cost = mul_widen_cost[speed][mode];
3239 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3240 max_cost))
3241 {
3242 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3243 return expand_mult_const (mode, op0, coeff, target,
3244 &algorithm, variant);
3245 }
3246 }
3247 return expand_binop (mode, this_optab, op0, op1, target,
3248 unsignedp, OPTAB_LIB_WIDEN);
3249 }
3250 \f
3251 /* Return the smallest n such that 2**n >= X. */
3252
3253 int
3254 ceil_log2 (unsigned HOST_WIDE_INT x)
3255 {
3256 return floor_log2 (x - 1) + 1;
3257 }
3258
3259 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3260 replace division by D, and put the least significant N bits of the result
3261 in *MULTIPLIER_PTR and return the most significant bit.
3262
3263 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3264 needed precision is in PRECISION (should be <= N).
3265
3266 PRECISION should be as small as possible so this function can choose
3267 multiplier more freely.
3268
3269 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3270 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3271
3272 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3273 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3274
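/* Editorial example (not in the original comment): for unsigned 32-bit
   division by D = 5 the well-known multiplier is 0xcccccccd with a
   post-shift of 2, i.e. x / 5 == (high 32 bits of x * 0xcccccccd) >> 2;
   this is the kind of multiplier / POST_SHIFT pair computed here.  */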
3275 static
3276 unsigned HOST_WIDE_INT
3277 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3278 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3279 {
3280 HOST_WIDE_INT mhigh_hi, mlow_hi;
3281 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3282 int lgup, post_shift;
3283 int pow, pow2;
3284 unsigned HOST_WIDE_INT nl, dummy1;
3285 HOST_WIDE_INT nh, dummy2;
3286
3287 /* lgup = ceil(log2(divisor)); */
3288 lgup = ceil_log2 (d);
3289
3290 gcc_assert (lgup <= n);
3291
3292 pow = n + lgup;
3293 pow2 = n + lgup - precision;
3294
3295 /* We could handle this with some effort, but this case is much
3296 better handled directly with a scc insn, so rely on caller using
3297 that. */
3298 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3299
3300 /* mlow = 2^(N + lgup)/d */
3301 if (pow >= HOST_BITS_PER_WIDE_INT)
3302 {
3303 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3304 nl = 0;
3305 }
3306 else
3307 {
3308 nh = 0;
3309 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3310 }
3311 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3312 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3313
3314 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3315 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3316 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3317 else
3318 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3319 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3320 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3321
3322 gcc_assert (!mhigh_hi || nh - d < d);
3323 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3324 /* Assert that mlow < mhigh. */
3325 gcc_assert (mlow_hi < mhigh_hi
3326 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3327
3328 /* If precision == N, then mlow, mhigh exceed 2^N
3329 (but they do not exceed 2^(N+1)). */
3330
3331 /* Reduce to lowest terms. */
3332 for (post_shift = lgup; post_shift > 0; post_shift--)
3333 {
3334 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3335 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3336 if (ml_lo >= mh_lo)
3337 break;
3338
3339 mlow_hi = 0;
3340 mlow_lo = ml_lo;
3341 mhigh_hi = 0;
3342 mhigh_lo = mh_lo;
3343 }
3344
3345 *post_shift_ptr = post_shift;
3346 *lgup_ptr = lgup;
3347 if (n < HOST_BITS_PER_WIDE_INT)
3348 {
3349 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3350 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3351 return mhigh_lo >= mask;
3352 }
3353 else
3354 {
3355 *multiplier_ptr = GEN_INT (mhigh_lo);
3356 return mhigh_hi;
3357 }
3358 }
3359
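/* A minimal, self-contained sketch of the multiply-and-shift replacement
   that choose_multiplier enables, specialised to unsigned 32-bit division
   by 7.  For d = 7 and N = 32 the N+1-bit multiplier is 2^32 + 0x24924925
   with a post-shift of 3; since the multiplier does not fit in N bits, the
   caller (see expand_divmod) uses the add-and-halve sequence below.  The
   helper name and the <stdint.h> types are illustrative assumptions, not
   GCC code.  */
#if 0
#include <stdint.h>

static uint32_t
udiv7_by_mult (uint32_t x)
{
  /* High 32 bits of x times the low 32 bits of the multiplier.  */
  uint32_t t = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  /* (((x - t) >> 1) + t) >> (post_shift - 1) equals x / 7 for all x.  */
  return (((x - t) >> 1) + t) >> 2;
}
#endif
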
3360 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3361 congruent to 1 (mod 2**N). */
3362
3363 static unsigned HOST_WIDE_INT
3364 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3365 {
3366 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3367
3368 /* The algorithm notes that the choice y = x satisfies
3369 x*y == 1 mod 2^3, since x is assumed odd.
3370 Each iteration doubles the number of bits of significance in y. */
3371
3372 unsigned HOST_WIDE_INT mask;
3373 unsigned HOST_WIDE_INT y = x;
3374 int nbit = 3;
3375
3376 mask = (n == HOST_BITS_PER_WIDE_INT
3377 ? ~(unsigned HOST_WIDE_INT) 0
3378 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3379
3380 while (nbit < n)
3381 {
3382 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3383 nbit *= 2;
3384 }
3385 return y;
3386 }
3387
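/* A minimal sketch of the same Newton-style iteration for a fixed 32-bit
   word.  Starting from y = x (correct to 3 bits for odd x, since
   x*x == 1 mod 8), each step doubles the number of correct low-order bits.
   For example, the inverse of 3 mod 2^32 is 0xAAAAAAAB, because
   3 * 0xAAAAAAAB == 2*2^32 + 1.  The helper name and <stdint.h> types are
   illustrative assumptions.  */
#if 0
#include <stdint.h>

static uint32_t
invert_mod2_32 (uint32_t x)     /* X must be odd.  */
{
  uint32_t y = x;               /* Correct to 3 bits.  */
  int nbit;
  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);        /* Unsigned wrap-around is the mod 2^32.  */
  return y;
}
#endif
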
3388 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3389 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3390 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3391 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3392 become signed.
3393
3394 The result is put in TARGET if that is convenient.
3395
3396 MODE is the mode of operation. */
3397
3398 rtx
3399 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3400 rtx op1, rtx target, int unsignedp)
3401 {
3402 rtx tem;
3403 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3404
3405 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3406 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3407 tem = expand_and (mode, tem, op1, NULL_RTX);
3408 adj_operand
3409 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3410 adj_operand);
3411
3412 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3413 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3414 tem = expand_and (mode, tem, op0, NULL_RTX);
3415 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3416 target);
3417
3418 return target;
3419 }
3420
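/* A minimal sketch of the identity behind expand_mult_highpart_adjust,
   written out for 32-bit operands.  Reinterpreting a negative N-bit operand
   as unsigned adds 2^N to its value, so the unsigned high part exceeds the
   signed one by B when A < 0 and by A when B < 0 (mod 2^N); subtracting
   those terms converts one flavor into the other.  The helper name and
   <stdint.h> types are illustrative assumptions.  */
#if 0
#include <stdint.h>

static int32_t
smulhi_from_umulhi (int32_t a, int32_t b)
{
  uint32_t hi = (uint32_t) (((uint64_t) (uint32_t) a * (uint32_t) b) >> 32);
  if (a < 0)
    hi -= (uint32_t) b;
  if (b < 0)
    hi -= (uint32_t) a;
  return (int32_t) hi;
}
#endif
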
3421 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3422
3423 static rtx
3424 extract_high_half (enum machine_mode mode, rtx op)
3425 {
3426 enum machine_mode wider_mode;
3427
3428 if (mode == word_mode)
3429 return gen_highpart (mode, op);
3430
3431 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3432
3433 wider_mode = GET_MODE_WIDER_MODE (mode);
3434 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3435 GET_MODE_BITSIZE (mode), 0, 1);
3436 return convert_modes (mode, wider_mode, op, 0);
3437 }
3438
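/* A minimal sketch of what extract_high_half computes when a widening
   multiply is available: form the full product in the wider mode and shift
   it right by GET_MODE_BITSIZE (mode).  Shown for 32-bit operands with a
   32x32->64 multiply; the helper name and <stdint.h> types are illustrative
   assumptions.  */
#if 0
#include <stdint.h>

static uint32_t
umulhi32 (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);
}
#endif
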
3439 /* Like expand_mult_highpart, but only consider using a multiplication
3440 optab. OP1 is an rtx for the constant operand. */
3441
3442 static rtx
3443 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3444 rtx target, int unsignedp, int max_cost)
3445 {
3446 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3447 enum machine_mode wider_mode;
3448 optab moptab;
3449 rtx tem;
3450 int size;
3451 bool speed = optimize_insn_for_speed_p ();
3452
3453 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3454
3455 wider_mode = GET_MODE_WIDER_MODE (mode);
3456 size = GET_MODE_BITSIZE (mode);
3457
3458 /* Firstly, try using a multiplication insn that only generates the needed
3459 high part of the product, and in the sign flavor of unsignedp. */
3460 if (mul_highpart_cost[speed][mode] < max_cost)
3461 {
3462 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3463 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3464 unsignedp, OPTAB_DIRECT);
3465 if (tem)
3466 return tem;
3467 }
3468
3469 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3470 Need to adjust the result after the multiplication. */
3471 if (size - 1 < BITS_PER_WORD
3472 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3473 + 4 * add_cost[speed][mode] < max_cost))
3474 {
3475 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3476 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3477 unsignedp, OPTAB_DIRECT);
3478 if (tem)
3479 /* We used the wrong signedness. Adjust the result. */
3480 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3481 tem, unsignedp);
3482 }
3483
3484 /* Try widening multiplication. */
3485 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3486 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3487 && mul_widen_cost[speed][wider_mode] < max_cost)
3488 {
3489 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3490 unsignedp, OPTAB_WIDEN);
3491 if (tem)
3492 return extract_high_half (mode, tem);
3493 }
3494
3495 /* Try widening the mode and performing a non-widening multiplication. */
3496 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3497 && size - 1 < BITS_PER_WORD
3498 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3499 {
3500 rtx insns, wop0, wop1;
3501
3502 /* We need to widen the operands, for example to ensure the
3503 constant multiplier is correctly sign or zero extended.
3504 Use a sequence to clean up any instructions emitted by
3505 the conversions if things don't work out. */
3506 start_sequence ();
3507 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3508 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3509 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3510 unsignedp, OPTAB_WIDEN);
3511 insns = get_insns ();
3512 end_sequence ();
3513
3514 if (tem)
3515 {
3516 emit_insn (insns);
3517 return extract_high_half (mode, tem);
3518 }
3519 }
3520
3521 /* Try widening multiplication of opposite signedness, and adjust. */
3522 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3523 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3524 && size - 1 < BITS_PER_WORD
3525 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3526 + 4 * add_cost[speed][mode] < max_cost))
3527 {
3528 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3529 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3530 if (tem != 0)
3531 {
3532 tem = extract_high_half (mode, tem);
3533 /* We used the wrong signedness. Adjust the result. */
3534 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3535 target, unsignedp);
3536 }
3537 }
3538
3539 return 0;
3540 }
3541
3542 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3543 putting the high half of the result in TARGET if that is convenient,
3544 and return where the result is. If the operation can not be performed,
3545 0 is returned.
3546
3547 MODE is the mode of operation and result.
3548
3549 UNSIGNEDP nonzero means unsigned multiply.
3550
3551 MAX_COST is the total allowed cost for the expanded RTL. */
3552
3553 static rtx
3554 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3555 rtx target, int unsignedp, int max_cost)
3556 {
3557 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3558 unsigned HOST_WIDE_INT cnst1;
3559 int extra_cost;
3560 bool sign_adjust = false;
3561 enum mult_variant variant;
3562 struct algorithm alg;
3563 rtx tem;
3564 bool speed = optimize_insn_for_speed_p ();
3565
3566 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3567 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3568 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3569
3570 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3571
3572 /* We can't optimize modes wider than BITS_PER_WORD.
3573 ??? We might be able to perform double-word arithmetic if
3574 mode == word_mode, however all the cost calculations in
3575 synth_mult etc. assume single-word operations. */
3576 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3577 return expand_mult_highpart_optab (mode, op0, op1, target,
3578 unsignedp, max_cost);
3579
3580 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3581
3582 /* Check whether we try to multiply by a negative constant. */
3583 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3584 {
3585 sign_adjust = true;
3586 extra_cost += add_cost[speed][mode];
3587 }
3588
3589 /* See whether shift/add multiplication is cheap enough. */
3590 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3591 max_cost - extra_cost))
3592 {
3593 /* See whether the specialized multiplication optabs are
3594 cheaper than the shift/add version. */
3595 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3596 alg.cost.cost + extra_cost);
3597 if (tem)
3598 return tem;
3599
3600 tem = convert_to_mode (wider_mode, op0, unsignedp);
3601 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3602 tem = extract_high_half (mode, tem);
3603
3604 /* Adjust result for signedness. */
3605 if (sign_adjust)
3606 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3607
3608 return tem;
3609 }
3610 return expand_mult_highpart_optab (mode, op0, op1, target,
3611 unsignedp, max_cost);
3612 }
3613
3614
3615 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3616
3617 static rtx
3618 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3619 {
3620 unsigned HOST_WIDE_INT masklow, maskhigh;
3621 rtx result, temp, shift, label;
3622 int logd;
3623
3624 logd = floor_log2 (d);
3625 result = gen_reg_rtx (mode);
3626
3627 /* Avoid conditional branches when they're expensive. */
3628 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3629 && optimize_insn_for_speed_p ())
3630 {
3631 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3632 mode, 0, -1);
3633 if (signmask)
3634 {
3635 signmask = force_reg (mode, signmask);
3636 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3637 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3638
3639 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3640 which instruction sequence to use. If logical right shifts
3641 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3642 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3643
3644 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3645 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3646 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3647 > COSTS_N_INSNS (2)))
3648 {
3649 temp = expand_binop (mode, xor_optab, op0, signmask,
3650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651 temp = expand_binop (mode, sub_optab, temp, signmask,
3652 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3653 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3655 temp = expand_binop (mode, xor_optab, temp, signmask,
3656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 temp = expand_binop (mode, sub_optab, temp, signmask,
3658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659 }
3660 else
3661 {
3662 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3663 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664 signmask = force_reg (mode, signmask);
3665
3666 temp = expand_binop (mode, add_optab, op0, signmask,
3667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3668 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3669 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670 temp = expand_binop (mode, sub_optab, temp, signmask,
3671 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672 }
3673 return temp;
3674 }
3675 }
3676
3677 /* Mask contains the mode's signbit and the significant bits of the
3678 modulus. By including the signbit in the operation, many targets
3679 can avoid an explicit compare operation in the following comparison
3680 against zero. */
3681
3682 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3683 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3684 {
3685 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3686 maskhigh = -1;
3687 }
3688 else
3689 maskhigh = (HOST_WIDE_INT) -1
3690 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3691
3692 temp = expand_binop (mode, and_optab, op0,
3693 immed_double_const (masklow, maskhigh, mode),
3694 result, 1, OPTAB_LIB_WIDEN);
3695 if (temp != result)
3696 emit_move_insn (result, temp);
3697
3698 label = gen_label_rtx ();
3699 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3700
3701 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3702 0, OPTAB_LIB_WIDEN);
3703 masklow = (HOST_WIDE_INT) -1 << logd;
3704 maskhigh = -1;
3705 temp = expand_binop (mode, ior_optab, temp,
3706 immed_double_const (masklow, maskhigh, mode),
3707 result, 1, OPTAB_LIB_WIDEN);
3708 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3709 0, OPTAB_LIB_WIDEN);
3710 if (temp != result)
3711 emit_move_insn (result, temp);
3712 emit_label (label);
3713 return result;
3714 }
3715
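/* A minimal sketch of the branch-free XOR/SUB variant emitted above,
   specialised to 32 bits.  With SIGN equal to -1 for negative X and 0
   otherwise, (X ^ SIGN) - SIGN is |X| computed without a branch; masking
   and then undoing the transform yields the truncating remainder
   X % (1 << LOGD).  The helper name and <stdint.h> types are illustrative
   assumptions.  */
#if 0
#include <stdint.h>

static int32_t
smod_pow2_sketch (int32_t x, int logd)
{
  int32_t sign = -(x < 0);                           /* 0 or -1.  */
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  uint32_t t = ((uint32_t) (x ^ sign) - (uint32_t) sign) & mask;
  return (int32_t) ((t ^ (uint32_t) sign) - (uint32_t) sign);
}
#endif
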
3716 /* Expand signed division of OP0 by a power of two D in mode MODE.
3717 This routine is only called for positive values of D. */
3718
3719 static rtx
3720 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3721 {
3722 rtx temp, label;
3723 int logd;
3724
3725 logd = floor_log2 (d);
3726
3727 if (d == 2
3728 && BRANCH_COST (optimize_insn_for_speed_p (),
3729 false) >= 1)
3730 {
3731 temp = gen_reg_rtx (mode);
3732 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3733 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3734 0, OPTAB_LIB_WIDEN);
3735 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3736 }
3737
3738 #ifdef HAVE_conditional_move
3739 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3740 >= 2)
3741 {
3742 rtx temp2;
3743
3744 /* ??? emit_conditional_move forces a stack adjustment via
3745 compare_from_rtx so, if the sequence is discarded, it will
3746 be lost. Do it now instead. */
3747 do_pending_stack_adjust ();
3748
3749 start_sequence ();
3750 temp2 = copy_to_mode_reg (mode, op0);
3751 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3752 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3753 temp = force_reg (mode, temp);
3754
3755 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3756 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3757 mode, temp, temp2, mode, 0);
3758 if (temp2)
3759 {
3760 rtx seq = get_insns ();
3761 end_sequence ();
3762 emit_insn (seq);
3763 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3764 }
3765 end_sequence ();
3766 }
3767 #endif
3768
3769 if (BRANCH_COST (optimize_insn_for_speed_p (),
3770 false) >= 2)
3771 {
3772 int ushift = GET_MODE_BITSIZE (mode) - logd;
3773
3774 temp = gen_reg_rtx (mode);
3775 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3776 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3777 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3778 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3779 else
3780 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3781 ushift, NULL_RTX, 1);
3782 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3783 0, OPTAB_LIB_WIDEN);
3784 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3785 }
3786
3787 label = gen_label_rtx ();
3788 temp = copy_to_mode_reg (mode, op0);
3789 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3790 expand_inc (temp, GEN_INT (d - 1));
3791 emit_label (label);
3792 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3793 }
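
/* A minimal sketch of the bias-then-shift sequence used above, specialised
   to 32 bits.  Adding D - 1 to negative dividends before the arithmetic
   right shift turns the shift's floor rounding into the truncating
   (round-toward-zero) division required by TRUNC_DIV_EXPR.  The helper name
   and <stdint.h> types are illustrative assumptions; the sketch also assumes
   that >> on a negative int is an arithmetic shift.  */
#if 0
#include <stdint.h>

static int32_t
sdiv_pow2_sketch (int32_t x, int logd)
{
  int32_t d = (int32_t) 1 << logd;
  int32_t bias = x < 0 ? d - 1 : 0;
  return (x + bias) >> logd;
}
#endif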
3794 \f
3795 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3796 if that is convenient, and returning where the result is.
3797 You may request either the quotient or the remainder as the result;
3798 specify REM_FLAG nonzero to get the remainder.
3799
3800 CODE is the expression code for which kind of division this is;
3801 it controls how rounding is done. MODE is the machine mode to use.
3802 UNSIGNEDP nonzero means do unsigned division. */
3803
3804 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3805 and then correct it by or'ing in missing high bits
3806 if result of ANDI is nonzero.
3807 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3808 This could optimize to a bfexts instruction.
3809 But C doesn't use these operations, so their optimizations are
3810 left for later. */
3811 /* ??? For modulo, we don't actually need the highpart of the first product,
3812 the low part will do nicely. And for small divisors, the second multiply
3813 can also be a low-part only multiply or even be completely left out.
3814 E.g. to calculate the remainder of a division by 3 with a 32 bit
3815 multiply, multiply with 0x55555556 and extract the upper two bits;
3816 the result is exact for inputs up to 0x1fffffff.
3817 The input range can be reduced by using cross-sum rules.
3818 For odd divisors >= 3, the following table gives right shift counts
3819 so that if a number is shifted by an integer multiple of the given
3820 amount, the remainder stays the same:
3821 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3822 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3823 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3824 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3825 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3826
3827 Cross-sum rules for even numbers can be derived by leaving as many bits
3828 to the right alone as the divisor has zeros to the right.
3829 E.g. if x is an unsigned 32 bit number:
3830 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3831 */
3832
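/* A minimal sketch of the remainder-by-3 example from the note above: a
   single 32-bit low multiply by 0x55555556 leaves x mod 3 in the top two
   bits of the product.  The input bound is the one stated in the note; the
   helper name and <stdint.h> types are illustrative assumptions.  */
#if 0
#include <stdint.h>

static unsigned
mod3_small (uint32_t x)
{
  return (x * 0x55555556u) >> 30;   /* Exact for x <= 0x1fffffff.  */
}
#endif
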
3833 rtx
3834 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3835 rtx op0, rtx op1, rtx target, int unsignedp)
3836 {
3837 enum machine_mode compute_mode;
3838 rtx tquotient;
3839 rtx quotient = 0, remainder = 0;
3840 rtx last;
3841 int size;
3842 rtx insn;
3843 optab optab1, optab2;
3844 int op1_is_constant, op1_is_pow2 = 0;
3845 int max_cost, extra_cost;
3846 static HOST_WIDE_INT last_div_const = 0;
3847 static HOST_WIDE_INT ext_op1;
3848 bool speed = optimize_insn_for_speed_p ();
3849
3850 op1_is_constant = CONST_INT_P (op1);
3851 if (op1_is_constant)
3852 {
3853 ext_op1 = INTVAL (op1);
3854 if (unsignedp)
3855 ext_op1 &= GET_MODE_MASK (mode);
3856 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3857 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3858 }
3859
3860 /*
3861 This is the structure of expand_divmod:
3862
3863 First comes code to fix up the operands so we can perform the operations
3864 correctly and efficiently.
3865
3866 Second comes a switch statement with code specific for each rounding mode.
3867 For some special operands this code emits all RTL for the desired
3868 operation, for other cases, it generates only a quotient and stores it in
3869 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3870 to indicate that it has not done anything.
3871
3872 Last comes code that finishes the operation. If QUOTIENT is set and
3873 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3874 QUOTIENT is not set, it is computed using trunc rounding.
3875
3876 We try to generate special code for division and remainder when OP1 is a
3877 constant. If |OP1| = 2**n we can use shifts and some other fast
3878 operations. For other values of OP1, we compute a carefully selected
3879 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3880 by m.
3881
3882 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3883 half of the product. Different strategies for generating the product are
3884 implemented in expand_mult_highpart.
3885
3886 If what we actually want is the remainder, we generate that by another
3887 by-constant multiplication and a subtraction. */
3888
3889 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3890 code below will malfunction if we are, so check here and handle
3891 the special case if so. */
3892 if (op1 == const1_rtx)
3893 return rem_flag ? const0_rtx : op0;
3894
3895 /* When dividing by -1, we could get an overflow.
3896 negv_optab can handle overflows. */
3897 if (! unsignedp && op1 == constm1_rtx)
3898 {
3899 if (rem_flag)
3900 return const0_rtx;
3901 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3902 ? negv_optab : neg_optab, op0, target, 0);
3903 }
3904
3905 if (target
3906 /* Don't use the function value register as a target
3907 since we have to read it as well as write it,
3908 and function-inlining gets confused by this. */
3909 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3910 /* Don't clobber an operand while doing a multi-step calculation. */
3911 || ((rem_flag || op1_is_constant)
3912 && (reg_mentioned_p (target, op0)
3913 || (MEM_P (op0) && MEM_P (target))))
3914 || reg_mentioned_p (target, op1)
3915 || (MEM_P (op1) && MEM_P (target))))
3916 target = 0;
3917
3918 /* Get the mode in which to perform this computation. Normally it will
3919 be MODE, but sometimes we can't do the desired operation in MODE.
3920 If so, pick a wider mode in which we can do the operation. Convert
3921 to that mode at the start to avoid repeated conversions.
3922
3923 First see what operations we need. These depend on the expression
3924 we are evaluating. (We assume that divxx3 insns exist under the
3925 same conditions as modxx3 insns do, and that these insns don't normally
3926 fail. If these assumptions are not correct, we may generate less
3927 efficient code in some cases.)
3928
3929 Then see if we find a mode in which we can open-code that operation
3930 (either a division, modulus, or shift). Finally, check for the smallest
3931 mode for which we can do the operation with a library call. */
3932
3933 /* We might want to refine this now that we have division-by-constant
3934 optimization. Since expand_mult_highpart tries so many variants, it is
3935 not straightforward to generalize this. Maybe we should make an array
3936 of possible modes in init_expmed? Save this for GCC 2.7. */
3937
3938 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3939 ? (unsignedp ? lshr_optab : ashr_optab)
3940 : (unsignedp ? udiv_optab : sdiv_optab));
3941 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3942 ? optab1
3943 : (unsignedp ? udivmod_optab : sdivmod_optab));
3944
3945 for (compute_mode = mode; compute_mode != VOIDmode;
3946 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3947 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3948 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3949 break;
3950
3951 if (compute_mode == VOIDmode)
3952 for (compute_mode = mode; compute_mode != VOIDmode;
3953 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3954 if (optab_libfunc (optab1, compute_mode)
3955 || optab_libfunc (optab2, compute_mode))
3956 break;
3957
3958 /* If we still couldn't find a mode, use MODE, but expand_binop will
3959 probably die. */
3960 if (compute_mode == VOIDmode)
3961 compute_mode = mode;
3962
3963 if (target && GET_MODE (target) == compute_mode)
3964 tquotient = target;
3965 else
3966 tquotient = gen_reg_rtx (compute_mode);
3967
3968 size = GET_MODE_BITSIZE (compute_mode);
3969 #if 0
3970 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3971 (mode), and thereby get better code when OP1 is a constant. Do that
3972 later. It will require going over all usages of SIZE below. */
3973 size = GET_MODE_BITSIZE (mode);
3974 #endif
3975
3976 /* Only deduct something for a REM if the last divide done was
3977 for a different constant. Then set the constant of the last
3978 divide. */
3979 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3980 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3981 && INTVAL (op1) == last_div_const))
3982 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3983
3984 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3985
3986 /* Now convert to the best mode to use. */
3987 if (compute_mode != mode)
3988 {
3989 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3990 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3991
3992 /* convert_modes may have placed op1 into a register, so we
3993 must recompute the following. */
3994 op1_is_constant = CONST_INT_P (op1);
3995 op1_is_pow2 = (op1_is_constant
3996 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3997 || (! unsignedp
3998 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3999 }
4000
4001 /* If one of the operands is a volatile MEM, copy it into a register. */
4002
4003 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4004 op0 = force_reg (compute_mode, op0);
4005 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4006 op1 = force_reg (compute_mode, op1);
4007
4008 /* If we need the remainder or if OP1 is constant, we need to
4009 put OP0 in a register in case it has any queued subexpressions. */
4010 if (rem_flag || op1_is_constant)
4011 op0 = force_reg (compute_mode, op0);
4012
4013 last = get_last_insn ();
4014
4015 /* Promote floor rounding to trunc rounding for unsigned operations. */
4016 if (unsignedp)
4017 {
4018 if (code == FLOOR_DIV_EXPR)
4019 code = TRUNC_DIV_EXPR;
4020 if (code == FLOOR_MOD_EXPR)
4021 code = TRUNC_MOD_EXPR;
4022 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4023 code = TRUNC_DIV_EXPR;
4024 }
4025
4026 if (op1 != const0_rtx)
4027 switch (code)
4028 {
4029 case TRUNC_MOD_EXPR:
4030 case TRUNC_DIV_EXPR:
4031 if (op1_is_constant)
4032 {
4033 if (unsignedp)
4034 {
4035 unsigned HOST_WIDE_INT mh;
4036 int pre_shift, post_shift;
4037 int dummy;
4038 rtx ml;
4039 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4040 & GET_MODE_MASK (compute_mode));
4041
4042 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4043 {
4044 pre_shift = floor_log2 (d);
4045 if (rem_flag)
4046 {
4047 remainder
4048 = expand_binop (compute_mode, and_optab, op0,
4049 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4050 remainder, 1,
4051 OPTAB_LIB_WIDEN);
4052 if (remainder)
4053 return gen_lowpart (mode, remainder);
4054 }
4055 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4056 pre_shift, tquotient, 1);
4057 }
4058 else if (size <= HOST_BITS_PER_WIDE_INT)
4059 {
4060 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4061 {
4062 /* Most significant bit of divisor is set; emit an scc
4063 insn. */
4064 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4065 compute_mode, 1, 1);
4066 }
4067 else
4068 {
4069 /* Find a suitable multiplier and right shift count
4070 instead of multiplying with D. */
4071
4072 mh = choose_multiplier (d, size, size,
4073 &ml, &post_shift, &dummy);
4074
4075 /* If the suggested multiplier is more than SIZE bits,
4076 we can do better for even divisors, using an
4077 initial right shift. */
4078 if (mh != 0 && (d & 1) == 0)
4079 {
4080 pre_shift = floor_log2 (d & -d);
4081 mh = choose_multiplier (d >> pre_shift, size,
4082 size - pre_shift,
4083 &ml, &post_shift, &dummy);
4084 gcc_assert (!mh);
4085 }
4086 else
4087 pre_shift = 0;
4088
4089 if (mh != 0)
4090 {
4091 rtx t1, t2, t3, t4;
4092
4093 if (post_shift - 1 >= BITS_PER_WORD)
4094 goto fail1;
4095
4096 extra_cost
4097 = (shift_cost[speed][compute_mode][post_shift - 1]
4098 + shift_cost[speed][compute_mode][1]
4099 + 2 * add_cost[speed][compute_mode]);
4100 t1 = expand_mult_highpart (compute_mode, op0, ml,
4101 NULL_RTX, 1,
4102 max_cost - extra_cost);
4103 if (t1 == 0)
4104 goto fail1;
4105 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4106 op0, t1),
4107 NULL_RTX);
4108 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4109 t2, 1, NULL_RTX, 1);
4110 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4111 t1, t3),
4112 NULL_RTX);
4113 quotient = expand_shift
4114 (RSHIFT_EXPR, compute_mode, t4,
4115 post_shift - 1, tquotient, 1);
4116 }
4117 else
4118 {
4119 rtx t1, t2;
4120
4121 if (pre_shift >= BITS_PER_WORD
4122 || post_shift >= BITS_PER_WORD)
4123 goto fail1;
4124
4125 t1 = expand_shift
4126 (RSHIFT_EXPR, compute_mode, op0,
4127 pre_shift, NULL_RTX, 1);
4128 extra_cost
4129 = (shift_cost[speed][compute_mode][pre_shift]
4130 + shift_cost[speed][compute_mode][post_shift]);
4131 t2 = expand_mult_highpart (compute_mode, t1, ml,
4132 NULL_RTX, 1,
4133 max_cost - extra_cost);
4134 if (t2 == 0)
4135 goto fail1;
4136 quotient = expand_shift
4137 (RSHIFT_EXPR, compute_mode, t2,
4138 post_shift, tquotient, 1);
4139 }
4140 }
4141 }
4142 else /* Too wide mode to use tricky code */
4143 break;
4144
4145 insn = get_last_insn ();
4146 if (insn != last)
4147 set_dst_reg_note (insn, REG_EQUAL,
4148 gen_rtx_UDIV (compute_mode, op0, op1),
4149 quotient);
4150 }
4151 else /* TRUNC_DIV, signed */
4152 {
4153 unsigned HOST_WIDE_INT ml;
4154 int lgup, post_shift;
4155 rtx mlr;
4156 HOST_WIDE_INT d = INTVAL (op1);
4157 unsigned HOST_WIDE_INT abs_d;
4158
4159 /* Since d might be INT_MIN, we have to cast to
4160 unsigned HOST_WIDE_INT before negating to avoid
4161 undefined signed overflow. */
4162 abs_d = (d >= 0
4163 ? (unsigned HOST_WIDE_INT) d
4164 : - (unsigned HOST_WIDE_INT) d);
4165
4166 /* n rem d = n rem -d */
4167 if (rem_flag && d < 0)
4168 {
4169 d = abs_d;
4170 op1 = gen_int_mode (abs_d, compute_mode);
4171 }
4172
4173 if (d == 1)
4174 quotient = op0;
4175 else if (d == -1)
4176 quotient = expand_unop (compute_mode, neg_optab, op0,
4177 tquotient, 0);
4178 else if (HOST_BITS_PER_WIDE_INT >= size
4179 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4180 {
4181 /* This case is not handled correctly below. */
4182 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4183 compute_mode, 1, 1);
4184 if (quotient == 0)
4185 goto fail1;
4186 }
4187 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4188 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4189 : sdiv_pow2_cheap[speed][compute_mode])
4190 /* We assume that cheap metric is true if the
4191 optab has an expander for this mode. */
4192 && ((optab_handler ((rem_flag ? smod_optab
4193 : sdiv_optab),
4194 compute_mode)
4195 != CODE_FOR_nothing)
4196 || (optab_handler (sdivmod_optab,
4197 compute_mode)
4198 != CODE_FOR_nothing)))
4199 ;
4200 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4201 {
4202 if (rem_flag)
4203 {
4204 remainder = expand_smod_pow2 (compute_mode, op0, d);
4205 if (remainder)
4206 return gen_lowpart (mode, remainder);
4207 }
4208
4209 if (sdiv_pow2_cheap[speed][compute_mode]
4210 && ((optab_handler (sdiv_optab, compute_mode)
4211 != CODE_FOR_nothing)
4212 || (optab_handler (sdivmod_optab, compute_mode)
4213 != CODE_FOR_nothing)))
4214 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4215 compute_mode, op0,
4216 gen_int_mode (abs_d,
4217 compute_mode),
4218 NULL_RTX, 0);
4219 else
4220 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4221
4222 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4223 negate the quotient. */
4224 if (d < 0)
4225 {
4226 insn = get_last_insn ();
4227 if (insn != last
4228 && abs_d < ((unsigned HOST_WIDE_INT) 1
4229 << (HOST_BITS_PER_WIDE_INT - 1)))
4230 set_dst_reg_note (insn, REG_EQUAL,
4231 gen_rtx_DIV (compute_mode, op0,
4232 gen_int_mode
4233 (abs_d,
4234 compute_mode)),
4235 quotient);
4236
4237 quotient = expand_unop (compute_mode, neg_optab,
4238 quotient, quotient, 0);
4239 }
4240 }
4241 else if (size <= HOST_BITS_PER_WIDE_INT)
4242 {
4243 choose_multiplier (abs_d, size, size - 1,
4244 &mlr, &post_shift, &lgup);
4245 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4246 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4247 {
4248 rtx t1, t2, t3;
4249
4250 if (post_shift >= BITS_PER_WORD
4251 || size - 1 >= BITS_PER_WORD)
4252 goto fail1;
4253
4254 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4255 + shift_cost[speed][compute_mode][size - 1]
4256 + add_cost[speed][compute_mode]);
4257 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4258 NULL_RTX, 0,
4259 max_cost - extra_cost);
4260 if (t1 == 0)
4261 goto fail1;
4262 t2 = expand_shift
4263 (RSHIFT_EXPR, compute_mode, t1,
4264 post_shift, NULL_RTX, 0);
4265 t3 = expand_shift
4266 (RSHIFT_EXPR, compute_mode, op0,
4267 size - 1, NULL_RTX, 0);
4268 if (d < 0)
4269 quotient
4270 = force_operand (gen_rtx_MINUS (compute_mode,
4271 t3, t2),
4272 tquotient);
4273 else
4274 quotient
4275 = force_operand (gen_rtx_MINUS (compute_mode,
4276 t2, t3),
4277 tquotient);
4278 }
4279 else
4280 {
4281 rtx t1, t2, t3, t4;
4282
4283 if (post_shift >= BITS_PER_WORD
4284 || size - 1 >= BITS_PER_WORD)
4285 goto fail1;
4286
4287 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4288 mlr = gen_int_mode (ml, compute_mode);
4289 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4290 + shift_cost[speed][compute_mode][size - 1]
4291 + 2 * add_cost[speed][compute_mode]);
4292 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4293 NULL_RTX, 0,
4294 max_cost - extra_cost);
4295 if (t1 == 0)
4296 goto fail1;
4297 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4298 t1, op0),
4299 NULL_RTX);
4300 t3 = expand_shift
4301 (RSHIFT_EXPR, compute_mode, t2,
4302 post_shift, NULL_RTX, 0);
4303 t4 = expand_shift
4304 (RSHIFT_EXPR, compute_mode, op0,
4305 size - 1, NULL_RTX, 0);
4306 if (d < 0)
4307 quotient
4308 = force_operand (gen_rtx_MINUS (compute_mode,
4309 t4, t3),
4310 tquotient);
4311 else
4312 quotient
4313 = force_operand (gen_rtx_MINUS (compute_mode,
4314 t3, t4),
4315 tquotient);
4316 }
4317 }
4318 else /* Too wide mode to use tricky code */
4319 break;
4320
4321 insn = get_last_insn ();
4322 if (insn != last)
4323 set_dst_reg_note (insn, REG_EQUAL,
4324 gen_rtx_DIV (compute_mode, op0, op1),
4325 quotient);
4326 }
4327 break;
4328 }
4329 fail1:
4330 delete_insns_since (last);
4331 break;
4332
4333 case FLOOR_DIV_EXPR:
4334 case FLOOR_MOD_EXPR:
4335 /* We will come here only for signed operations. */
4336 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4337 {
4338 unsigned HOST_WIDE_INT mh;
4339 int pre_shift, lgup, post_shift;
4340 HOST_WIDE_INT d = INTVAL (op1);
4341 rtx ml;
4342
4343 if (d > 0)
4344 {
4345 /* We could just as easily deal with negative constants here,
4346 but it does not seem worth the trouble for GCC 2.6. */
4347 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4348 {
4349 pre_shift = floor_log2 (d);
4350 if (rem_flag)
4351 {
4352 remainder = expand_binop (compute_mode, and_optab, op0,
4353 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4354 remainder, 0, OPTAB_LIB_WIDEN);
4355 if (remainder)
4356 return gen_lowpart (mode, remainder);
4357 }
4358 quotient = expand_shift
4359 (RSHIFT_EXPR, compute_mode, op0,
4360 pre_shift, tquotient, 0);
4361 }
4362 else
4363 {
4364 rtx t1, t2, t3, t4;
4365
4366 mh = choose_multiplier (d, size, size - 1,
4367 &ml, &post_shift, &lgup);
4368 gcc_assert (!mh);
4369
4370 if (post_shift < BITS_PER_WORD
4371 && size - 1 < BITS_PER_WORD)
4372 {
4373 t1 = expand_shift
4374 (RSHIFT_EXPR, compute_mode, op0,
4375 size - 1, NULL_RTX, 0);
4376 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4377 NULL_RTX, 0, OPTAB_WIDEN);
4378 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4379 + shift_cost[speed][compute_mode][size - 1]
4380 + 2 * add_cost[speed][compute_mode]);
4381 t3 = expand_mult_highpart (compute_mode, t2, ml,
4382 NULL_RTX, 1,
4383 max_cost - extra_cost);
4384 if (t3 != 0)
4385 {
4386 t4 = expand_shift
4387 (RSHIFT_EXPR, compute_mode, t3,
4388 post_shift, NULL_RTX, 1);
4389 quotient = expand_binop (compute_mode, xor_optab,
4390 t4, t1, tquotient, 0,
4391 OPTAB_WIDEN);
4392 }
4393 }
4394 }
4395 }
4396 else
4397 {
4398 rtx nsign, t1, t2, t3, t4;
4399 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4400 op0, constm1_rtx), NULL_RTX);
4401 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4402 0, OPTAB_WIDEN);
4403 nsign = expand_shift
4404 (RSHIFT_EXPR, compute_mode, t2,
4405 size - 1, NULL_RTX, 0);
4406 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4407 NULL_RTX);
4408 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4409 NULL_RTX, 0);
4410 if (t4)
4411 {
4412 rtx t5;
4413 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4414 NULL_RTX, 0);
4415 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4416 t4, t5),
4417 tquotient);
4418 }
4419 }
4420 }
4421
4422 if (quotient != 0)
4423 break;
4424 delete_insns_since (last);
4425
4426 /* Try using an instruction that produces both the quotient and
4427 remainder, using truncation. We can easily compensate the quotient
4428 or remainder to get floor rounding, once we have the remainder.
4429 Notice that we compute also the final remainder value here,
4430 and return the result right away. */
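/* Illustrative note: with truncating quotient Q and remainder R for
   OP0 / OP1, floor rounding is obtained as Q - 1 and R + OP1 whenever
   R != 0 and OP0 and OP1 have opposite signs, which is the compensation
   emitted below.  */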
4431 if (target == 0 || GET_MODE (target) != compute_mode)
4432 target = gen_reg_rtx (compute_mode);
4433
4434 if (rem_flag)
4435 {
4436 remainder
4437 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4438 quotient = gen_reg_rtx (compute_mode);
4439 }
4440 else
4441 {
4442 quotient
4443 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4444 remainder = gen_reg_rtx (compute_mode);
4445 }
4446
4447 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4448 quotient, remainder, 0))
4449 {
4450 /* This could be computed with a branch-less sequence.
4451 Save that for later. */
4452 rtx tem;
4453 rtx label = gen_label_rtx ();
4454 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4455 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4456 NULL_RTX, 0, OPTAB_WIDEN);
4457 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4458 expand_dec (quotient, const1_rtx);
4459 expand_inc (remainder, op1);
4460 emit_label (label);
4461 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4462 }
4463
4464 /* No luck with division elimination or divmod. Have to do it
4465 by conditionally adjusting op0 *and* the result. */
4466 {
4467 rtx label1, label2, label3, label4, label5;
4468 rtx adjusted_op0;
4469 rtx tem;
4470
4471 quotient = gen_reg_rtx (compute_mode);
4472 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4473 label1 = gen_label_rtx ();
4474 label2 = gen_label_rtx ();
4475 label3 = gen_label_rtx ();
4476 label4 = gen_label_rtx ();
4477 label5 = gen_label_rtx ();
4478 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4479 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4480 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4481 quotient, 0, OPTAB_LIB_WIDEN);
4482 if (tem != quotient)
4483 emit_move_insn (quotient, tem);
4484 emit_jump_insn (gen_jump (label5));
4485 emit_barrier ();
4486 emit_label (label1);
4487 expand_inc (adjusted_op0, const1_rtx);
4488 emit_jump_insn (gen_jump (label4));
4489 emit_barrier ();
4490 emit_label (label2);
4491 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4492 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4493 quotient, 0, OPTAB_LIB_WIDEN);
4494 if (tem != quotient)
4495 emit_move_insn (quotient, tem);
4496 emit_jump_insn (gen_jump (label5));
4497 emit_barrier ();
4498 emit_label (label3);
4499 expand_dec (adjusted_op0, const1_rtx);
4500 emit_label (label4);
4501 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4502 quotient, 0, OPTAB_LIB_WIDEN);
4503 if (tem != quotient)
4504 emit_move_insn (quotient, tem);
4505 expand_dec (quotient, const1_rtx);
4506 emit_label (label5);
4507 }
4508 break;
4509
4510 case CEIL_DIV_EXPR:
4511 case CEIL_MOD_EXPR:
4512 if (unsignedp)
4513 {
4514 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4515 {
4516 rtx t1, t2, t3;
4517 unsigned HOST_WIDE_INT d = INTVAL (op1);
4518 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4519 floor_log2 (d), tquotient, 1);
4520 t2 = expand_binop (compute_mode, and_optab, op0,
4521 GEN_INT (d - 1),
4522 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4523 t3 = gen_reg_rtx (compute_mode);
4524 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4525 compute_mode, 1, 1);
4526 if (t3 == 0)
4527 {
4528 rtx lab;
4529 lab = gen_label_rtx ();
4530 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4531 expand_inc (t1, const1_rtx);
4532 emit_label (lab);
4533 quotient = t1;
4534 }
4535 else
4536 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4537 t1, t3),
4538 tquotient);
4539 break;
4540 }
4541
4542 /* Try using an instruction that produces both the quotient and
4543 remainder, using truncation. We can easily compensate the
4544 quotient or remainder to get ceiling rounding, once we have the
4545 remainder. Notice that we compute also the final remainder
4546 value here, and return the result right away. */
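/* Illustrative note: for unsigned operands, ceiling rounding is obtained
   from the truncating pair as Q + 1 and R - OP1 whenever R != 0, which is
   the compensation emitted below.  */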
4547 if (target == 0 || GET_MODE (target) != compute_mode)
4548 target = gen_reg_rtx (compute_mode);
4549
4550 if (rem_flag)
4551 {
4552 remainder = (REG_P (target)
4553 ? target : gen_reg_rtx (compute_mode));
4554 quotient = gen_reg_rtx (compute_mode);
4555 }
4556 else
4557 {
4558 quotient = (REG_P (target)
4559 ? target : gen_reg_rtx (compute_mode));
4560 remainder = gen_reg_rtx (compute_mode);
4561 }
4562
4563 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4564 remainder, 1))
4565 {
4566 /* This could be computed with a branch-less sequence.
4567 Save that for later. */
4568 rtx label = gen_label_rtx ();
4569 do_cmp_and_jump (remainder, const0_rtx, EQ,
4570 compute_mode, label);
4571 expand_inc (quotient, const1_rtx);
4572 expand_dec (remainder, op1);
4573 emit_label (label);
4574 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4575 }
4576
4577 /* No luck with division elimination or divmod. Have to do it
4578 by conditionally adjusting op0 *and* the result. */
4579 {
4580 rtx label1, label2;
4581 rtx adjusted_op0, tem;
4582
4583 quotient = gen_reg_rtx (compute_mode);
4584 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4585 label1 = gen_label_rtx ();
4586 label2 = gen_label_rtx ();
4587 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4588 compute_mode, label1);
4589 emit_move_insn (quotient, const0_rtx);
4590 emit_jump_insn (gen_jump (label2));
4591 emit_barrier ();
4592 emit_label (label1);
4593 expand_dec (adjusted_op0, const1_rtx);
4594 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4595 quotient, 1, OPTAB_LIB_WIDEN);
4596 if (tem != quotient)
4597 emit_move_insn (quotient, tem);
4598 expand_inc (quotient, const1_rtx);
4599 emit_label (label2);
4600 }
4601 }
4602 else /* signed */
4603 {
4604 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4605 && INTVAL (op1) >= 0)
4606 {
4607 /* This is extremely similar to the code for the unsigned case
4608 above. For 2.7 we should merge these variants, but for
4609 2.6.1 I don't want to touch the code for unsigned since that
4610 get used in C. The signed case will only be used by other
4611 languages (Ada). */
4612
4613 rtx t1, t2, t3;
4614 unsigned HOST_WIDE_INT d = INTVAL (op1);
4615 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4616 floor_log2 (d), tquotient, 0);
4617 t2 = expand_binop (compute_mode, and_optab, op0,
4618 GEN_INT (d - 1),
4619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4620 t3 = gen_reg_rtx (compute_mode);
4621 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4622 compute_mode, 1, 1);
4623 if (t3 == 0)
4624 {
4625 rtx lab;
4626 lab = gen_label_rtx ();
4627 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4628 expand_inc (t1, const1_rtx);
4629 emit_label (lab);
4630 quotient = t1;
4631 }
4632 else
4633 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4634 t1, t3),
4635 tquotient);
4636 break;
4637 }
4638
4639 /* Try using an instruction that produces both the quotient and
4640 remainder, using truncation. We can easily compensate the
4641 quotient or remainder to get ceiling rounding, once we have the
4642 remainder. Notice that we compute also the final remainder
4643 value here, and return the result right away. */
4644 if (target == 0 || GET_MODE (target) != compute_mode)
4645 target = gen_reg_rtx (compute_mode);
4646 if (rem_flag)
4647 {
4648 remainder = (REG_P (target)
4649 ? target : gen_reg_rtx (compute_mode));
4650 quotient = gen_reg_rtx (compute_mode);
4651 }
4652 else
4653 {
4654 quotient = (REG_P (target)
4655 ? target : gen_reg_rtx (compute_mode));
4656 remainder = gen_reg_rtx (compute_mode);
4657 }
4658
4659 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4660 remainder, 0))
4661 {
4662 /* This could be computed with a branch-less sequence.
4663 Save that for later. */
4664 rtx tem;
4665 rtx label = gen_label_rtx ();
4666 do_cmp_and_jump (remainder, const0_rtx, EQ,
4667 compute_mode, label);
4668 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4669 NULL_RTX, 0, OPTAB_WIDEN);
4670 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4671 expand_inc (quotient, const1_rtx);
4672 expand_dec (remainder, op1);
4673 emit_label (label);
4674 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4675 }
4676
4677 /* No luck with division elimination or divmod. Have to do it
4678 by conditionally adjusting op0 *and* the result. */
4679 {
4680 rtx label1, label2, label3, label4, label5;
4681 rtx adjusted_op0;
4682 rtx tem;
4683
4684 quotient = gen_reg_rtx (compute_mode);
4685 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4686 label1 = gen_label_rtx ();
4687 label2 = gen_label_rtx ();
4688 label3 = gen_label_rtx ();
4689 label4 = gen_label_rtx ();
4690 label5 = gen_label_rtx ();
4691 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4692 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4693 compute_mode, label1);
4694 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4695 quotient, 0, OPTAB_LIB_WIDEN);
4696 if (tem != quotient)
4697 emit_move_insn (quotient, tem);
4698 emit_jump_insn (gen_jump (label5));
4699 emit_barrier ();
4700 emit_label (label1);
4701 expand_dec (adjusted_op0, const1_rtx);
4702 emit_jump_insn (gen_jump (label4));
4703 emit_barrier ();
4704 emit_label (label2);
4705 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4706 compute_mode, label3);
4707 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 quotient, 0, OPTAB_LIB_WIDEN);
4709 if (tem != quotient)
4710 emit_move_insn (quotient, tem);
4711 emit_jump_insn (gen_jump (label5));
4712 emit_barrier ();
4713 emit_label (label3);
4714 expand_inc (adjusted_op0, const1_rtx);
4715 emit_label (label4);
4716 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4717 quotient, 0, OPTAB_LIB_WIDEN);
4718 if (tem != quotient)
4719 emit_move_insn (quotient, tem);
4720 expand_inc (quotient, const1_rtx);
4721 emit_label (label5);
4722 }
4723 }
4724 break;
4725
4726 case EXACT_DIV_EXPR:
4727 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4728 {
4729 HOST_WIDE_INT d = INTVAL (op1);
4730 unsigned HOST_WIDE_INT ml;
4731 int pre_shift;
4732 rtx t1;
4733
4734 pre_shift = floor_log2 (d & -d);
4735 ml = invert_mod2n (d >> pre_shift, size);
4736 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4737 pre_shift, NULL_RTX, unsignedp);
4738 quotient = expand_mult (compute_mode, t1,
4739 gen_int_mode (ml, compute_mode),
4740 NULL_RTX, 1);
4741
4742 insn = get_last_insn ();
4743 set_dst_reg_note (insn, REG_EQUAL,
4744 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4745 compute_mode, op0, op1),
4746 quotient);
4747 }
4748 break;
4749
4750 case ROUND_DIV_EXPR:
4751 case ROUND_MOD_EXPR:
4752 if (unsignedp)
4753 {
4754 rtx tem;
4755 rtx label;
4756 label = gen_label_rtx ();
4757 quotient = gen_reg_rtx (compute_mode);
4758 remainder = gen_reg_rtx (compute_mode);
4759 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4760 {
4761 rtx tem;
4762 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4763 quotient, 1, OPTAB_LIB_WIDEN);
4764 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4765 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4766 remainder, 1, OPTAB_LIB_WIDEN);
4767 }
4768 tem = plus_constant (op1, -1);
4769 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4770 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4771 expand_inc (quotient, const1_rtx);
4772 expand_dec (remainder, op1);
4773 emit_label (label);
4774 }
4775 else
4776 {
4777 rtx abs_rem, abs_op1, tem, mask;
4778 rtx label;
4779 label = gen_label_rtx ();
4780 quotient = gen_reg_rtx (compute_mode);
4781 remainder = gen_reg_rtx (compute_mode);
4782 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4783 {
4784 rtx tem;
4785 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4786 quotient, 0, OPTAB_LIB_WIDEN);
4787 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4788 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4789 remainder, 0, OPTAB_LIB_WIDEN);
4790 }
4791 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4792 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4793 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4794 1, NULL_RTX, 1);
4795 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4796 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4797 NULL_RTX, 0, OPTAB_WIDEN);
4798 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4799 size - 1, NULL_RTX, 0);
4800 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4801 NULL_RTX, 0, OPTAB_WIDEN);
4802 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4803 NULL_RTX, 0, OPTAB_WIDEN);
4804 expand_inc (quotient, tem);
4805 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4806 NULL_RTX, 0, OPTAB_WIDEN);
4807 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4808 NULL_RTX, 0, OPTAB_WIDEN);
4809 expand_dec (remainder, tem);
4810 emit_label (label);
4811 }
4812 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4813
4814 default:
4815 gcc_unreachable ();
4816 }
4817
4818 if (quotient == 0)
4819 {
4820 if (target && GET_MODE (target) != compute_mode)
4821 target = 0;
4822
4823 if (rem_flag)
4824 {
4825 /* Try to produce the remainder without producing the quotient.
4826 If we seem to have a divmod pattern that does not require widening,
4827 don't try widening here. We should really have a WIDEN argument
4828 to expand_twoval_binop, since what we'd really like to do here is
4829 1) try a mod insn in compute_mode
4830 2) try a divmod insn in compute_mode
4831 3) try a div insn in compute_mode and multiply-subtract to get
4832 remainder
4833 4) try the same things with widening allowed. */
4834 remainder
4835 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4836 op0, op1, target,
4837 unsignedp,
4838 ((optab_handler (optab2, compute_mode)
4839 != CODE_FOR_nothing)
4840 ? OPTAB_DIRECT : OPTAB_WIDEN));
4841 if (remainder == 0)
4842 {
4843 /* No luck there. Can we do remainder and divide at once
4844 without a library call? */
4845 remainder = gen_reg_rtx (compute_mode);
4846 if (! expand_twoval_binop ((unsignedp
4847 ? udivmod_optab
4848 : sdivmod_optab),
4849 op0, op1,
4850 NULL_RTX, remainder, unsignedp))
4851 remainder = 0;
4852 }
4853
4854 if (remainder)
4855 return gen_lowpart (mode, remainder);
4856 }
4857
4858 /* Produce the quotient. Try a quotient insn, but not a library call.
4859 If we have a divmod in this mode, use it in preference to widening
4860 the div (for this test we assume it will not fail). Note that optab2
4861 is set to one of the two optabs that the call below will use. */
4862 quotient
4863 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4864 op0, op1, rem_flag ? NULL_RTX : target,
4865 unsignedp,
4866 ((optab_handler (optab2, compute_mode)
4867 != CODE_FOR_nothing)
4868 ? OPTAB_DIRECT : OPTAB_WIDEN));
4869
4870 if (quotient == 0)
4871 {
4872 /* No luck there. Try a quotient-and-remainder insn,
4873 keeping the quotient alone. */
4874 quotient = gen_reg_rtx (compute_mode);
4875 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4876 op0, op1,
4877 quotient, NULL_RTX, unsignedp))
4878 {
4879 quotient = 0;
4880 if (! rem_flag)
4881 /* Still no luck. If we are not computing the remainder,
4882 use a library call for the quotient. */
4883 quotient = sign_expand_binop (compute_mode,
4884 udiv_optab, sdiv_optab,
4885 op0, op1, target,
4886 unsignedp, OPTAB_LIB_WIDEN);
4887 }
4888 }
4889 }
4890
4891 if (rem_flag)
4892 {
4893 if (target && GET_MODE (target) != compute_mode)
4894 target = 0;
4895
4896 if (quotient == 0)
4897 {
4898 /* No divide instruction either. Use library for remainder. */
4899 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4900 op0, op1, target,
4901 unsignedp, OPTAB_LIB_WIDEN);
4902 /* No remainder function. Try a quotient-and-remainder
4903 function, keeping the remainder. */
4904 if (!remainder)
4905 {
4906 remainder = gen_reg_rtx (compute_mode);
4907 if (!expand_twoval_binop_libfunc
4908 (unsignedp ? udivmod_optab : sdivmod_optab,
4909 op0, op1,
4910 NULL_RTX, remainder,
4911 unsignedp ? UMOD : MOD))
4912 remainder = NULL_RTX;
4913 }
4914 }
4915 else
4916 {
4917 /* We divided. Now finish doing X - Y * (X / Y). */
4918 remainder = expand_mult (compute_mode, quotient, op1,
4919 NULL_RTX, unsignedp);
4920 remainder = expand_binop (compute_mode, sub_optab, op0,
4921 remainder, target, unsignedp,
4922 OPTAB_LIB_WIDEN);
4923 }
4924 }
4925
4926 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4927 }
4928 \f
4929 /* Return a tree node with data type TYPE, describing the value of X.
4930 Usually this is a VAR_DECL, if there is no obvious better choice.
4931 X may be an expression; however, we only support those expressions
4932 generated by loop.c. */
4933
4934 tree
4935 make_tree (tree type, rtx x)
4936 {
4937 tree t;
4938
4939 switch (GET_CODE (x))
4940 {
4941 case CONST_INT:
4942 {
4943 HOST_WIDE_INT hi = 0;
4944
4945 if (INTVAL (x) < 0
4946 && !(TYPE_UNSIGNED (type)
4947 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4948 < HOST_BITS_PER_WIDE_INT)))
4949 hi = -1;
4950
4951 t = build_int_cst_wide (type, INTVAL (x), hi);
4952
4953 return t;
4954 }
4955
4956 case CONST_DOUBLE:
4957 if (GET_MODE (x) == VOIDmode)
4958 t = build_int_cst_wide (type,
4959 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4960 else
4961 {
4962 REAL_VALUE_TYPE d;
4963
4964 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4965 t = build_real (type, d);
4966 }
4967
4968 return t;
4969
4970 case CONST_VECTOR:
4971 {
4972 int units = CONST_VECTOR_NUNITS (x);
4973 tree itype = TREE_TYPE (type);
4974 tree *elts;
4975 int i;
4976
4977 /* Build a tree with vector elements. */
4978 elts = XALLOCAVEC (tree, units);
4979 for (i = units - 1; i >= 0; --i)
4980 {
4981 rtx elt = CONST_VECTOR_ELT (x, i);
4982 elts[i] = make_tree (itype, elt);
4983 }
4984
4985 return build_vector (type, elts);
4986 }
4987
4988 case PLUS:
4989 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4990 make_tree (type, XEXP (x, 1)));
4991
4992 case MINUS:
4993 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4994 make_tree (type, XEXP (x, 1)));
4995
4996 case NEG:
4997 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4998
4999 case MULT:
5000 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5001 make_tree (type, XEXP (x, 1)));
5002
5003 case ASHIFT:
5004 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5005 make_tree (type, XEXP (x, 1)));
5006
5007 case LSHIFTRT:
5008 t = unsigned_type_for (type);
5009 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5010 make_tree (t, XEXP (x, 0)),
5011 make_tree (type, XEXP (x, 1))));
5012
5013 case ASHIFTRT:
5014 t = signed_type_for (type);
5015 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5016 make_tree (t, XEXP (x, 0)),
5017 make_tree (type, XEXP (x, 1))));
5018
5019 case DIV:
5020 if (TREE_CODE (type) != REAL_TYPE)
5021 t = signed_type_for (type);
5022 else
5023 t = type;
5024
5025 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5026 make_tree (t, XEXP (x, 0)),
5027 make_tree (t, XEXP (x, 1))));
5028 case UDIV:
5029 t = unsigned_type_for (type);
5030 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5031 make_tree (t, XEXP (x, 0)),
5032 make_tree (t, XEXP (x, 1))));
5033
5034 case SIGN_EXTEND:
5035 case ZERO_EXTEND:
5036 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5037 GET_CODE (x) == ZERO_EXTEND);
5038 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5039
5040 case CONST:
5041 return make_tree (type, XEXP (x, 0));
5042
5043 case SYMBOL_REF:
5044 t = SYMBOL_REF_DECL (x);
5045 if (t)
5046 return fold_convert (type, build_fold_addr_expr (t));
5047 /* else fall through. */
5048
5049 default:
5050 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5051
5052 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5053 address mode to pointer mode. */
5054 if (POINTER_TYPE_P (type))
5055 x = convert_memory_address_addr_space
5056 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5057
5058 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5059 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5060 t->decl_with_rtl.rtl = x;
5061
5062 return t;
5063 }
5064 }
5065 \f
5066 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5067 and returning TARGET.
5068
5069 If TARGET is 0, a pseudo-register or constant is returned. */
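/* A usage sketch (the operands here are hypothetical): expand_and (SImode,
   op, GEN_INT (0xff), NULL_RTX) returns the AND of OP with 0xff, i.e. the
   low byte of OP, in a pseudo-register or constant since TARGET is 0. */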
5070
5071 rtx
5072 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5073 {
5074 rtx tem = 0;
5075
5076 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5077 tem = simplify_binary_operation (AND, mode, op0, op1);
5078 if (tem == 0)
5079 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5080
5081 if (target == 0)
5082 target = tem;
5083 else if (tem != target)
5084 emit_move_insn (target, tem);
5085 return target;
5086 }
5087
5088 /* Helper function for emit_store_flag. */
5089 static rtx
5090 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5091 enum machine_mode mode, enum machine_mode compare_mode,
5092 int unsignedp, rtx x, rtx y, int normalizep,
5093 enum machine_mode target_mode)
5094 {
5095 struct expand_operand ops[4];
5096 rtx op0, last, comparison, subtarget;
5097 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5098
5099 last = get_last_insn ();
5100 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5101 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5102 if (!x || !y)
5103 {
5104 delete_insns_since (last);
5105 return NULL_RTX;
5106 }
5107
5108 if (target_mode == VOIDmode)
5109 target_mode = result_mode;
5110 if (!target)
5111 target = gen_reg_rtx (target_mode);
5112
5113 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5114
5115 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5116 create_fixed_operand (&ops[1], comparison);
5117 create_fixed_operand (&ops[2], x);
5118 create_fixed_operand (&ops[3], y);
5119 if (!maybe_expand_insn (icode, 4, ops))
5120 {
5121 delete_insns_since (last);
5122 return NULL_RTX;
5123 }
5124 subtarget = ops[0].value;
5125
5126 /* If we are converting to a wider mode, first convert to
5127 TARGET_MODE, then normalize. This produces better combining
5128 opportunities on machines that have a SIGN_EXTRACT when we are
5129 testing a single bit. This mostly benefits the 68k.
5130
5131 If STORE_FLAG_VALUE does not have the sign bit set when
5132 interpreted in MODE, we can do this conversion as unsigned, which
5133 is usually more efficient. */
5134 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5135 {
5136 convert_move (target, subtarget,
5137 val_signbit_known_clear_p (result_mode,
5138 STORE_FLAG_VALUE));
5139 op0 = target;
5140 result_mode = target_mode;
5141 }
5142 else
5143 op0 = subtarget;
5144
5145 /* If we want to keep subexpressions around, don't reuse our last
5146 target. */
5147 if (optimize)
5148 subtarget = 0;
5149
5150 /* Now normalize to the proper value in MODE. Sometimes we don't
5151 have to do anything. */
5152 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5153 ;
5154 /* STORE_FLAG_VALUE might be the most negative number, so write
5155 the comparison this way to avoid a compile-time warning. */
5156 else if (- normalizep == STORE_FLAG_VALUE)
5157 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5158
5159 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5160 it hard to use a value of just the sign bit due to ANSI integer
5161 constant typing rules. */
5162 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5163 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5164 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5165 normalizep == 1);
5166 else
5167 {
5168 gcc_assert (STORE_FLAG_VALUE & 1);
5169
5170 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5171 if (normalizep == -1)
5172 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5173 }
5174
5175 /* If we were converting to a smaller mode, do the conversion now. */
5176 if (target_mode != result_mode)
5177 {
5178 convert_move (target, op0, 0);
5179 return target;
5180 }
5181 else
5182 return op0;
5183 }
5184
5185
5186 /* A subroutine of emit_store_flag only including "tricks" that do not
5187 need a recursive call. These are kept separate to avoid infinite
5188 loops. */
5189
5190 static rtx
5191 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5192 enum machine_mode mode, int unsignedp, int normalizep,
5193 enum machine_mode target_mode)
5194 {
5195 rtx subtarget;
5196 enum insn_code icode;
5197 enum machine_mode compare_mode;
5198 enum mode_class mclass;
5199 enum rtx_code scode;
5200 rtx tem;
5201
5202 if (unsignedp)
5203 code = unsigned_condition (code);
5204 scode = swap_condition (code);
5205
5206 /* If one operand is constant, make it the second one. Only do this
5207 if the other operand is not constant as well. */
5208
5209 if (swap_commutative_operands_p (op0, op1))
5210 {
5211 tem = op0;
5212 op0 = op1;
5213 op1 = tem;
5214 code = swap_condition (code);
5215 }
5216
5217 if (mode == VOIDmode)
5218 mode = GET_MODE (op0);
5219
5220 /* For some comparisons with 1 and -1, we can convert this to
5221 comparisons with zero. This will often produce more opportunities for
5222 store-flag insns. */
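  /* E.g. the signed test X < 1 is equivalent to X <= 0 and X > -1 to
     X >= 0, while the unsigned tests X >= 1 and X < 1 are equivalent to
     X != 0 and X == 0. */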
5223
5224 switch (code)
5225 {
5226 case LT:
5227 if (op1 == const1_rtx)
5228 op1 = const0_rtx, code = LE;
5229 break;
5230 case LE:
5231 if (op1 == constm1_rtx)
5232 op1 = const0_rtx, code = LT;
5233 break;
5234 case GE:
5235 if (op1 == const1_rtx)
5236 op1 = const0_rtx, code = GT;
5237 break;
5238 case GT:
5239 if (op1 == constm1_rtx)
5240 op1 = const0_rtx, code = GE;
5241 break;
5242 case GEU:
5243 if (op1 == const1_rtx)
5244 op1 = const0_rtx, code = NE;
5245 break;
5246 case LTU:
5247 if (op1 == const1_rtx)
5248 op1 = const0_rtx, code = EQ;
5249 break;
5250 default:
5251 break;
5252 }
5253
5254 /* If we are comparing a double-word integer with zero or -1, we can
5255 convert the comparison into one involving a single word. */
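  /* This works because a double-word value is zero exactly when the IOR of
     its two words is zero, is -1 exactly when the AND of its two words is
     -1, and takes its sign from the high word. */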
5256 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5257 && GET_MODE_CLASS (mode) == MODE_INT
5258 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5259 {
5260 if ((code == EQ || code == NE)
5261 && (op1 == const0_rtx || op1 == constm1_rtx))
5262 {
5263 rtx op00, op01;
5264
5265 /* Do a logical OR or AND of the two words and compare the
5266 result. */
5267 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5268 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5269 tem = expand_binop (word_mode,
5270 op1 == const0_rtx ? ior_optab : and_optab,
5271 op00, op01, NULL_RTX, unsignedp,
5272 OPTAB_DIRECT);
5273
5274 if (tem != 0)
5275 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5276 unsignedp, normalizep);
5277 }
5278 else if ((code == LT || code == GE) && op1 == const0_rtx)
5279 {
5280 rtx op0h;
5281
5282 /* If testing the sign bit, can just test on high word. */
5283 op0h = simplify_gen_subreg (word_mode, op0, mode,
5284 subreg_highpart_offset (word_mode,
5285 mode));
5286 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5287 unsignedp, normalizep);
5288 }
5289 else
5290 tem = NULL_RTX;
5291
5292 if (tem)
5293 {
5294 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5295 return tem;
5296 if (!target)
5297 target = gen_reg_rtx (target_mode);
5298
5299 convert_move (target, tem,
5300 !val_signbit_known_set_p (word_mode,
5301 (normalizep ? normalizep
5302 : STORE_FLAG_VALUE)));
5303 return target;
5304 }
5305 }
5306
5307 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5308 complement of A (for GE) and shifting the sign bit to the low bit. */
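  /* For example, in a 32-bit mode the 0/1 form of A < 0 is
     (unsigned) A >> 31 and the 0/1 form of A >= 0 is (unsigned) ~A >> 31;
     the -1/0 forms use an arithmetic shift instead. */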
5309 if (op1 == const0_rtx && (code == LT || code == GE)
5310 && GET_MODE_CLASS (mode) == MODE_INT
5311 && (normalizep || STORE_FLAG_VALUE == 1
5312 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5313 {
5314 subtarget = target;
5315
5316 if (!target)
5317 target_mode = mode;
5318
5319 /* If the result is to be wider than OP0, it is best to convert it
5320 first. If it is to be narrower, it is *incorrect* to convert it
5321 first. */
5322 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5323 {
5324 op0 = convert_modes (target_mode, mode, op0, 0);
5325 mode = target_mode;
5326 }
5327
5328 if (target_mode != mode)
5329 subtarget = 0;
5330
5331 if (code == GE)
5332 op0 = expand_unop (mode, one_cmpl_optab, op0,
5333 ((STORE_FLAG_VALUE == 1 || normalizep)
5334 ? 0 : subtarget), 0);
5335
5336 if (STORE_FLAG_VALUE == 1 || normalizep)
5337 /* If we are supposed to produce a 0/1 value, we want to do
5338 a logical shift from the sign bit to the low-order bit; for
5339 a -1/0 value, we do an arithmetic shift. */
5340 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5341 GET_MODE_BITSIZE (mode) - 1,
5342 subtarget, normalizep != -1);
5343
5344 if (mode != target_mode)
5345 op0 = convert_modes (target_mode, mode, op0, 0);
5346
5347 return op0;
5348 }
5349
5350 mclass = GET_MODE_CLASS (mode);
5351 for (compare_mode = mode; compare_mode != VOIDmode;
5352 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5353 {
5354 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5355 icode = optab_handler (cstore_optab, optab_mode);
5356 if (icode != CODE_FOR_nothing)
5357 {
5358 do_pending_stack_adjust ();
5359 tem = emit_cstore (target, icode, code, mode, compare_mode,
5360 unsignedp, op0, op1, normalizep, target_mode);
5361 if (tem)
5362 return tem;
5363
5364 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5365 {
5366 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5367 unsignedp, op1, op0, normalizep, target_mode);
5368 if (tem)
5369 return tem;
5370 }
5371 break;
5372 }
5373 }
5374
5375 return 0;
5376 }
5377
5378 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5379 and storing the result in TARGET. Normally return TARGET.
5380 Return 0 if that cannot be done.
5381
5382 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5383 it is VOIDmode, they cannot both be CONST_INT.
5384
5385 UNSIGNEDP is for the case where we have to widen the operands
5386 to perform the operation. It says to use zero-extension.
5387
5388 NORMALIZEP is 1 if we should convert the result to be either zero
5389 or one. NORMALIZEP is -1 if we should convert the result to be
5390 either zero or -1. If NORMALIZEP is zero, the result will be left
5391 "raw" out of the scc insn. */
5392
5393 rtx
5394 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5395 enum machine_mode mode, int unsignedp, int normalizep)
5396 {
5397 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5398 enum rtx_code rcode;
5399 rtx subtarget;
5400 rtx tem, last, trueval;
5401
5402 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5403 target_mode);
5404 if (tem)
5405 return tem;
5406
5407 /* If we reached here, we can't do this with a scc insn; however, there
5408 are some comparisons that can be done in other ways. Don't do any
5409 of these cases if branches are very cheap. */
5410 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5411 return 0;
5412
5413 /* See what we need to return. We can only return a 1, -1, or the
5414 sign bit. */
5415
5416 if (normalizep == 0)
5417 {
5418 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5419 normalizep = STORE_FLAG_VALUE;
5420
5421 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5422 ;
5423 else
5424 return 0;
5425 }
5426
5427 last = get_last_insn ();
5428
5429 /* If optimizing, use different pseudo registers for each insn, instead
5430 of reusing the same pseudo. This leads to better CSE, but slows
5431 down the compiler, since there are more pseudos. */
5432 subtarget = (!optimize
5433 && (target_mode == mode)) ? target : NULL_RTX;
5434 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5435
5436 /* For floating-point comparisons, try the reverse comparison or try
5437 changing the "orderedness" of the comparison. */
5438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5439 {
5440 enum rtx_code first_code;
5441 bool and_them;
5442
5443 rcode = reverse_condition_maybe_unordered (code);
5444 if (can_compare_p (rcode, mode, ccp_store_flag)
5445 && (code == ORDERED || code == UNORDERED
5446 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5447 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5448 {
5449 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5450 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5451
5452 /* For the reverse comparison, use either an addition or a XOR. */
5453 if (want_add
5454 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5455 optimize_insn_for_speed_p ()) == 0)
5456 {
5457 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5458 STORE_FLAG_VALUE, target_mode);
5459 if (tem)
5460 return expand_binop (target_mode, add_optab, tem,
5461 GEN_INT (normalizep),
5462 target, 0, OPTAB_WIDEN);
5463 }
5464 else if (!want_add
5465 && rtx_cost (trueval, XOR, 1,
5466 optimize_insn_for_speed_p ()) == 0)
5467 {
5468 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5469 normalizep, target_mode);
5470 if (tem)
5471 return expand_binop (target_mode, xor_optab, tem, trueval,
5472 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5473 }
5474 }
5475
5476 delete_insns_since (last);
5477
5478 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5479 if (code == ORDERED || code == UNORDERED)
5480 return 0;
5481
5482 and_them = split_comparison (code, mode, &first_code, &code);
5483
5484 /* If there are no NaNs, the first comparison should always fall through.
5485 Effectively change the comparison to the other one. */
5486 if (!HONOR_NANS (mode))
5487 {
5488 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5489 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5490 target_mode);
5491 }
5492
5493 #ifdef HAVE_conditional_move
5494 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5495 conditional move. */
5496 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5497 normalizep, target_mode);
5498 if (tem == 0)
5499 return 0;
5500
5501 if (and_them)
5502 tem = emit_conditional_move (target, code, op0, op1, mode,
5503 tem, const0_rtx, GET_MODE (tem), 0);
5504 else
5505 tem = emit_conditional_move (target, code, op0, op1, mode,
5506 trueval, tem, GET_MODE (tem), 0);
5507
5508 if (tem == 0)
5509 delete_insns_since (last);
5510 return tem;
5511 #else
5512 return 0;
5513 #endif
5514 }
5515
5516 /* The remaining tricks only apply to integer comparisons. */
5517
5518 if (GET_MODE_CLASS (mode) != MODE_INT)
5519 return 0;
5520
5521 /* If this is an equality comparison of integers, we can try to exclusive-or
5522 (or subtract) the two operands and use a recursive call to try the
5523 comparison with zero. Don't do any of these cases if branches are
5524 very cheap. */
5525
5526 if ((code == EQ || code == NE) && op1 != const0_rtx)
5527 {
5528 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5529 OPTAB_WIDEN);
5530
5531 if (tem == 0)
5532 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5533 OPTAB_WIDEN);
5534 if (tem != 0)
5535 tem = emit_store_flag (target, code, tem, const0_rtx,
5536 mode, unsignedp, normalizep);
5537 if (tem != 0)
5538 return tem;
5539
5540 delete_insns_since (last);
5541 }
5542
5543 /* For integer comparisons, try the reverse comparison. However, for
5544 small X, and if we'd have to extend anyway, implementing "X != 0"
5545 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5546 rcode = reverse_condition (code);
5547 if (can_compare_p (rcode, mode, ccp_store_flag)
5548 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5549 && code == NE
5550 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5551 && op1 == const0_rtx))
5552 {
5553 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5554 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5555
5556 /* Again, for the reverse comparison, use either an addition or a XOR. */
5557 if (want_add
5558 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5559 optimize_insn_for_speed_p ()) == 0)
5560 {
5561 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5562 STORE_FLAG_VALUE, target_mode);
5563 if (tem != 0)
5564 tem = expand_binop (target_mode, add_optab, tem,
5565 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5566 }
5567 else if (!want_add
5568 && rtx_cost (trueval, XOR, 1,
5569 optimize_insn_for_speed_p ()) == 0)
5570 {
5571 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5572 normalizep, target_mode);
5573 if (tem != 0)
5574 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5575 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5576 }
5577
5578 if (tem != 0)
5579 return tem;
5580 delete_insns_since (last);
5581 }
5582
5583 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5584 the constant zero. Reject all other comparisons at this point. Only
5585 do LE and GT if branches are expensive since they are expensive on
5586 2-operand machines. */
5587
5588 if (op1 != const0_rtx
5589 || (code != EQ && code != NE
5590 && (BRANCH_COST (optimize_insn_for_speed_p (),
5591 false) <= 1 || (code != LE && code != GT))))
5592 return 0;
5593
5594 /* Try to put the result of the comparison in the sign bit. Assume we can't
5595 do the necessary operation below. */
5596
5597 tem = 0;
5598
5599 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5600 the sign bit set. */
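  /* E.g. A = 5: 5 | 4 = 5, sign bit clear; A = 0: 0 | -1 = -1, sign bit
     set; A = -3: -3 | -4 = -3, sign bit set. */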
5601
5602 if (code == LE)
5603 {
5604 /* This is destructive, so SUBTARGET can't be OP0. */
5605 if (rtx_equal_p (subtarget, op0))
5606 subtarget = 0;
5607
5608 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5609 OPTAB_WIDEN);
5610 if (tem)
5611 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5612 OPTAB_WIDEN);
5613 }
5614
5615 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5616 number of bits in the mode of OP0, minus one. */
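  /* E.g. in a 32-bit mode, A = 5: (5 >> 31) - 5 = -5, sign bit set;
     A = -3: (-3 >> 31) - (-3) = 2, sign bit clear; A = 0 gives 0, sign bit
     clear. So the sign bit is set exactly when A > 0. */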
5617
5618 if (code == GT)
5619 {
5620 if (rtx_equal_p (subtarget, op0))
5621 subtarget = 0;
5622
5623 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5624 GET_MODE_BITSIZE (mode) - 1,
5625 subtarget, 0);
5626 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5627 OPTAB_WIDEN);
5628 }
5629
5630 if (code == EQ || code == NE)
5631 {
5632 /* For EQ or NE, one way to do the comparison is to apply an operation
5633 that converts the operand into a positive number if it is nonzero
5634 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5635 for NE we negate. This puts the result in the sign bit. Then we
5636 normalize with a shift, if needed.
5637
5638 Two operations that can do the above actions are ABS and FFS, so try
5639 them. If that doesn't work, and MODE is smaller than a full word,
5640 we can use zero-extension to the wider mode (an unsigned conversion)
5641 as the operation. */
5642
5643 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5644 that is compensated by the subsequent overflow when subtracting
5645 one / negating. */
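      /* E.g. with ABS: -abs (X) is negative exactly when X is nonzero, so
         its sign bit is the NE result; abs (X) - 1 is negative exactly when
         X is zero, giving EQ. The INT_MIN wraparound noted above still
         produces the correct sign in both cases. */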
5646
5647 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5648 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5649 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5650 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5651 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5652 {
5653 tem = convert_modes (word_mode, mode, op0, 1);
5654 mode = word_mode;
5655 }
5656
5657 if (tem != 0)
5658 {
5659 if (code == EQ)
5660 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5661 0, OPTAB_WIDEN);
5662 else
5663 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5664 }
5665
5666 /* If we couldn't do it that way, for NE we can "or" the two's complement
5667 of the value with itself. For EQ, we take the one's complement of
5668 that "or", which is an extra insn, so we only handle EQ if branches
5669 are expensive. */
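      /* The "or" trick works because X | -X has the sign bit set exactly
         when X is nonzero: for nonzero X either X or -X is negative (and
         INT_MIN is its own negation), while 0 | -0 is 0. */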
5670
5671 if (tem == 0
5672 && (code == NE
5673 || BRANCH_COST (optimize_insn_for_speed_p (),
5674 false) > 1))
5675 {
5676 if (rtx_equal_p (subtarget, op0))
5677 subtarget = 0;
5678
5679 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5680 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5681 OPTAB_WIDEN);
5682
5683 if (tem && code == EQ)
5684 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5685 }
5686 }
5687
5688 if (tem && normalizep)
5689 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5690 GET_MODE_BITSIZE (mode) - 1,
5691 subtarget, normalizep == 1);
5692
5693 if (tem)
5694 {
5695 if (!target)
5696 ;
5697 else if (GET_MODE (tem) != target_mode)
5698 {
5699 convert_move (target, tem, 0);
5700 tem = target;
5701 }
5702 else if (!subtarget)
5703 {
5704 emit_move_insn (target, tem);
5705 tem = target;
5706 }
5707 }
5708 else
5709 delete_insns_since (last);
5710
5711 return tem;
5712 }
5713
5714 /* Like emit_store_flag, but always succeeds. */
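/* When emit_store_flag fails, this falls back to a branching sequence:
   store the "true" value into TARGET, conditionally jump over a store of
   the "false" value, and emit the label, as coded below. */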
5715
5716 rtx
5717 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5718 enum machine_mode mode, int unsignedp, int normalizep)
5719 {
5720 rtx tem, label;
5721 rtx trueval, falseval;
5722
5723 /* First see if emit_store_flag can do the job. */
5724 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5725 if (tem != 0)
5726 return tem;
5727
5728 if (!target)
5729 target = gen_reg_rtx (word_mode);
5730
5731 /* If this failed, we have to do this with set/compare/jump/set code.
5732 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5733 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5734 if (code == NE
5735 && GET_MODE_CLASS (mode) == MODE_INT
5736 && REG_P (target)
5737 && op0 == target
5738 && op1 == const0_rtx)
5739 {
5740 label = gen_label_rtx ();
5741 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5742 mode, NULL_RTX, NULL_RTX, label, -1);
5743 emit_move_insn (target, trueval);
5744 emit_label (label);
5745 return target;
5746 }
5747
5748 if (!REG_P (target)
5749 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5750 target = gen_reg_rtx (GET_MODE (target));
5751
5752 /* Jump in the right direction if the target cannot implement CODE
5753 but can jump on its reverse condition. */
5754 falseval = const0_rtx;
5755 if (! can_compare_p (code, mode, ccp_jump)
5756 && (! FLOAT_MODE_P (mode)
5757 || code == ORDERED || code == UNORDERED
5758 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5759 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5760 {
5761 enum rtx_code rcode;
5762 if (FLOAT_MODE_P (mode))
5763 rcode = reverse_condition_maybe_unordered (code);
5764 else
5765 rcode = reverse_condition (code);
5766
5767 /* Canonicalize to UNORDERED for the libcall. */
5768 if (can_compare_p (rcode, mode, ccp_jump)
5769 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5770 {
5771 falseval = trueval;
5772 trueval = const0_rtx;
5773 code = rcode;
5774 }
5775 }
5776
5777 emit_move_insn (target, trueval);
5778 label = gen_label_rtx ();
5779 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5780 NULL_RTX, label, -1);
5781
5782 emit_move_insn (target, falseval);
5783 emit_label (label);
5784
5785 return target;
5786 }
5787 \f
5788 /* Perform possibly multi-word comparison and conditional jump to LABEL
5789 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5790 now a thin wrapper around do_compare_rtx_and_jump. */
5791
5792 static void
5793 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5794 rtx label)
5795 {
5796 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5797 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5798 NULL_RTX, NULL_RTX, label, -1);
5799 }