gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT,
  53                                    rtx);
  54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx mask_rtx (enum machine_mode, int, int, int);
  64 static rtx lshift_value (enum machine_mode, rtx, int, int);
  65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  66                                     unsigned HOST_WIDE_INT, int);
  67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  70
  71 /* Test whether a value is zero of a power of two.  */
  72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  73
  74 #ifndef SLOW_UNALIGNED_ACCESS
  75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  76 #endif
  77
  78
  79 /* Reduce conditional compilation elsewhere.  */
  80 #ifndef HAVE_insv
  81 #define HAVE_insv       0
  82 #define CODE_FOR_insv   CODE_FOR_nothing
  83 #define gen_insv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extv
  86 #define HAVE_extv       0
  87 #define CODE_FOR_extv   CODE_FOR_nothing
  88 #define gen_extv(a,b,c,d) NULL_RTX
  89 #endif
  90 #ifndef HAVE_extzv
  91 #define HAVE_extzv      0
  92 #define CODE_FOR_extzv  CODE_FOR_nothing
  93 #define gen_extzv(a,b,c,d) NULL_RTX
  94 #endif
  95
  96 void
  97 init_expmed (void)
  98 {
  99   struct
 100   {
 101     struct rtx_def reg;         rtunion reg_fld[2];
 102     struct rtx_def plus;        rtunion plus_fld1;
 103     struct rtx_def neg;
 104     struct rtx_def mult;        rtunion mult_fld1;
 105     struct rtx_def sdiv;        rtunion sdiv_fld1;
 106     struct rtx_def udiv;        rtunion udiv_fld1;
 107     struct rtx_def zext;
 108     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 109     struct rtx_def smod_32;     rtunion smod_32_fld1;
 110     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 111     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 112     struct rtx_def wide_trunc;
 113     struct rtx_def shift;       rtunion shift_fld1;
 114     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 115     struct rtx_def shift_add;   rtunion shift_add_fld1;
 116     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 117     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 118   } all;
 119
 120   rtx pow2[MAX_BITS_PER_WORD];
 121   rtx cint[MAX_BITS_PER_WORD];
 122   int m, n;
 123   enum machine_mode mode, wider_mode;
 124   int speed;
 125
 126
 127   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 128     {
 129       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 130       cint[m] = GEN_INT (m);
 131     }
 132   memset (&all, 0, sizeof all);
 133
 134   PUT_CODE (&all.reg, REG);
 135   /* Avoid using hard regs in ways which may be unsupported.  */
 136   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 137
 138   PUT_CODE (&all.plus, PLUS);
 139   XEXP (&all.plus, 0) = &all.reg;
 140   XEXP (&all.plus, 1) = &all.reg;
 141
 142   PUT_CODE (&all.neg, NEG);
 143   XEXP (&all.neg, 0) = &all.reg;
 144
 145   PUT_CODE (&all.mult, MULT);
 146   XEXP (&all.mult, 0) = &all.reg;
 147   XEXP (&all.mult, 1) = &all.reg;
 148
 149   PUT_CODE (&all.sdiv, DIV);
 150   XEXP (&all.sdiv, 0) = &all.reg;
 151   XEXP (&all.sdiv, 1) = &all.reg;
 152
 153   PUT_CODE (&all.udiv, UDIV);
 154   XEXP (&all.udiv, 0) = &all.reg;
 155   XEXP (&all.udiv, 1) = &all.reg;
 156
 157   PUT_CODE (&all.sdiv_32, DIV);
 158   XEXP (&all.sdiv_32, 0) = &all.reg;
 159   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 160
 161   PUT_CODE (&all.smod_32, MOD);
 162   XEXP (&all.smod_32, 0) = &all.reg;
 163   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 164
 165   PUT_CODE (&all.zext, ZERO_EXTEND);
 166   XEXP (&all.zext, 0) = &all.reg;
 167
 168   PUT_CODE (&all.wide_mult, MULT);
 169   XEXP (&all.wide_mult, 0) = &all.zext;
 170   XEXP (&all.wide_mult, 1) = &all.zext;
 171
 172   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 173   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 174
 175   PUT_CODE (&all.wide_trunc, TRUNCATE);
 176   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 177
 178   PUT_CODE (&all.shift, ASHIFT);
 179   XEXP (&all.shift, 0) = &all.reg;
 180
 181   PUT_CODE (&all.shift_mult, MULT);
 182   XEXP (&all.shift_mult, 0) = &all.reg;
 183
 184   PUT_CODE (&all.shift_add, PLUS);
 185   XEXP (&all.shift_add, 0) = &all.shift_mult;
 186   XEXP (&all.shift_add, 1) = &all.reg;
 187
 188   PUT_CODE (&all.shift_sub0, MINUS);
 189   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 190   XEXP (&all.shift_sub0, 1) = &all.reg;
 191
 192   PUT_CODE (&all.shift_sub1, MINUS);
 193   XEXP (&all.shift_sub1, 0) = &all.reg;
 194   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 195
 196   for (speed = 0; speed < 2; speed++)
 197     {
 198       crtl->maybe_hot_insn_p = speed;
 199       zero_cost[speed] = set_src_cost (const0_rtx, speed);
 200
 201       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202            mode != VOIDmode;
 203            mode = GET_MODE_WIDER_MODE (mode))
 204         {
 205           PUT_MODE (&all.reg, mode);
 206           PUT_MODE (&all.plus, mode);
 207           PUT_MODE (&all.neg, mode);
 208           PUT_MODE (&all.mult, mode);
 209           PUT_MODE (&all.sdiv, mode);
 210           PUT_MODE (&all.udiv, mode);
 211           PUT_MODE (&all.sdiv_32, mode);
 212           PUT_MODE (&all.smod_32, mode);
 213           PUT_MODE (&all.wide_trunc, mode);
 214           PUT_MODE (&all.shift, mode);
 215           PUT_MODE (&all.shift_mult, mode);
 216           PUT_MODE (&all.shift_add, mode);
 217           PUT_MODE (&all.shift_sub0, mode);
 218           PUT_MODE (&all.shift_sub1, mode);
 219
 220           add_cost[speed][mode] = set_src_cost (&all.plus, speed);
 221           neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
 222           mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
 223           sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
 224           udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
 225
 226           sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
 227                                           <= 2 * add_cost[speed][mode]);
 228           smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
 229                                           <= 4 * add_cost[speed][mode]);
 230
 231           wider_mode = GET_MODE_WIDER_MODE (mode);
 232           if (wider_mode != VOIDmode)
 233             {
 234               PUT_MODE (&all.zext, wider_mode);
 235               PUT_MODE (&all.wide_mult, wider_mode);
 236               PUT_MODE (&all.wide_lshr, wider_mode);
 237               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 238
 239               mul_widen_cost[speed][wider_mode]
 240                 = set_src_cost (&all.wide_mult, speed);
 241               mul_highpart_cost[speed][mode]
 242                 = set_src_cost (&all.wide_trunc, speed);
 243             }
 244
 245           shift_cost[speed][mode][0] = 0;
 246           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 247             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 248
 249           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 250           for (m = 1; m < n; m++)
 251             {
 252               XEXP (&all.shift, 1) = cint[m];
 253               XEXP (&all.shift_mult, 1) = pow2[m];
 254
 255               shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
 256               shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
 257                                                             speed);
 258               shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
 259                                                              speed);
 260               shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
 261                                                              speed);
 262             }
 263         }
 264     }
 265   if (alg_hash_used_p)
 266     memset (alg_hash, 0, sizeof (alg_hash));
 267   else
 268     alg_hash_used_p = true;
 269   default_rtl_profile ();
 270 }
 271
 272 /* Return an rtx representing minus the value of X.
 273    MODE is the intended mode of the result,
 274    useful if X is a CONST_INT.  */
 275
 276 rtx
 277 negate_rtx (enum machine_mode mode, rtx x)
 278 {
 279   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 280
 281   if (result == 0)
 282     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 283
 284   return result;
 285 }
 286
 287 /* Report on the availability of insv/extv/extzv and the desired mode
 288    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 289    is false; else the mode of the specified operand.  If OPNO is -1,
 290    all the caller cares about is whether the insn is available.  */
 291 enum machine_mode
 292 mode_for_extraction (enum extraction_pattern pattern, int opno)
 293 {
 294   const struct insn_data_d *data;
 295
 296   switch (pattern)
 297     {
 298     case EP_insv:
 299       if (HAVE_insv)
 300         {
 301           data = &insn_data[CODE_FOR_insv];
 302           break;
 303         }
 304       return MAX_MACHINE_MODE;
 305
 306     case EP_extv:
 307       if (HAVE_extv)
 308         {
 309           data = &insn_data[CODE_FOR_extv];
 310           break;
 311         }
 312       return MAX_MACHINE_MODE;
 313
 314     case EP_extzv:
 315       if (HAVE_extzv)
 316         {
 317           data = &insn_data[CODE_FOR_extzv];
 318           break;
 319         }
 320       return MAX_MACHINE_MODE;
 321
 322     default:
 323       gcc_unreachable ();
 324     }
 325
 326   if (opno == -1)
 327     return VOIDmode;
 328
 329   /* Everyone who uses this function used to follow it with
 330      if (result == VOIDmode) result = word_mode; */
 331   if (data->operand[opno].mode == VOIDmode)
 332     return word_mode;
 333   return data->operand[opno].mode;
 334 }
 335 \f
 336 /* A subroutine of store_bit_field, with the same arguments.  Return true
 337    if the operation could be implemented.
 338
 339    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 340    no other way of implementing the operation.  If FALLBACK_P is false,
 341    return false instead.  */
 342
 343 static bool
 344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 345                    unsigned HOST_WIDE_INT bitnum,
 346                    unsigned HOST_WIDE_INT bitregion_start,
 347                    unsigned HOST_WIDE_INT bitregion_end,
 348                    enum machine_mode fieldmode,
 349                    rtx value, bool fallback_p)
 350 {
 351   unsigned int unit
 352     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 353   unsigned HOST_WIDE_INT offset, bitpos;
 354   rtx op0 = str_rtx;
 355   int byte_offset;
 356   rtx orig_value;
 357
 358   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 359
 360   while (GET_CODE (op0) == SUBREG)
 361     {
 362       /* The following line once was done only if WORDS_BIG_ENDIAN,
 363          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 364          meaningful at a much higher level; when structures are copied
 365          between memory and regs, the higher-numbered regs
 366          always get higher addresses.  */
 367       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 368       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 369
 370       byte_offset = 0;
 371
 372       /* Paradoxical subregs need special handling on big endian machines.  */
 373       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 374         {
 375           int difference = inner_mode_size - outer_mode_size;
 376
 377           if (WORDS_BIG_ENDIAN)
 378             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 379           if (BYTES_BIG_ENDIAN)
 380             byte_offset += difference % UNITS_PER_WORD;
 381         }
 382       else
 383         byte_offset = SUBREG_BYTE (op0);
 384
 385       bitnum += byte_offset * BITS_PER_UNIT;
 386       op0 = SUBREG_REG (op0);
 387     }
 388
 389   /* No action is needed if the target is a register and if the field
 390      lies completely outside that register.  This can occur if the source
 391      code contains an out-of-bounds access to a small array.  */
 392   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 393     return true;
 394
 395   /* Use vec_set patterns for inserting parts of vectors whenever
 396      available.  */
 397   if (VECTOR_MODE_P (GET_MODE (op0))
 398       && !MEM_P (op0)
 399       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 403     {
 404       struct expand_operand ops[3];
 405       enum machine_mode outermode = GET_MODE (op0);
 406       enum machine_mode innermode = GET_MODE_INNER (outermode);
 407       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 408       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 409
 410       create_fixed_operand (&ops[0], op0);
 411       create_input_operand (&ops[1], value, innermode);
 412       create_integer_operand (&ops[2], pos);
 413       if (maybe_expand_insn (icode, 3, ops))
 414         return true;
 415     }
 416
 417   /* If the target is a register, overwriting the entire object, or storing
 418      a full-word or multi-word field can be done with just a SUBREG.
 419
 420      If the target is memory, storing any naturally aligned field can be
 421      done with a simple store.  For targets that support fast unaligned
 422      memory, any naturally sized, unit aligned field can be done directly.  */
 423
 424   offset = bitnum / unit;
 425   bitpos = bitnum % unit;
 426   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 427                 + (offset * UNITS_PER_WORD);
 428
 429   if (bitpos == 0
 430       && bitsize == GET_MODE_BITSIZE (fieldmode)
 431       && (!MEM_P (op0)
 432           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 433               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 434              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 435                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 436                                      byte_offset)))
 437           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 438              || (offset * BITS_PER_UNIT % bitsize == 0
 439                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 440     {
 441       if (MEM_P (op0))
 442         op0 = adjust_address (op0, fieldmode, offset);
 443       else if (GET_MODE (op0) != fieldmode)
 444         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 445                                    byte_offset);
 446       emit_move_insn (op0, value);
 447       return true;
 448     }
 449
 450   /* Make sure we are playing with integral modes.  Pun with subregs
 451      if we aren't.  This must come after the entire register case above,
 452      since that case is valid for any mode.  The following cases are only
 453      valid for integral modes.  */
 454   {
 455     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 456     if (imode != GET_MODE (op0))
 457       {
 458         if (MEM_P (op0))
 459           op0 = adjust_address (op0, imode, 0);
 460         else
 461           {
 462             gcc_assert (imode != BLKmode);
 463             op0 = gen_lowpart (imode, op0);
 464           }
 465       }
 466   }
 467
 468   /* We may be accessing data outside the field, which means
 469      we can alias adjacent data.  */
 470   /* ?? not always for C++0x memory model ?? */
 471   if (MEM_P (op0))
 472     {
 473       op0 = shallow_copy_rtx (op0);
 474       set_mem_alias_set (op0, 0);
 475       set_mem_expr (op0, 0);
 476     }
 477
 478   /* If OP0 is a register, BITPOS must count within a word.
 479      But as we have it, it counts within whatever size OP0 now has.
 480      On a bigendian machine, these are not the same, so convert.  */
 481   if (BYTES_BIG_ENDIAN
 482       && !MEM_P (op0)
 483       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 484     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 485
 486   /* Storing an lsb-aligned field in a register
 487      can be done with a movestrict instruction.  */
 488
 489   if (!MEM_P (op0)
 490       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 491       && bitsize == GET_MODE_BITSIZE (fieldmode)
 492       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 493     {
 494       struct expand_operand ops[2];
 495       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 496       rtx arg0 = op0;
 497       unsigned HOST_WIDE_INT subreg_off;
 498
 499       if (GET_CODE (arg0) == SUBREG)
 500         {
 501           /* Else we've got some float mode source being extracted into
 502              a different float mode destination -- this combination of
 503              subregs results in Severe Tire Damage.  */
 504           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 505                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 506                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 507           arg0 = SUBREG_REG (arg0);
 508         }
 509
 510       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 511                    + (offset * UNITS_PER_WORD);
 512       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 513         {
 514           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 515
 516           create_fixed_operand (&ops[0], arg0);
 517           /* Shrink the source operand to FIELDMODE.  */
 518           create_convert_operand_to (&ops[1], value, fieldmode, false);
 519           if (maybe_expand_insn (icode, 2, ops))
 520             return true;
 521         }
 522     }
 523
 524   /* Handle fields bigger than a word.  */
 525
 526   if (bitsize > BITS_PER_WORD)
 527     {
 528       /* Here we transfer the words of the field
 529          in the order least significant first.
 530          This is because the most significant word is the one which may
 531          be less than full.
 532          However, only do that if the value is not BLKmode.  */
 533
 534       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 535       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 536       unsigned int i;
 537       rtx last;
 538
 539       /* This is the mode we must force value to, so that there will be enough
 540          subwords to extract.  Note that fieldmode will often (always?) be
 541          VOIDmode, because that is what store_field uses to indicate that this
 542          is a bit field, but passing VOIDmode to operand_subword_force
 543          is not allowed.  */
 544       fieldmode = GET_MODE (value);
 545       if (fieldmode == VOIDmode)
 546         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 547
 548       last = get_last_insn ();
 549       for (i = 0; i < nwords; i++)
 550         {
 551           /* If I is 0, use the low-order word in both field and target;
 552              if I is 1, use the next to lowest word; and so on.  */
 553           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 554           unsigned int bit_offset = (backwards
 555                                      ? MAX ((int) bitsize - ((int) i + 1)
 556                                             * BITS_PER_WORD,
 557                                             0)
 558                                      : (int) i * BITS_PER_WORD);
 559           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 560           unsigned HOST_WIDE_INT new_bitsize =
 561             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 562
 563           /* If the remaining chunk doesn't have full wordsize we have
 564              to make sure that for big endian machines the higher order
 565              bits are used.  */
 566           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 567             value_word = simplify_expand_binop (word_mode, lshr_optab,
 568                                                 value_word,
 569                                                 GEN_INT (BITS_PER_WORD
 570                                                          - new_bitsize),
 571                                                 NULL_RTX, true,
 572                                                 OPTAB_LIB_WIDEN);
 573
 574           if (!store_bit_field_1 (op0, new_bitsize,
 575                                   bitnum + bit_offset,
 576                                   bitregion_start, bitregion_end,
 577                                   word_mode,
 578                                   value_word, fallback_p))
 579             {
 580               delete_insns_since (last);
 581               return false;
 582             }
 583         }
 584       return true;
 585     }
 586
 587   /* From here on we can assume that the field to be stored in is
 588      a full-word (whatever type that is), since it is shorter than a word.  */
 589
 590   /* OFFSET is the number of words or bytes (UNIT says which)
 591      from STR_RTX to the first word or byte containing part of the field.  */
 592
 593   if (!MEM_P (op0))
 594     {
 595       if (offset != 0
 596           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 597         {
 598           if (!REG_P (op0))
 599             {
 600               /* Since this is a destination (lvalue), we can't copy
 601                  it to a pseudo.  We can remove a SUBREG that does not
 602                  change the size of the operand.  Such a SUBREG may
 603                  have been added above.  */
 604               gcc_assert (GET_CODE (op0) == SUBREG
 605                           && (GET_MODE_SIZE (GET_MODE (op0))
 606                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 607               op0 = SUBREG_REG (op0);
 608             }
 609           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 610                                 op0, (offset * UNITS_PER_WORD));
 611         }
 612       offset = 0;
 613     }
 614
 615   /* If VALUE has a floating-point or complex mode, access it as an
 616      integer of the corresponding size.  This can occur on a machine
 617      with 64 bit registers that uses SFmode for float.  It can also
 618      occur for unaligned float or complex fields.  */
 619   orig_value = value;
 620   if (GET_MODE (value) != VOIDmode
 621       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 622       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 623     {
 624       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 625       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 626     }
 627
 628   /* Now OFFSET is nonzero only if OP0 is memory
 629      and is therefore always measured in bytes.  */
 630
 631   if (HAVE_insv
 632       && GET_MODE (value) != BLKmode
 633       && bitsize > 0
 634       && GET_MODE_BITSIZE (op_mode) >= bitsize
 635       /* Do not use insv for volatile bitfields when
 636          -fstrict-volatile-bitfields is in effect.  */
 637       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
 638            && flag_strict_volatile_bitfields > 0)
 639       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 640             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 641     {
 642       struct expand_operand ops[4];
 643       int xbitpos = bitpos;
 644       rtx value1;
 645       rtx xop0 = op0;
 646       rtx last = get_last_insn ();
 647       bool copy_back = false;
 648
 649       /* Add OFFSET into OP0's address.  */
 650       if (MEM_P (xop0))
 651         xop0 = adjust_address (xop0, byte_mode, offset);
 652
 653       /* If xop0 is a register, we need it in OP_MODE
 654          to make it acceptable to the format of insv.  */
 655       if (GET_CODE (xop0) == SUBREG)
 656         /* We can't just change the mode, because this might clobber op0,
 657            and we will need the original value of op0 if insv fails.  */
 658         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 659       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 660         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 661
 662       /* If the destination is a paradoxical subreg such that we need a
 663          truncate to the inner mode, perform the insertion on a temporary and
 664          truncate the result to the original destination.  Note that we can't
 665          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 666          X) 0)) is (reg:N X).  */
 667       if (GET_CODE (xop0) == SUBREG
 668           && REG_P (SUBREG_REG (xop0))
 669           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 670                                               op_mode)))
 671         {
 672           rtx tem = gen_reg_rtx (op_mode);
 673           emit_move_insn (tem, xop0);
 674           xop0 = tem;
 675           copy_back = true;
 676         }
 677
 678       /* We have been counting XBITPOS within UNIT.
 679          Count instead within the size of the register.  */
 680       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
 681         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 682
 683       unit = GET_MODE_BITSIZE (op_mode);
 684
 685       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 686          "backwards" from the size of the unit we are inserting into.
 687          Otherwise, we count bits from the most significant on a
 688          BYTES/BITS_BIG_ENDIAN machine.  */
 689
 690       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 691         xbitpos = unit - bitsize - xbitpos;
 692
 693       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 694       value1 = value;
 695       if (GET_MODE (value) != op_mode)
 696         {
 697           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 698             {
 699               /* Optimization: Don't bother really extending VALUE
 700                  if it has all the bits we will actually use.  However,
 701                  if we must narrow it, be sure we do it correctly.  */
 702
 703               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 704                 {
 705                   rtx tmp;
 706
 707                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 708                   if (! tmp)
 709                     tmp = simplify_gen_subreg (op_mode,
 710                                                force_reg (GET_MODE (value),
 711                                                           value1),
 712                                                GET_MODE (value), 0);
 713                   value1 = tmp;
 714                 }
 715               else
 716                 value1 = gen_lowpart (op_mode, value1);
 717             }
 718           else if (CONST_INT_P (value))
 719             value1 = gen_int_mode (INTVAL (value), op_mode);
 720           else
 721             /* Parse phase is supposed to make VALUE's data type
 722                match that of the component reference, which is a type
 723                at least as wide as the field; so VALUE should have
 724                a mode that corresponds to that type.  */
 725             gcc_assert (CONSTANT_P (value));
 726         }
 727
 728       create_fixed_operand (&ops[0], xop0);
 729       create_integer_operand (&ops[1], bitsize);
 730       create_integer_operand (&ops[2], xbitpos);
 731       create_input_operand (&ops[3], value1, op_mode);
 732       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 733         {
 734           if (copy_back)
 735             convert_move (op0, xop0, true);
 736           return true;
 737         }
 738       delete_insns_since (last);
 739     }
 740
 741   /* If OP0 is a memory, try copying it to a register and seeing if a
 742      cheap register alternative is available.  */
 743   if (HAVE_insv && MEM_P (op0))
 744     {
 745       enum machine_mode bestmode;
 746       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 747
 748       if (bitregion_end)
 749         maxbits = bitregion_end - bitregion_start + 1;
 750
 751       /* Get the mode to use for inserting into this field.  If OP0 is
 752          BLKmode, get the smallest mode consistent with the alignment. If
 753          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 754          mode. Otherwise, use the smallest mode containing the field.  */
 755
 756       if (GET_MODE (op0) == BLKmode
 757           || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
 758           || (op_mode != MAX_MACHINE_MODE
 759               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 760         bestmode = get_best_mode  (bitsize, bitnum,
 761                                   bitregion_start, bitregion_end,
 762                                   MEM_ALIGN (op0),
 763                                   (op_mode == MAX_MACHINE_MODE
 764                                    ? VOIDmode : op_mode),
 765                                   MEM_VOLATILE_P (op0));
 766       else
 767         bestmode = GET_MODE (op0);
 768
 769       if (bestmode != VOIDmode
 770           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 771           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 772                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 773         {
 774           rtx last, tempreg, xop0;
 775           unsigned HOST_WIDE_INT xoffset, xbitpos;
 776
 777           last = get_last_insn ();
 778
 779           /* Adjust address to point to the containing unit of
 780              that mode.  Compute the offset as a multiple of this unit,
 781              counting in bytes.  */
 782           unit = GET_MODE_BITSIZE (bestmode);
 783           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 784           xbitpos = bitnum % unit;
 785           xop0 = adjust_address (op0, bestmode, xoffset);
 786
 787           /* Fetch that unit, store the bitfield in it, then store
 788              the unit.  */
 789           tempreg = copy_to_reg (xop0);
 790           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 791                                  bitregion_start, bitregion_end,
 792                                  fieldmode, orig_value, false))
 793             {
 794               emit_move_insn (xop0, tempreg);
 795               return true;
 796             }
 797           delete_insns_since (last);
 798         }
 799     }
 800
 801   if (!fallback_p)
 802     return false;
 803
 804   store_fixed_bit_field (op0, offset, bitsize, bitpos,
 805                          bitregion_start, bitregion_end, value);
 806   return true;
 807 }
 808
 809 /* Generate code to store value from rtx VALUE
 810    into a bit-field within structure STR_RTX
 811    containing BITSIZE bits starting at bit BITNUM.
 812
 813    BITREGION_START is bitpos of the first bitfield in this region.
 814    BITREGION_END is the bitpos of the ending bitfield in this region.
 815    These two fields are 0, if the C++ memory model does not apply,
 816    or we are not interested in keeping track of bitfield regions.
 817
 818    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 819
 820 void
 821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 822                  unsigned HOST_WIDE_INT bitnum,
 823                  unsigned HOST_WIDE_INT bitregion_start,
 824                  unsigned HOST_WIDE_INT bitregion_end,
 825                  enum machine_mode fieldmode,
 826                  rtx value)
 827 {
 828   /* Under the C++0x memory model, we must not touch bits outside the
 829      bit region.  Adjust the address to start at the beginning of the
 830      bit region.  */
 831   if (MEM_P (str_rtx)
 832       && bitregion_start > 0)
 833     {
 834       enum machine_mode bestmode;
 835       enum machine_mode op_mode;
 836       unsigned HOST_WIDE_INT offset;
 837
 838       op_mode = mode_for_extraction (EP_insv, 3);
 839       if (op_mode == MAX_MACHINE_MODE)
 840         op_mode = VOIDmode;
 841
 842       offset = bitregion_start / BITS_PER_UNIT;
 843       bitnum -= bitregion_start;
 844       bitregion_end -= bitregion_start;
 845       bitregion_start = 0;
 846       bestmode = get_best_mode (bitsize, bitnum,
 847                                 bitregion_start, bitregion_end,
 848                                 MEM_ALIGN (str_rtx),
 849                                 op_mode,
 850                                 MEM_VOLATILE_P (str_rtx));
 851       str_rtx = adjust_address (str_rtx, bestmode, offset);
 852     }
 853
 854   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 855                           bitregion_start, bitregion_end,
 856                           fieldmode, value, true))
 857     gcc_unreachable ();
 858 }
 859 \f
 860 /* Use shifts and boolean operations to store VALUE
 861    into a bit field of width BITSIZE
 862    in a memory location specified by OP0 except offset by OFFSET bytes.
 863      (OFFSET must be 0 if OP0 is a register.)
 864    The field starts at position BITPOS within the byte.
 865     (If OP0 is a register, it may be a full word or a narrower mode,
 866      but BITPOS still counts within a full word,
 867      which is significant on bigendian machines.)  */
 868
 869 static void
 870 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 871                        unsigned HOST_WIDE_INT bitsize,
 872                        unsigned HOST_WIDE_INT bitpos,
 873                        unsigned HOST_WIDE_INT bitregion_start,
 874                        unsigned HOST_WIDE_INT bitregion_end,
 875                        rtx value)
 876 {
 877   enum machine_mode mode;
 878   unsigned int total_bits = BITS_PER_WORD;
 879   rtx temp;
 880   int all_zero = 0;
 881   int all_one = 0;
 882
 883   /* There is a case not handled here:
 884      a structure with a known alignment of just a halfword
 885      and a field split across two aligned halfwords within the structure.
 886      Or likewise a structure with a known alignment of just a byte
 887      and a field split across two bytes.
 888      Such cases are not supposed to be able to occur.  */
 889
 890   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 891     {
 892       gcc_assert (!offset);
 893       /* Special treatment for a bit field split across two registers.  */
 894       if (bitsize + bitpos > BITS_PER_WORD)
 895         {
 896           store_split_bit_field (op0, bitsize, bitpos,
 897                                  bitregion_start, bitregion_end,
 898                                  value);
 899           return;
 900         }
 901     }
 902   else
 903     {
 904       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 905
 906       if (bitregion_end)
 907         maxbits = bitregion_end - bitregion_start + 1;
 908
 909       /* Get the proper mode to use for this field.  We want a mode that
 910          includes the entire field.  If such a mode would be larger than
 911          a word, we won't be doing the extraction the normal way.
 912          We don't want a mode bigger than the destination.  */
 913
 914       mode = GET_MODE (op0);
 915       if (GET_MODE_BITSIZE (mode) == 0
 916           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 917         mode = word_mode;
 918
 919       if (MEM_VOLATILE_P (op0)
 920           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 921           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 922           && flag_strict_volatile_bitfields > 0)
 923         mode = GET_MODE (op0);
 924       else
 925         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 926                               bitregion_start, bitregion_end,
 927                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 928
 929       if (mode == VOIDmode)
 930         {
 931           /* The only way this should occur is if the field spans word
 932              boundaries.  */
 933           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 934                                  bitregion_start, bitregion_end, value);
 935           return;
 936         }
 937
 938       total_bits = GET_MODE_BITSIZE (mode);
 939
 940       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 941          be in the range 0 to total_bits-1, and put any excess bytes in
 942          OFFSET.  */
 943       if (bitpos >= total_bits)
 944         {
 945           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 946           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 947                      * BITS_PER_UNIT);
 948         }
 949
 950       /* Get ref to an aligned byte, halfword, or word containing the field.
 951          Adjust BITPOS to be position within a word,
 952          and OFFSET to be the offset of that word.
 953          Then alter OP0 to refer to that word.  */
 954       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 955       offset -= (offset % (total_bits / BITS_PER_UNIT));
 956       op0 = adjust_address (op0, mode, offset);
 957     }
 958
 959   mode = GET_MODE (op0);
 960
 961   /* Now MODE is either some integral mode for a MEM as OP0,
 962      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 963      The bit field is contained entirely within OP0.
 964      BITPOS is the starting bit number within OP0.
 965      (OP0's mode may actually be narrower than MODE.)  */
 966
 967   if (BYTES_BIG_ENDIAN)
 968       /* BITPOS is the distance between our msb
 969          and that of the containing datum.
 970          Convert it to the distance from the lsb.  */
 971       bitpos = total_bits - bitsize - bitpos;
 972
 973   /* Now BITPOS is always the distance between our lsb
 974      and that of OP0.  */
 975
 976   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 977      we must first convert its mode to MODE.  */
 978
 979   if (CONST_INT_P (value))
 980     {
 981       HOST_WIDE_INT v = INTVAL (value);
 982
 983       if (bitsize < HOST_BITS_PER_WIDE_INT)
 984         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 985
 986       if (v == 0)
 987         all_zero = 1;
 988       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 989                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 990                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 991         all_one = 1;
 992
 993       value = lshift_value (mode, value, bitpos, bitsize);
 994     }
 995   else
 996     {
 997       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 998                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 999
1000       if (GET_MODE (value) != mode)
1001         value = convert_to_mode (mode, value, 1);
1002
1003       if (must_and)
1004         value = expand_binop (mode, and_optab, value,
1005                               mask_rtx (mode, 0, bitsize, 0),
1006                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1007       if (bitpos > 0)
1008         value = expand_shift (LSHIFT_EXPR, mode, value,
1009                               bitpos, NULL_RTX, 1);
1010     }
1011
1012   /* Now clear the chosen bits in OP0,
1013      except that if VALUE is -1 we need not bother.  */
1014   /* We keep the intermediates in registers to allow CSE to combine
1015      consecutive bitfield assignments.  */
1016
1017   temp = force_reg (mode, op0);
1018
1019   if (! all_one)
1020     {
1021       temp = expand_binop (mode, and_optab, temp,
1022                            mask_rtx (mode, bitpos, bitsize, 1),
1023                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1024       temp = force_reg (mode, temp);
1025     }
1026
1027   /* Now logical-or VALUE into OP0, unless it is zero.  */
1028
1029   if (! all_zero)
1030     {
1031       temp = expand_binop (mode, ior_optab, temp, value,
1032                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033       temp = force_reg (mode, temp);
1034     }
1035
1036   if (op0 != temp)
1037     {
1038       op0 = copy_rtx (op0);
1039       emit_move_insn (op0, temp);
1040     }
1041 }
1042 \f
1043 /* Store a bit field that is split across multiple accessible memory objects.
1044
1045    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1046    BITSIZE is the field width; BITPOS the position of its first bit
1047    (within the word).
1048    VALUE is the value to store.
1049
1050    This does not yet handle fields wider than BITS_PER_WORD.  */
1051
1052 static void
1053 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1054                        unsigned HOST_WIDE_INT bitpos,
1055                        unsigned HOST_WIDE_INT bitregion_start,
1056                        unsigned HOST_WIDE_INT bitregion_end,
1057                        rtx value)
1058 {
1059   unsigned int unit;
1060   unsigned int bitsdone = 0;
1061
1062   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1063      much at a time.  */
1064   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1065     unit = BITS_PER_WORD;
1066   else
1067     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1068
1069   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1070      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1071      that VALUE might be a floating-point constant.  */
1072   if (CONSTANT_P (value) && !CONST_INT_P (value))
1073     {
1074       rtx word = gen_lowpart_common (word_mode, value);
1075
1076       if (word && (value != word))
1077         value = word;
1078       else
1079         value = gen_lowpart_common (word_mode,
1080                                     force_reg (GET_MODE (value) != VOIDmode
1081                                                ? GET_MODE (value)
1082                                                : word_mode, value));
1083     }
1084
1085   while (bitsdone < bitsize)
1086     {
1087       unsigned HOST_WIDE_INT thissize;
1088       rtx part, word;
1089       unsigned HOST_WIDE_INT thispos;
1090       unsigned HOST_WIDE_INT offset;
1091
1092       offset = (bitpos + bitsdone) / unit;
1093       thispos = (bitpos + bitsdone) % unit;
1094
1095       /* THISSIZE must not overrun a word boundary.  Otherwise,
1096          store_fixed_bit_field will call us again, and we will mutually
1097          recurse forever.  */
1098       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1099       thissize = MIN (thissize, unit - thispos);
1100
1101       if (BYTES_BIG_ENDIAN)
1102         {
1103           int total_bits;
1104
1105           /* We must do an endian conversion exactly the same way as it is
1106              done in extract_bit_field, so that the two calls to
1107              extract_fixed_bit_field will have comparable arguments.  */
1108           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1109             total_bits = BITS_PER_WORD;
1110           else
1111             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1112
1113           /* Fetch successively less significant portions.  */
1114           if (CONST_INT_P (value))
1115             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1116                              >> (bitsize - bitsdone - thissize))
1117                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1118           else
1119             /* The args are chosen so that the last part includes the
1120                lsb.  Give extract_bit_field the value it needs (with
1121                endianness compensation) to fetch the piece we want.  */
1122             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1123                                             total_bits - bitsize + bitsdone,
1124                                             NULL_RTX, 1, false);
1125         }
1126       else
1127         {
1128           /* Fetch successively more significant portions.  */
1129           if (CONST_INT_P (value))
1130             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1131                              >> bitsdone)
1132                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1133           else
1134             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1135                                             bitsdone, NULL_RTX, 1, false);
1136         }
1137
1138       /* If OP0 is a register, then handle OFFSET here.
1139
1140          When handling multiword bitfields, extract_bit_field may pass
1141          down a word_mode SUBREG of a larger REG for a bitfield that actually
1142          crosses a word boundary.  Thus, for a SUBREG, we must find
1143          the current word starting from the base register.  */
1144       if (GET_CODE (op0) == SUBREG)
1145         {
1146           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1147           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1148           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1149             word = word_offset ? const0_rtx : op0;
1150           else
1151             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1152                                           GET_MODE (SUBREG_REG (op0)));
1153           offset = 0;
1154         }
1155       else if (REG_P (op0))
1156         {
1157           enum machine_mode op0_mode = GET_MODE (op0);
1158           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1159             word = offset ? const0_rtx : op0;
1160           else
1161             word = operand_subword_force (op0, offset, GET_MODE (op0));
1162           offset = 0;
1163         }
1164       else
1165         word = op0;
1166
1167       /* OFFSET is in UNITs, and UNIT is in bits.
1168          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1169          it is just an out-of-bounds access.  Ignore it.  */
1170       if (word != const0_rtx)
1171         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1172                                thispos, bitregion_start, bitregion_end, part);
1173       bitsdone += thissize;
1174     }
1175 }
1176 \f
1177 /* A subroutine of extract_bit_field_1 that converts return value X
1178    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1179    to extract_bit_field.  */
1180
1181 static rtx
1182 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1183                              enum machine_mode tmode, bool unsignedp)
1184 {
1185   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1186     return x;
1187
1188   /* If the x mode is not a scalar integral, first convert to the
1189      integer mode of that size and then access it as a floating-point
1190      value via a SUBREG.  */
1191   if (!SCALAR_INT_MODE_P (tmode))
1192     {
1193       enum machine_mode smode;
1194
1195       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1196       x = convert_to_mode (smode, x, unsignedp);
1197       x = force_reg (smode, x);
1198       return gen_lowpart (tmode, x);
1199     }
1200
1201   return convert_to_mode (tmode, x, unsignedp);
1202 }
1203
1204 /* A subroutine of extract_bit_field, with the same arguments.
1205    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1206    if we can find no other means of implementing the operation.
1207    if FALLBACK_P is false, return NULL instead.  */
1208
1209 static rtx
1210 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1211                      unsigned HOST_WIDE_INT bitnum,
1212                      int unsignedp, bool packedp, rtx target,
1213                      enum machine_mode mode, enum machine_mode tmode,
1214                      bool fallback_p)
1215 {
1216   unsigned int unit
1217     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1218   unsigned HOST_WIDE_INT offset, bitpos;
1219   rtx op0 = str_rtx;
1220   enum machine_mode int_mode;
1221   enum machine_mode ext_mode;
1222   enum machine_mode mode1;
1223   int byte_offset;
1224
1225   if (tmode == VOIDmode)
1226     tmode = mode;
1227
1228   while (GET_CODE (op0) == SUBREG)
1229     {
1230       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1231       op0 = SUBREG_REG (op0);
1232     }
1233
1234   /* If we have an out-of-bounds access to a register, just return an
1235      uninitialized register of the required mode.  This can occur if the
1236      source code contains an out-of-bounds access to a small array.  */
1237   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1238     return gen_reg_rtx (tmode);
1239
1240   if (REG_P (op0)
1241       && mode == GET_MODE (op0)
1242       && bitnum == 0
1243       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1244     {
1245       /* We're trying to extract a full register from itself.  */
1246       return op0;
1247     }
1248
1249   /* See if we can get a better vector mode before extracting.  */
1250   if (VECTOR_MODE_P (GET_MODE (op0))
1251       && !MEM_P (op0)
1252       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1253     {
1254       enum machine_mode new_mode;
1255
1256       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1257         new_mode = MIN_MODE_VECTOR_FLOAT;
1258       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1259         new_mode = MIN_MODE_VECTOR_FRACT;
1260       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1261         new_mode = MIN_MODE_VECTOR_UFRACT;
1262       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1263         new_mode = MIN_MODE_VECTOR_ACCUM;
1264       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1265         new_mode = MIN_MODE_VECTOR_UACCUM;
1266       else
1267         new_mode = MIN_MODE_VECTOR_INT;
1268
1269       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1270         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1271             && targetm.vector_mode_supported_p (new_mode))
1272           break;
1273       if (new_mode != VOIDmode)
1274         op0 = gen_lowpart (new_mode, op0);
1275     }
1276
1277   /* Use vec_extract patterns for extracting parts of vectors whenever
1278      available.  */
1279   if (VECTOR_MODE_P (GET_MODE (op0))
1280       && !MEM_P (op0)
1281       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1282       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1283           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1284     {
1285       struct expand_operand ops[3];
1286       enum machine_mode outermode = GET_MODE (op0);
1287       enum machine_mode innermode = GET_MODE_INNER (outermode);
1288       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1289       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1290
1291       create_output_operand (&ops[0], target, innermode);
1292       create_input_operand (&ops[1], op0, outermode);
1293       create_integer_operand (&ops[2], pos);
1294       if (maybe_expand_insn (icode, 3, ops))
1295         {
1296           target = ops[0].value;
1297           if (GET_MODE (target) != mode)
1298             return gen_lowpart (tmode, target);
1299           return target;
1300         }
1301     }
1302
1303   /* Make sure we are playing with integral modes.  Pun with subregs
1304      if we aren't.  */
1305   {
1306     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1307     if (imode != GET_MODE (op0))
1308       {
1309         if (MEM_P (op0))
1310           op0 = adjust_address (op0, imode, 0);
1311         else if (imode != BLKmode)
1312           {
1313             op0 = gen_lowpart (imode, op0);
1314
1315             /* If we got a SUBREG, force it into a register since we
1316                aren't going to be able to do another SUBREG on it.  */
1317             if (GET_CODE (op0) == SUBREG)
1318               op0 = force_reg (imode, op0);
1319           }
1320         else if (REG_P (op0))
1321           {
1322             rtx reg, subreg;
1323             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1324                                             MODE_INT);
1325             reg = gen_reg_rtx (imode);
1326             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1327             emit_move_insn (subreg, op0);
1328             op0 = reg;
1329             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1330           }
1331         else
1332           {
1333             rtx mem = assign_stack_temp (GET_MODE (op0),
1334                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1335             emit_move_insn (mem, op0);
1336             op0 = adjust_address (mem, BLKmode, 0);
1337           }
1338       }
1339   }
1340
1341   /* We may be accessing data outside the field, which means
1342      we can alias adjacent data.  */
1343   if (MEM_P (op0))
1344     {
1345       op0 = shallow_copy_rtx (op0);
1346       set_mem_alias_set (op0, 0);
1347       set_mem_expr (op0, 0);
1348     }
1349
1350   /* Extraction of a full-word or multi-word value from a structure
1351      in a register or aligned memory can be done with just a SUBREG.
1352      A subword value in the least significant part of a register
1353      can also be extracted with a SUBREG.  For this, we need the
1354      byte offset of the value in op0.  */
1355
1356   bitpos = bitnum % unit;
1357   offset = bitnum / unit;
1358   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1359
1360   /* If OP0 is a register, BITPOS must count within a word.
1361      But as we have it, it counts within whatever size OP0 now has.
1362      On a bigendian machine, these are not the same, so convert.  */
1363   if (BYTES_BIG_ENDIAN
1364       && !MEM_P (op0)
1365       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1366     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367
1368   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1369      If that's wrong, the solution is to test for it and set TARGET to 0
1370      if needed.  */
1371
1372   /* Only scalar integer modes can be converted via subregs.  There is an
1373      additional problem for FP modes here in that they can have a precision
1374      which is different from the size.  mode_for_size uses precision, but
1375      we want a mode based on the size, so we must avoid calling it for FP
1376      modes.  */
1377   mode1  = (SCALAR_INT_MODE_P (tmode)
1378             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1379             : mode);
1380
1381   /* If the bitfield is volatile, we need to make sure the access
1382      remains on a type-aligned boundary.  */
1383   if (GET_CODE (op0) == MEM
1384       && MEM_VOLATILE_P (op0)
1385       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1386       && flag_strict_volatile_bitfields > 0)
1387     goto no_subreg_mode_swap;
1388
1389   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1390         && bitpos % BITS_PER_WORD == 0)
1391        || (mode1 != BLKmode
1392            /* ??? The big endian test here is wrong.  This is correct
1393               if the value is in a register, and if mode_for_size is not
1394               the same mode as op0.  This causes us to get unnecessarily
1395               inefficient code from the Thumb port when -mbig-endian.  */
1396            && (BYTES_BIG_ENDIAN
1397                ? bitpos + bitsize == BITS_PER_WORD
1398                : bitpos == 0)))
1399       && ((!MEM_P (op0)
1400            && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1401            && GET_MODE_SIZE (mode1) != 0
1402            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1403           || (MEM_P (op0)
1404               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1405                   || (offset * BITS_PER_UNIT % bitsize == 0
1406                       && MEM_ALIGN (op0) % bitsize == 0)))))
1407     {
1408       if (MEM_P (op0))
1409         op0 = adjust_address (op0, mode1, offset);
1410       else if (mode1 != GET_MODE (op0))
1411         {
1412           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1413                                          byte_offset);
1414           if (sub == NULL)
1415             goto no_subreg_mode_swap;
1416           op0 = sub;
1417         }
1418       if (mode1 != mode)
1419         return convert_to_mode (tmode, op0, unsignedp);
1420       return op0;
1421     }
1422  no_subreg_mode_swap:
1423
1424   /* Handle fields bigger than a word.  */
1425
1426   if (bitsize > BITS_PER_WORD)
1427     {
1428       /* Here we transfer the words of the field
1429          in the order least significant first.
1430          This is because the most significant word is the one which may
1431          be less than full.  */
1432
1433       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1434       unsigned int i;
1435
1436       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1437         target = gen_reg_rtx (mode);
1438
1439       /* Indicate for flow that the entire target reg is being set.  */
1440       emit_clobber (target);
1441
1442       for (i = 0; i < nwords; i++)
1443         {
1444           /* If I is 0, use the low-order word in both field and target;
1445              if I is 1, use the next to lowest word; and so on.  */
1446           /* Word number in TARGET to use.  */
1447           unsigned int wordnum
1448             = (WORDS_BIG_ENDIAN
1449                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1450                : i);
1451           /* Offset from start of field in OP0.  */
1452           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1453                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1454                                                 * (int) BITS_PER_WORD))
1455                                      : (int) i * BITS_PER_WORD);
1456           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1457           rtx result_part
1458             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1459                                            bitsize - i * BITS_PER_WORD),
1460                                  bitnum + bit_offset, 1, false, target_part, mode,
1461                                  word_mode);
1462
1463           gcc_assert (target_part);
1464
1465           if (result_part != target_part)
1466             emit_move_insn (target_part, result_part);
1467         }
1468
1469       if (unsignedp)
1470         {
1471           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1472              need to be zero'd out.  */
1473           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1474             {
1475               unsigned int i, total_words;
1476
1477               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1478               for (i = nwords; i < total_words; i++)
1479                 emit_move_insn
1480                   (operand_subword (target,
1481                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1482                                     1, VOIDmode),
1483                    const0_rtx);
1484             }
1485           return target;
1486         }
1487
1488       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1489       target = expand_shift (LSHIFT_EXPR, mode, target,
1490                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1491       return expand_shift (RSHIFT_EXPR, mode, target,
1492                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1493     }
1494
1495   /* From here on we know the desired field is smaller than a word.  */
1496
1497   /* Check if there is a correspondingly-sized integer field, so we can
1498      safely extract it as one size of integer, if necessary; then
1499      truncate or extend to the size that is wanted; then use SUBREGs or
1500      convert_to_mode to get one of the modes we really wanted.  */
1501
1502   int_mode = int_mode_for_mode (tmode);
1503   if (int_mode == BLKmode)
1504     int_mode = int_mode_for_mode (mode);
1505   /* Should probably push op0 out to memory and then do a load.  */
1506   gcc_assert (int_mode != BLKmode);
1507
1508   /* OFFSET is the number of words or bytes (UNIT says which)
1509      from STR_RTX to the first word or byte containing part of the field.  */
1510   if (!MEM_P (op0))
1511     {
1512       if (offset != 0
1513           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1514         {
1515           if (!REG_P (op0))
1516             op0 = copy_to_reg (op0);
1517           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1518                                 op0, (offset * UNITS_PER_WORD));
1519         }
1520       offset = 0;
1521     }
1522
1523   /* Now OFFSET is nonzero only for memory operands.  */
1524   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1525   if (ext_mode != MAX_MACHINE_MODE
1526       && bitsize > 0
1527       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1528       /* Do not use extv/extzv for volatile bitfields when
1529          -fstrict-volatile-bitfields is in effect.  */
1530       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1531            && flag_strict_volatile_bitfields > 0)
1532       /* If op0 is a register, we need it in EXT_MODE to make it
1533          acceptable to the format of ext(z)v.  */
1534       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1535       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1536            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1537     {
1538       struct expand_operand ops[4];
1539       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1540       rtx xop0 = op0;
1541       rtx xtarget = target;
1542       rtx xspec_target = target;
1543       rtx xspec_target_subreg = 0;
1544
1545       /* If op0 is a register, we need it in EXT_MODE to make it
1546          acceptable to the format of ext(z)v.  */
1547       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1548         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1549       if (MEM_P (xop0))
1550         /* Get ref to first byte containing part of the field.  */
1551         xop0 = adjust_address (xop0, byte_mode, xoffset);
1552
1553       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1554       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1555         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1556
1557       unit = GET_MODE_BITSIZE (ext_mode);
1558
1559       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1560          "backwards" from the size of the unit we are extracting from.
1561          Otherwise, we count bits from the most significant on a
1562          BYTES/BITS_BIG_ENDIAN machine.  */
1563
1564       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1565         xbitpos = unit - bitsize - xbitpos;
1566
1567       if (xtarget == 0)
1568         xtarget = xspec_target = gen_reg_rtx (tmode);
1569
1570       if (GET_MODE (xtarget) != ext_mode)
1571         {
1572           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1573              between the mode of the extraction (word_mode) and the target
1574              mode.  Instead, create a temporary and use convert_move to set
1575              the target.  */
1576           if (REG_P (xtarget)
1577               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1578             {
1579               xtarget = gen_lowpart (ext_mode, xtarget);
1580               if (GET_MODE_PRECISION (ext_mode)
1581                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1582                 xspec_target_subreg = xtarget;
1583             }
1584           else
1585             xtarget = gen_reg_rtx (ext_mode);
1586         }
1587
1588       create_output_operand (&ops[0], xtarget, ext_mode);
1589       create_fixed_operand (&ops[1], xop0);
1590       create_integer_operand (&ops[2], bitsize);
1591       create_integer_operand (&ops[3], xbitpos);
1592       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1593                              4, ops))
1594         {
1595           xtarget = ops[0].value;
1596           if (xtarget == xspec_target)
1597             return xtarget;
1598           if (xtarget == xspec_target_subreg)
1599             return xspec_target;
1600           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1601         }
1602     }
1603
1604   /* If OP0 is a memory, try copying it to a register and seeing if a
1605      cheap register alternative is available.  */
1606   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1607     {
1608       enum machine_mode bestmode;
1609
1610       /* Get the mode to use for inserting into this field.  If
1611          OP0 is BLKmode, get the smallest mode consistent with the
1612          alignment. If OP0 is a non-BLKmode object that is no
1613          wider than EXT_MODE, use its mode. Otherwise, use the
1614          smallest mode containing the field.  */
1615
1616       if (GET_MODE (op0) == BLKmode
1617           || (ext_mode != MAX_MACHINE_MODE
1618               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1619         bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1620                                   (ext_mode == MAX_MACHINE_MODE
1621                                    ? VOIDmode : ext_mode),
1622                                   MEM_VOLATILE_P (op0));
1623       else
1624         bestmode = GET_MODE (op0);
1625
1626       if (bestmode != VOIDmode
1627           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1628                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1629         {
1630           unsigned HOST_WIDE_INT xoffset, xbitpos;
1631
1632           /* Compute the offset as a multiple of this unit,
1633              counting in bytes.  */
1634           unit = GET_MODE_BITSIZE (bestmode);
1635           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1636           xbitpos = bitnum % unit;
1637
1638           /* Make sure the register is big enough for the whole field.  */
1639           if (xoffset * BITS_PER_UNIT + unit
1640               >= offset * BITS_PER_UNIT + bitsize)
1641             {
1642               rtx last, result, xop0;
1643
1644               last = get_last_insn ();
1645
1646               /* Fetch it to a register in that size.  */
1647               xop0 = adjust_address (op0, bestmode, xoffset);
1648               xop0 = force_reg (bestmode, xop0);
1649               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1650                                             unsignedp, packedp, target,
1651                                             mode, tmode, false);
1652               if (result)
1653                 return result;
1654
1655               delete_insns_since (last);
1656             }
1657         }
1658     }
1659
1660   if (!fallback_p)
1661     return NULL;
1662
1663   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1664                                     bitpos, target, unsignedp, packedp);
1665   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1666 }
1667
1668 /* Generate code to extract a byte-field from STR_RTX
1669    containing BITSIZE bits, starting at BITNUM,
1670    and put it in TARGET if possible (if TARGET is nonzero).
1671    Regardless of TARGET, we return the rtx for where the value is placed.
1672
1673    STR_RTX is the structure containing the byte (a REG or MEM).
1674    UNSIGNEDP is nonzero if this is an unsigned bit field.
1675    PACKEDP is nonzero if the field has the packed attribute.
1676    MODE is the natural mode of the field value once extracted.
1677    TMODE is the mode the caller would like the value to have;
1678    but the value may be returned with type MODE instead.
1679
1680    If a TARGET is specified and we can store in it at no extra cost,
1681    we do so, and return TARGET.
1682    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1683    if they are equally easy.  */
1684
1685 rtx
1686 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1687                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1688                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1689 {
1690   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1691                               target, mode, tmode, true);
1692 }
1693 \f
1694 /* Extract a bit field using shifts and boolean operations
1695    Returns an rtx to represent the value.
1696    OP0 addresses a register (word) or memory (byte).
1697    BITPOS says which bit within the word or byte the bit field starts in.
1698    OFFSET says how many bytes farther the bit field starts;
1699     it is 0 if OP0 is a register.
1700    BITSIZE says how many bits long the bit field is.
1701     (If OP0 is a register, it may be narrower than a full word,
1702      but BITPOS still counts within a full word,
1703      which is significant on bigendian machines.)
1704
1705    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1706    PACKEDP is true if the field has the packed attribute.
1707
1708    If TARGET is nonzero, attempts to store the value there
1709    and return TARGET, but this is not guaranteed.
1710    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1711
1712 static rtx
1713 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1714                          unsigned HOST_WIDE_INT offset,
1715                          unsigned HOST_WIDE_INT bitsize,
1716                          unsigned HOST_WIDE_INT bitpos, rtx target,
1717                          int unsignedp, bool packedp)
1718 {
1719   unsigned int total_bits = BITS_PER_WORD;
1720   enum machine_mode mode;
1721
1722   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1723     {
1724       /* Special treatment for a bit field split across two registers.  */
1725       if (bitsize + bitpos > BITS_PER_WORD)
1726         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1727     }
1728   else
1729     {
1730       /* Get the proper mode to use for this field.  We want a mode that
1731          includes the entire field.  If such a mode would be larger than
1732          a word, we won't be doing the extraction the normal way.  */
1733
1734       if (MEM_VOLATILE_P (op0)
1735           && flag_strict_volatile_bitfields > 0)
1736         {
1737           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1738             mode = GET_MODE (op0);
1739           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1740             mode = GET_MODE (target);
1741           else
1742             mode = tmode;
1743         }
1744       else
1745         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1746                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1747
1748       if (mode == VOIDmode)
1749         /* The only way this should occur is if the field spans word
1750            boundaries.  */
1751         return extract_split_bit_field (op0, bitsize,
1752                                         bitpos + offset * BITS_PER_UNIT,
1753                                         unsignedp);
1754
1755       total_bits = GET_MODE_BITSIZE (mode);
1756
1757       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1758          be in the range 0 to total_bits-1, and put any excess bytes in
1759          OFFSET.  */
1760       if (bitpos >= total_bits)
1761         {
1762           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1763           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1764                      * BITS_PER_UNIT);
1765         }
1766
1767       /* If we're accessing a volatile MEM, we can't do the next
1768          alignment step if it results in a multi-word access where we
1769          otherwise wouldn't have one.  So, check for that case
1770          here.  */
1771       if (MEM_P (op0)
1772           && MEM_VOLATILE_P (op0)
1773           && flag_strict_volatile_bitfields > 0
1774           && bitpos + bitsize <= total_bits
1775           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1776         {
1777           if (STRICT_ALIGNMENT)
1778             {
1779               static bool informed_about_misalignment = false;
1780               bool warned;
1781
1782               if (packedp)
1783                 {
1784                   if (bitsize == total_bits)
1785                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1786                                          "multiple accesses to volatile structure member"
1787                                          " because of packed attribute");
1788                   else
1789                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1790                                          "multiple accesses to volatile structure bitfield"
1791                                          " because of packed attribute");
1792
1793                   return extract_split_bit_field (op0, bitsize,
1794                                                   bitpos + offset * BITS_PER_UNIT,
1795                                                   unsignedp);
1796                 }
1797
1798               if (bitsize == total_bits)
1799                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1800                                      "mis-aligned access used for structure member");
1801               else
1802                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1803                                      "mis-aligned access used for structure bitfield");
1804
1805               if (! informed_about_misalignment && warned)
1806                 {
1807                   informed_about_misalignment = true;
1808                   inform (input_location,
1809                           "when a volatile object spans multiple type-sized locations,"
1810                           " the compiler must choose between using a single mis-aligned access to"
1811                           " preserve the volatility, or using multiple aligned accesses to avoid"
1812                           " runtime faults; this code may fail at runtime if the hardware does"
1813                           " not allow this access");
1814                 }
1815             }
1816         }
1817       else
1818         {
1819
1820           /* Get ref to an aligned byte, halfword, or word containing the field.
1821              Adjust BITPOS to be position within a word,
1822              and OFFSET to be the offset of that word.
1823              Then alter OP0 to refer to that word.  */
1824           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1825           offset -= (offset % (total_bits / BITS_PER_UNIT));
1826         }
1827
1828       op0 = adjust_address (op0, mode, offset);
1829     }
1830
1831   mode = GET_MODE (op0);
1832
1833   if (BYTES_BIG_ENDIAN)
1834     /* BITPOS is the distance between our msb and that of OP0.
1835        Convert it to the distance from the lsb.  */
1836     bitpos = total_bits - bitsize - bitpos;
1837
1838   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1839      We have reduced the big-endian case to the little-endian case.  */
1840
1841   if (unsignedp)
1842     {
1843       if (bitpos)
1844         {
1845           /* If the field does not already start at the lsb,
1846              shift it so it does.  */
1847           /* Maybe propagate the target for the shift.  */
1848           /* But not if we will return it--could confuse integrate.c.  */
1849           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1850           if (tmode != mode) subtarget = 0;
1851           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1852         }
1853       /* Convert the value to the desired mode.  */
1854       if (mode != tmode)
1855         op0 = convert_to_mode (tmode, op0, 1);
1856
1857       /* Unless the msb of the field used to be the msb when we shifted,
1858          mask out the upper bits.  */
1859
1860       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1861         return expand_binop (GET_MODE (op0), and_optab, op0,
1862                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1863                              target, 1, OPTAB_LIB_WIDEN);
1864       return op0;
1865     }
1866
1867   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1868      then arithmetic-shift its lsb to the lsb of the word.  */
1869   op0 = force_reg (mode, op0);
1870
1871   /* Find the narrowest integer mode that contains the field.  */
1872
1873   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1874        mode = GET_MODE_WIDER_MODE (mode))
1875     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1876       {
1877         op0 = convert_to_mode (mode, op0, 0);
1878         break;
1879       }
1880
1881   if (mode != tmode)
1882     target = 0;
1883
1884   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1885     {
1886       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1887       /* Maybe propagate the target for the shift.  */
1888       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1889       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1890     }
1891
1892   return expand_shift (RSHIFT_EXPR, mode, op0,
1893                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1894 }
1895 \f
1896 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1897    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1898    complement of that if COMPLEMENT.  The mask is truncated if
1899    necessary to the width of mode MODE.  The mask is zero-extended if
1900    BITSIZE+BITPOS is too small for MODE.  */
1901
1902 static rtx
1903 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1904 {
1905   double_int mask;
1906
1907   mask = double_int_mask (bitsize);
1908   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1909
1910   if (complement)
1911     mask = double_int_not (mask);
1912
1913   return immed_double_int_const (mask, mode);
1914 }
1915
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1917    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1918
1919 static rtx
1920 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1921 {
1922   double_int val;
1923
1924   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1925   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1926
1927   return immed_double_int_const (val, mode);
1928 }
1929 \f
1930 /* Extract a bit field that is split across two words
1931    and return an RTX for the result.
1932
1933    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1934    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1935    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1936
1937 static rtx
1938 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1939                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1940 {
1941   unsigned int unit;
1942   unsigned int bitsdone = 0;
1943   rtx result = NULL_RTX;
1944   int first = 1;
1945
1946   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1947      much at a time.  */
1948   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1949     unit = BITS_PER_WORD;
1950   else
1951     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1952
1953   while (bitsdone < bitsize)
1954     {
1955       unsigned HOST_WIDE_INT thissize;
1956       rtx part, word;
1957       unsigned HOST_WIDE_INT thispos;
1958       unsigned HOST_WIDE_INT offset;
1959
1960       offset = (bitpos + bitsdone) / unit;
1961       thispos = (bitpos + bitsdone) % unit;
1962
1963       /* THISSIZE must not overrun a word boundary.  Otherwise,
1964          extract_fixed_bit_field will call us again, and we will mutually
1965          recurse forever.  */
1966       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1967       thissize = MIN (thissize, unit - thispos);
1968
1969       /* If OP0 is a register, then handle OFFSET here.
1970
1971          When handling multiword bitfields, extract_bit_field may pass
1972          down a word_mode SUBREG of a larger REG for a bitfield that actually
1973          crosses a word boundary.  Thus, for a SUBREG, we must find
1974          the current word starting from the base register.  */
1975       if (GET_CODE (op0) == SUBREG)
1976         {
1977           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1978           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1979                                         GET_MODE (SUBREG_REG (op0)));
1980           offset = 0;
1981         }
1982       else if (REG_P (op0))
1983         {
1984           word = operand_subword_force (op0, offset, GET_MODE (op0));
1985           offset = 0;
1986         }
1987       else
1988         word = op0;
1989
1990       /* Extract the parts in bit-counting order,
1991          whose meaning is determined by BYTES_PER_UNIT.
1992          OFFSET is in UNITs, and UNIT is in bits.
1993          extract_fixed_bit_field wants offset in bytes.  */
1994       part = extract_fixed_bit_field (word_mode, word,
1995                                       offset * unit / BITS_PER_UNIT,
1996                                       thissize, thispos, 0, 1, false);
1997       bitsdone += thissize;
1998
1999       /* Shift this part into place for the result.  */
2000       if (BYTES_BIG_ENDIAN)
2001         {
2002           if (bitsize != bitsdone)
2003             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004                                  bitsize - bitsdone, 0, 1);
2005         }
2006       else
2007         {
2008           if (bitsdone != thissize)
2009             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2010                                  bitsdone - thissize, 0, 1);
2011         }
2012
2013       if (first)
2014         result = part;
2015       else
2016         /* Combine the parts with bitwise or.  This works
2017            because we extracted each part as an unsigned bit field.  */
2018         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2019                                OPTAB_LIB_WIDEN);
2020
2021       first = 0;
2022     }
2023
2024   /* Unsigned bit field: we are done.  */
2025   if (unsignedp)
2026     return result;
2027   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2028   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2029                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2030   return expand_shift (RSHIFT_EXPR, word_mode, result,
2031                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2032 }
2033 \f
2034 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2035    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2036    MODE, fill the upper bits with zeros.  Fail if the layout of either
2037    mode is unknown (as for CC modes) or if the extraction would involve
2038    unprofitable mode punning.  Return the value on success, otherwise
2039    return null.
2040
2041    This is different from gen_lowpart* in these respects:
2042
2043      - the returned value must always be considered an rvalue
2044
2045      - when MODE is wider than SRC_MODE, the extraction involves
2046        a zero extension
2047
2048      - when MODE is smaller than SRC_MODE, the extraction involves
2049        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2050
2051    In other words, this routine performs a computation, whereas the
2052    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2053    operations.  */
2054
2055 rtx
2056 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2057 {
2058   enum machine_mode int_mode, src_int_mode;
2059
2060   if (mode == src_mode)
2061     return src;
2062
2063   if (CONSTANT_P (src))
2064     {
2065       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2066          fails, it will happily create (subreg (symbol_ref)) or similar
2067          invalid SUBREGs.  */
2068       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2069       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2070       if (ret)
2071         return ret;
2072
2073       if (GET_MODE (src) == VOIDmode
2074           || !validate_subreg (mode, src_mode, src, byte))
2075         return NULL_RTX;
2076
2077       src = force_reg (GET_MODE (src), src);
2078       return gen_rtx_SUBREG (mode, src, byte);
2079     }
2080
2081   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2082     return NULL_RTX;
2083
2084   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2085       && MODES_TIEABLE_P (mode, src_mode))
2086     {
2087       rtx x = gen_lowpart_common (mode, src);
2088       if (x)
2089         return x;
2090     }
2091
2092   src_int_mode = int_mode_for_mode (src_mode);
2093   int_mode = int_mode_for_mode (mode);
2094   if (src_int_mode == BLKmode || int_mode == BLKmode)
2095     return NULL_RTX;
2096
2097   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2098     return NULL_RTX;
2099   if (!MODES_TIEABLE_P (int_mode, mode))
2100     return NULL_RTX;
2101
2102   src = gen_lowpart (src_int_mode, src);
2103   src = convert_modes (int_mode, src_int_mode, src, true);
2104   src = gen_lowpart (mode, src);
2105   return src;
2106 }
2107 \f
2108 /* Add INC into TARGET.  */
2109
2110 void
2111 expand_inc (rtx target, rtx inc)
2112 {
2113   rtx value = expand_binop (GET_MODE (target), add_optab,
2114                             target, inc,
2115                             target, 0, OPTAB_LIB_WIDEN);
2116   if (value != target)
2117     emit_move_insn (target, value);
2118 }
2119
2120 /* Subtract DEC from TARGET.  */
2121
2122 void
2123 expand_dec (rtx target, rtx dec)
2124 {
2125   rtx value = expand_binop (GET_MODE (target), sub_optab,
2126                             target, dec,
2127                             target, 0, OPTAB_LIB_WIDEN);
2128   if (value != target)
2129     emit_move_insn (target, value);
2130 }
2131 \f
2132 /* Output a shift instruction for expression code CODE,
2133    with SHIFTED being the rtx for the value to shift,
2134    and AMOUNT the rtx for the amount to shift by.
2135    Store the result in the rtx TARGET, if that is convenient.
2136    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2137    Return the rtx for where the value is.  */
2138
2139 static rtx
2140 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2141                 rtx amount, rtx target, int unsignedp)
2142 {
2143   rtx op1, temp = 0;
2144   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2145   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2146   optab lshift_optab = ashl_optab;
2147   optab rshift_arith_optab = ashr_optab;
2148   optab rshift_uns_optab = lshr_optab;
2149   optab lrotate_optab = rotl_optab;
2150   optab rrotate_optab = rotr_optab;
2151   enum machine_mode op1_mode;
2152   int attempt;
2153   bool speed = optimize_insn_for_speed_p ();
2154
2155   op1 = amount;
2156   op1_mode = GET_MODE (op1);
2157
2158   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2159      shift amount is a vector, use the vector/vector shift patterns.  */
2160   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2161     {
2162       lshift_optab = vashl_optab;
2163       rshift_arith_optab = vashr_optab;
2164       rshift_uns_optab = vlshr_optab;
2165       lrotate_optab = vrotl_optab;
2166       rrotate_optab = vrotr_optab;
2167     }
2168
2169   /* Previously detected shift-counts computed by NEGATE_EXPR
2170      and shifted in the other direction; but that does not work
2171      on all machines.  */
2172
2173   if (SHIFT_COUNT_TRUNCATED)
2174     {
2175       if (CONST_INT_P (op1)
2176           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2177               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2178         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2179                        % GET_MODE_BITSIZE (mode));
2180       else if (GET_CODE (op1) == SUBREG
2181                && subreg_lowpart_p (op1)
2182                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2183         op1 = SUBREG_REG (op1);
2184     }
2185
2186   if (op1 == const0_rtx)
2187     return shifted;
2188
2189   /* Check whether its cheaper to implement a left shift by a constant
2190      bit count by a sequence of additions.  */
2191   if (code == LSHIFT_EXPR
2192       && CONST_INT_P (op1)
2193       && INTVAL (op1) > 0
2194       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2195       && INTVAL (op1) < MAX_BITS_PER_WORD
2196       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2197       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2198     {
2199       int i;
2200       for (i = 0; i < INTVAL (op1); i++)
2201         {
2202           temp = force_reg (mode, shifted);
2203           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2204                                   unsignedp, OPTAB_LIB_WIDEN);
2205         }
2206       return shifted;
2207     }
2208
2209   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2210     {
2211       enum optab_methods methods;
2212
2213       if (attempt == 0)
2214         methods = OPTAB_DIRECT;
2215       else if (attempt == 1)
2216         methods = OPTAB_WIDEN;
2217       else
2218         methods = OPTAB_LIB_WIDEN;
2219
2220       if (rotate)
2221         {
2222           /* Widening does not work for rotation.  */
2223           if (methods == OPTAB_WIDEN)
2224             continue;
2225           else if (methods == OPTAB_LIB_WIDEN)
2226             {
2227               /* If we have been unable to open-code this by a rotation,
2228                  do it as the IOR of two shifts.  I.e., to rotate A
2229                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2230                  where C is the bitsize of A.
2231
2232                  It is theoretically possible that the target machine might
2233                  not be able to perform either shift and hence we would
2234                  be making two libcalls rather than just the one for the
2235                  shift (similarly if IOR could not be done).  We will allow
2236                  this extremely unlikely lossage to avoid complicating the
2237                  code below.  */
2238
2239               rtx subtarget = target == shifted ? 0 : target;
2240               rtx new_amount, other_amount;
2241               rtx temp1;
2242
2243               new_amount = op1;
2244               if (CONST_INT_P (op1))
2245                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2246                                         - INTVAL (op1));
2247               else
2248                 other_amount
2249                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2250                                          GEN_INT (GET_MODE_PRECISION (mode)),
2251                                          op1);
2252
2253               shifted = force_reg (mode, shifted);
2254
2255               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2256                                      mode, shifted, new_amount, 0, 1);
2257               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2258                                       mode, shifted, other_amount,
2259                                       subtarget, 1);
2260               return expand_binop (mode, ior_optab, temp, temp1, target,
2261                                    unsignedp, methods);
2262             }
2263
2264           temp = expand_binop (mode,
2265                                left ? lrotate_optab : rrotate_optab,
2266                                shifted, op1, target, unsignedp, methods);
2267         }
2268       else if (unsignedp)
2269         temp = expand_binop (mode,
2270                              left ? lshift_optab : rshift_uns_optab,
2271                              shifted, op1, target, unsignedp, methods);
2272
2273       /* Do arithmetic shifts.
2274          Also, if we are going to widen the operand, we can just as well
2275          use an arithmetic right-shift instead of a logical one.  */
2276       if (temp == 0 && ! rotate
2277           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2278         {
2279           enum optab_methods methods1 = methods;
2280
2281           /* If trying to widen a log shift to an arithmetic shift,
2282              don't accept an arithmetic shift of the same size.  */
2283           if (unsignedp)
2284             methods1 = OPTAB_MUST_WIDEN;
2285
2286           /* Arithmetic shift */
2287
2288           temp = expand_binop (mode,
2289                                left ? lshift_optab : rshift_arith_optab,
2290                                shifted, op1, target, unsignedp, methods1);
2291         }
2292
2293       /* We used to try extzv here for logical right shifts, but that was
2294          only useful for one machine, the VAX, and caused poor code
2295          generation there for lshrdi3, so the code was deleted and a
2296          define_expand for lshrsi3 was added to vax.md.  */
2297     }
2298
2299   gcc_assert (temp);
2300   return temp;
2301 }
2302
2303 /* Output a shift instruction for expression code CODE,
2304    with SHIFTED being the rtx for the value to shift,
2305    and AMOUNT the amount to shift by.
2306    Store the result in the rtx TARGET, if that is convenient.
2307    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2308    Return the rtx for where the value is.  */
2309
2310 rtx
2311 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2312               int amount, rtx target, int unsignedp)
2313 {
2314   return expand_shift_1 (code, mode,
2315                          shifted, GEN_INT (amount), target, unsignedp);
2316 }
2317
2318 /* Output a shift instruction for expression code CODE,
2319    with SHIFTED being the rtx for the value to shift,
2320    and AMOUNT the tree for the amount to shift by.
2321    Store the result in the rtx TARGET, if that is convenient.
2322    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2323    Return the rtx for where the value is.  */
2324
2325 rtx
2326 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2327                        tree amount, rtx target, int unsignedp)
2328 {
2329   return expand_shift_1 (code, mode,
2330                          shifted, expand_normal (amount), target, unsignedp);
2331 }
2332
2333 \f
2334 /* Indicates the type of fixup needed after a constant multiplication.
2335    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2336    the result should be negated, and ADD_VARIANT means that the
2337    multiplicand should be added to the result.  */
2338 enum mult_variant {basic_variant, negate_variant, add_variant};
2339
2340 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2341                         const struct mult_cost *, enum machine_mode mode);
2342 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2343                                  struct algorithm *, enum mult_variant *, int);
2344 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2345                               const struct algorithm *, enum mult_variant);
2346 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2347                                                  int, rtx *, int *, int *);
2348 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2349 static rtx extract_high_half (enum machine_mode, rtx);
2350 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2351 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2352                                        int, int);
2353 /* Compute and return the best algorithm for multiplying by T.
2354    The algorithm must cost less than cost_limit
2355    If retval.cost >= COST_LIMIT, no algorithm was found and all
2356    other field of the returned struct are undefined.
2357    MODE is the machine mode of the multiplication.  */
2358
2359 static void
2360 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2361             const struct mult_cost *cost_limit, enum machine_mode mode)
2362 {
2363   int m;
2364   struct algorithm *alg_in, *best_alg;
2365   struct mult_cost best_cost;
2366   struct mult_cost new_limit;
2367   int op_cost, op_latency;
2368   unsigned HOST_WIDE_INT orig_t = t;
2369   unsigned HOST_WIDE_INT q;
2370   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2371   int hash_index;
2372   bool cache_hit = false;
2373   enum alg_code cache_alg = alg_zero;
2374   bool speed = optimize_insn_for_speed_p ();
2375
2376   /* Indicate that no algorithm is yet found.  If no algorithm
2377      is found, this value will be returned and indicate failure.  */
2378   alg_out->cost.cost = cost_limit->cost + 1;
2379   alg_out->cost.latency = cost_limit->latency + 1;
2380
2381   if (cost_limit->cost < 0
2382       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2383     return;
2384
2385   /* Restrict the bits of "t" to the multiplication's mode.  */
2386   t &= GET_MODE_MASK (mode);
2387
2388   /* t == 1 can be done in zero cost.  */
2389   if (t == 1)
2390     {
2391       alg_out->ops = 1;
2392       alg_out->cost.cost = 0;
2393       alg_out->cost.latency = 0;
2394       alg_out->op[0] = alg_m;
2395       return;
2396     }
2397
2398   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2399      fail now.  */
2400   if (t == 0)
2401     {
2402       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2403         return;
2404       else
2405         {
2406           alg_out->ops = 1;
2407           alg_out->cost.cost = zero_cost[speed];
2408           alg_out->cost.latency = zero_cost[speed];
2409           alg_out->op[0] = alg_zero;
2410           return;
2411         }
2412     }
2413
2414   /* We'll be needing a couple extra algorithm structures now.  */
2415
2416   alg_in = XALLOCA (struct algorithm);
2417   best_alg = XALLOCA (struct algorithm);
2418   best_cost = *cost_limit;
2419
2420   /* Compute the hash index.  */
2421   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2422
2423   /* See if we already know what to do for T.  */
2424   if (alg_hash[hash_index].t == t
2425       && alg_hash[hash_index].mode == mode
2426       && alg_hash[hash_index].mode == mode
2427       && alg_hash[hash_index].speed == speed
2428       && alg_hash[hash_index].alg != alg_unknown)
2429     {
2430       cache_alg = alg_hash[hash_index].alg;
2431
2432       if (cache_alg == alg_impossible)
2433         {
2434           /* The cache tells us that it's impossible to synthesize
2435              multiplication by T within alg_hash[hash_index].cost.  */
2436           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2437             /* COST_LIMIT is at least as restrictive as the one
2438                recorded in the hash table, in which case we have no
2439                hope of synthesizing a multiplication.  Just
2440                return.  */
2441             return;
2442
2443           /* If we get here, COST_LIMIT is less restrictive than the
2444              one recorded in the hash table, so we may be able to
2445              synthesize a multiplication.  Proceed as if we didn't
2446              have the cache entry.  */
2447         }
2448       else
2449         {
2450           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2451             /* The cached algorithm shows that this multiplication
2452                requires more cost than COST_LIMIT.  Just return.  This
2453                way, we don't clobber this cache entry with
2454                alg_impossible but retain useful information.  */
2455             return;
2456
2457           cache_hit = true;
2458
2459           switch (cache_alg)
2460             {
2461             case alg_shift:
2462               goto do_alg_shift;
2463
2464             case alg_add_t_m2:
2465             case alg_sub_t_m2:
2466               goto do_alg_addsub_t_m2;
2467
2468             case alg_add_factor:
2469             case alg_sub_factor:
2470               goto do_alg_addsub_factor;
2471
2472             case alg_add_t2_m:
2473               goto do_alg_add_t2_m;
2474
2475             case alg_sub_t2_m:
2476               goto do_alg_sub_t2_m;
2477
2478             default:
2479               gcc_unreachable ();
2480             }
2481         }
2482     }
2483
2484   /* If we have a group of zero bits at the low-order part of T, try
2485      multiplying by the remaining bits and then doing a shift.  */
2486
2487   if ((t & 1) == 0)
2488     {
2489     do_alg_shift:
2490       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2491       if (m < maxm)
2492         {
2493           q = t >> m;
2494           /* The function expand_shift will choose between a shift and
2495              a sequence of additions, so the observed cost is given as
2496              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2497           op_cost = m * add_cost[speed][mode];
2498           if (shift_cost[speed][mode][m] < op_cost)
2499             op_cost = shift_cost[speed][mode][m];
2500           new_limit.cost = best_cost.cost - op_cost;
2501           new_limit.latency = best_cost.latency - op_cost;
2502           synth_mult (alg_in, q, &new_limit, mode);
2503
2504           alg_in->cost.cost += op_cost;
2505           alg_in->cost.latency += op_cost;
2506           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2507             {
2508               struct algorithm *x;
2509               best_cost = alg_in->cost;
2510               x = alg_in, alg_in = best_alg, best_alg = x;
2511               best_alg->log[best_alg->ops] = m;
2512               best_alg->op[best_alg->ops] = alg_shift;
2513             }
2514
2515           /* See if treating ORIG_T as a signed number yields a better
2516              sequence.  Try this sequence only for a negative ORIG_T
2517              as it would be useless for a non-negative ORIG_T.  */
2518           if ((HOST_WIDE_INT) orig_t < 0)
2519             {
2520               /* Shift ORIG_T as follows because a right shift of a
2521                  negative-valued signed type is implementation
2522                  defined.  */
2523               q = ~(~orig_t >> m);
2524               /* The function expand_shift will choose between a shift
2525                  and a sequence of additions, so the observed cost is
2526                  given as MIN (m * add_cost[speed][mode],
2527                  shift_cost[speed][mode][m]).  */
2528               op_cost = m * add_cost[speed][mode];
2529               if (shift_cost[speed][mode][m] < op_cost)
2530                 op_cost = shift_cost[speed][mode][m];
2531               new_limit.cost = best_cost.cost - op_cost;
2532               new_limit.latency = best_cost.latency - op_cost;
2533               synth_mult (alg_in, q, &new_limit, mode);
2534
2535               alg_in->cost.cost += op_cost;
2536               alg_in->cost.latency += op_cost;
2537               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2538                 {
2539                   struct algorithm *x;
2540                   best_cost = alg_in->cost;
2541                   x = alg_in, alg_in = best_alg, best_alg = x;
2542                   best_alg->log[best_alg->ops] = m;
2543                   best_alg->op[best_alg->ops] = alg_shift;
2544                 }
2545             }
2546         }
2547       if (cache_hit)
2548         goto done;
2549     }
2550
2551   /* If we have an odd number, add or subtract one.  */
2552   if ((t & 1) != 0)
2553     {
2554       unsigned HOST_WIDE_INT w;
2555
2556     do_alg_addsub_t_m2:
2557       for (w = 1; (w & t) != 0; w <<= 1)
2558         ;
2559       /* If T was -1, then W will be zero after the loop.  This is another
2560          case where T ends with ...111.  Handling this with (T + 1) and
2561          subtract 1 produces slightly better code and results in algorithm
2562          selection much faster than treating it like the ...0111 case
2563          below.  */
2564       if (w == 0
2565           || (w > 2
2566               /* Reject the case where t is 3.
2567                  Thus we prefer addition in that case.  */
2568               && t != 3))
2569         {
2570           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2571
2572           op_cost = add_cost[speed][mode];
2573           new_limit.cost = best_cost.cost - op_cost;
2574           new_limit.latency = best_cost.latency - op_cost;
2575           synth_mult (alg_in, t + 1, &new_limit, mode);
2576
2577           alg_in->cost.cost += op_cost;
2578           alg_in->cost.latency += op_cost;
2579           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2580             {
2581               struct algorithm *x;
2582               best_cost = alg_in->cost;
2583               x = alg_in, alg_in = best_alg, best_alg = x;
2584               best_alg->log[best_alg->ops] = 0;
2585               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2586             }
2587         }
2588       else
2589         {
2590           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2591
2592           op_cost = add_cost[speed][mode];
2593           new_limit.cost = best_cost.cost - op_cost;
2594           new_limit.latency = best_cost.latency - op_cost;
2595           synth_mult (alg_in, t - 1, &new_limit, mode);
2596
2597           alg_in->cost.cost += op_cost;
2598           alg_in->cost.latency += op_cost;
2599           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2600             {
2601               struct algorithm *x;
2602               best_cost = alg_in->cost;
2603               x = alg_in, alg_in = best_alg, best_alg = x;
2604               best_alg->log[best_alg->ops] = 0;
2605               best_alg->op[best_alg->ops] = alg_add_t_m2;
2606             }
2607         }
2608
2609       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2610          quickly with a - a * n for some appropriate constant n.  */
2611       m = exact_log2 (-orig_t + 1);
2612       if (m >= 0 && m < maxm)
2613         {
2614           op_cost = shiftsub1_cost[speed][mode][m];
2615           new_limit.cost = best_cost.cost - op_cost;
2616           new_limit.latency = best_cost.latency - op_cost;
2617           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2618
2619           alg_in->cost.cost += op_cost;
2620           alg_in->cost.latency += op_cost;
2621           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2622             {
2623               struct algorithm *x;
2624               best_cost = alg_in->cost;
2625               x = alg_in, alg_in = best_alg, best_alg = x;
2626               best_alg->log[best_alg->ops] = m;
2627               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2628             }
2629         }
2630
2631       if (cache_hit)
2632         goto done;
2633     }
2634
2635   /* Look for factors of t of the form
2636      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2637      If we find such a factor, we can multiply by t using an algorithm that
2638      multiplies by q, shift the result by m and add/subtract it to itself.
2639
2640      We search for large factors first and loop down, even if large factors
2641      are less probable than small; if we find a large factor we will find a
2642      good sequence quickly, and therefore be able to prune (by decreasing
2643      COST_LIMIT) the search.  */
2644
2645  do_alg_addsub_factor:
2646   for (m = floor_log2 (t - 1); m >= 2; m--)
2647     {
2648       unsigned HOST_WIDE_INT d;
2649
2650       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2651       if (t % d == 0 && t > d && m < maxm
2652           && (!cache_hit || cache_alg == alg_add_factor))
2653         {
2654           /* If the target has a cheap shift-and-add instruction use
2655              that in preference to a shift insn followed by an add insn.
2656              Assume that the shift-and-add is "atomic" with a latency
2657              equal to its cost, otherwise assume that on superscalar
2658              hardware the shift may be executed concurrently with the
2659              earlier steps in the algorithm.  */
2660           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2661           if (shiftadd_cost[speed][mode][m] < op_cost)
2662             {
2663               op_cost = shiftadd_cost[speed][mode][m];
2664               op_latency = op_cost;
2665             }
2666           else
2667             op_latency = add_cost[speed][mode];
2668
2669           new_limit.cost = best_cost.cost - op_cost;
2670           new_limit.latency = best_cost.latency - op_latency;
2671           synth_mult (alg_in, t / d, &new_limit, mode);
2672
2673           alg_in->cost.cost += op_cost;
2674           alg_in->cost.latency += op_latency;
2675           if (alg_in->cost.latency < op_cost)
2676             alg_in->cost.latency = op_cost;
2677           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2678             {
2679               struct algorithm *x;
2680               best_cost = alg_in->cost;
2681               x = alg_in, alg_in = best_alg, best_alg = x;
2682               best_alg->log[best_alg->ops] = m;
2683               best_alg->op[best_alg->ops] = alg_add_factor;
2684             }
2685           /* Other factors will have been taken care of in the recursion.  */
2686           break;
2687         }
2688
2689       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2690       if (t % d == 0 && t > d && m < maxm
2691           && (!cache_hit || cache_alg == alg_sub_factor))
2692         {
2693           /* If the target has a cheap shift-and-subtract insn use
2694              that in preference to a shift insn followed by a sub insn.
2695              Assume that the shift-and-sub is "atomic" with a latency
2696              equal to it's cost, otherwise assume that on superscalar
2697              hardware the shift may be executed concurrently with the
2698              earlier steps in the algorithm.  */
2699           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2700           if (shiftsub0_cost[speed][mode][m] < op_cost)
2701             {
2702               op_cost = shiftsub0_cost[speed][mode][m];
2703               op_latency = op_cost;
2704             }
2705           else
2706             op_latency = add_cost[speed][mode];
2707
2708           new_limit.cost = best_cost.cost - op_cost;
2709           new_limit.latency = best_cost.latency - op_latency;
2710           synth_mult (alg_in, t / d, &new_limit, mode);
2711
2712           alg_in->cost.cost += op_cost;
2713           alg_in->cost.latency += op_latency;
2714           if (alg_in->cost.latency < op_cost)
2715             alg_in->cost.latency = op_cost;
2716           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2717             {
2718               struct algorithm *x;
2719               best_cost = alg_in->cost;
2720               x = alg_in, alg_in = best_alg, best_alg = x;
2721               best_alg->log[best_alg->ops] = m;
2722               best_alg->op[best_alg->ops] = alg_sub_factor;
2723             }
2724           break;
2725         }
2726     }
2727   if (cache_hit)
2728     goto done;
2729
2730   /* Try shift-and-add (load effective address) instructions,
2731      i.e. do a*3, a*5, a*9.  */
2732   if ((t & 1) != 0)
2733     {
2734     do_alg_add_t2_m:
2735       q = t - 1;
2736       q = q & -q;
2737       m = exact_log2 (q);
2738       if (m >= 0 && m < maxm)
2739         {
2740           op_cost = shiftadd_cost[speed][mode][m];
2741           new_limit.cost = best_cost.cost - op_cost;
2742           new_limit.latency = best_cost.latency - op_cost;
2743           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2744
2745           alg_in->cost.cost += op_cost;
2746           alg_in->cost.latency += op_cost;
2747           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2748             {
2749               struct algorithm *x;
2750               best_cost = alg_in->cost;
2751               x = alg_in, alg_in = best_alg, best_alg = x;
2752               best_alg->log[best_alg->ops] = m;
2753               best_alg->op[best_alg->ops] = alg_add_t2_m;
2754             }
2755         }
2756       if (cache_hit)
2757         goto done;
2758
2759     do_alg_sub_t2_m:
2760       q = t + 1;
2761       q = q & -q;
2762       m = exact_log2 (q);
2763       if (m >= 0 && m < maxm)
2764         {
2765           op_cost = shiftsub0_cost[speed][mode][m];
2766           new_limit.cost = best_cost.cost - op_cost;
2767           new_limit.latency = best_cost.latency - op_cost;
2768           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2769
2770           alg_in->cost.cost += op_cost;
2771           alg_in->cost.latency += op_cost;
2772           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2773             {
2774               struct algorithm *x;
2775               best_cost = alg_in->cost;
2776               x = alg_in, alg_in = best_alg, best_alg = x;
2777               best_alg->log[best_alg->ops] = m;
2778               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2779             }
2780         }
2781       if (cache_hit)
2782         goto done;
2783     }
2784
2785  done:
2786   /* If best_cost has not decreased, we have not found any algorithm.  */
2787   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2788     {
2789       /* We failed to find an algorithm.  Record alg_impossible for
2790          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2791          we are asked to find an algorithm for T within the same or
2792          lower COST_LIMIT, we can immediately return to the
2793          caller.  */
2794       alg_hash[hash_index].t = t;
2795       alg_hash[hash_index].mode = mode;
2796       alg_hash[hash_index].speed = speed;
2797       alg_hash[hash_index].alg = alg_impossible;
2798       alg_hash[hash_index].cost = *cost_limit;
2799       return;
2800     }
2801
2802   /* Cache the result.  */
2803   if (!cache_hit)
2804     {
2805       alg_hash[hash_index].t = t;
2806       alg_hash[hash_index].mode = mode;
2807       alg_hash[hash_index].speed = speed;
2808       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2809       alg_hash[hash_index].cost.cost = best_cost.cost;
2810       alg_hash[hash_index].cost.latency = best_cost.latency;
2811     }
2812
2813   /* If we are getting a too long sequence for `struct algorithm'
2814      to record, make this search fail.  */
2815   if (best_alg->ops == MAX_BITS_PER_WORD)
2816     return;
2817
2818   /* Copy the algorithm from temporary space to the space at alg_out.
2819      We avoid using structure assignment because the majority of
2820      best_alg is normally undefined, and this is a critical function.  */
2821   alg_out->ops = best_alg->ops + 1;
2822   alg_out->cost = best_cost;
2823   memcpy (alg_out->op, best_alg->op,
2824           alg_out->ops * sizeof *alg_out->op);
2825   memcpy (alg_out->log, best_alg->log,
2826           alg_out->ops * sizeof *alg_out->log);
2827 }
2828 \f
2829 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2830    Try three variations:
2831
2832        - a shift/add sequence based on VAL itself
2833        - a shift/add sequence based on -VAL, followed by a negation
2834        - a shift/add sequence based on VAL - 1, followed by an addition.
2835
2836    Return true if the cheapest of these cost less than MULT_COST,
2837    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2838
2839 static bool
2840 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2841                      struct algorithm *alg, enum mult_variant *variant,
2842                      int mult_cost)
2843 {
2844   struct algorithm alg2;
2845   struct mult_cost limit;
2846   int op_cost;
2847   bool speed = optimize_insn_for_speed_p ();
2848
2849   /* Fail quickly for impossible bounds.  */
2850   if (mult_cost < 0)
2851     return false;
2852
2853   /* Ensure that mult_cost provides a reasonable upper bound.
2854      Any constant multiplication can be performed with less
2855      than 2 * bits additions.  */
2856   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2857   if (mult_cost > op_cost)
2858     mult_cost = op_cost;
2859
2860   *variant = basic_variant;
2861   limit.cost = mult_cost;
2862   limit.latency = mult_cost;
2863   synth_mult (alg, val, &limit, mode);
2864
2865   /* This works only if the inverted value actually fits in an
2866      `unsigned int' */
2867   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2868     {
2869       op_cost = neg_cost[speed][mode];
2870       if (MULT_COST_LESS (&alg->cost, mult_cost))
2871         {
2872           limit.cost = alg->cost.cost - op_cost;
2873           limit.latency = alg->cost.latency - op_cost;
2874         }
2875       else
2876         {
2877           limit.cost = mult_cost - op_cost;
2878           limit.latency = mult_cost - op_cost;
2879         }
2880
2881       synth_mult (&alg2, -val, &limit, mode);
2882       alg2.cost.cost += op_cost;
2883       alg2.cost.latency += op_cost;
2884       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2885         *alg = alg2, *variant = negate_variant;
2886     }
2887
2888   /* This proves very useful for division-by-constant.  */
2889   op_cost = add_cost[speed][mode];
2890   if (MULT_COST_LESS (&alg->cost, mult_cost))
2891     {
2892       limit.cost = alg->cost.cost - op_cost;
2893       limit.latency = alg->cost.latency - op_cost;
2894     }
2895   else
2896     {
2897       limit.cost = mult_cost - op_cost;
2898       limit.latency = mult_cost - op_cost;
2899     }
2900
2901   synth_mult (&alg2, val - 1, &limit, mode);
2902   alg2.cost.cost += op_cost;
2903   alg2.cost.latency += op_cost;
2904   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905     *alg = alg2, *variant = add_variant;
2906
2907   return MULT_COST_LESS (&alg->cost, mult_cost);
2908 }
2909
2910 /* A subroutine of expand_mult, used for constant multiplications.
2911    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2912    convenient.  Use the shift/add sequence described by ALG and apply
2913    the final fixup specified by VARIANT.  */
2914
2915 static rtx
2916 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2917                    rtx target, const struct algorithm *alg,
2918                    enum mult_variant variant)
2919 {
2920   HOST_WIDE_INT val_so_far;
2921   rtx insn, accum, tem;
2922   int opno;
2923   enum machine_mode nmode;
2924
2925   /* Avoid referencing memory over and over and invalid sharing
2926      on SUBREGs.  */
2927   op0 = force_reg (mode, op0);
2928
2929   /* ACCUM starts out either as OP0 or as a zero, depending on
2930      the first operation.  */
2931
2932   if (alg->op[0] == alg_zero)
2933     {
2934       accum = copy_to_mode_reg (mode, const0_rtx);
2935       val_so_far = 0;
2936     }
2937   else if (alg->op[0] == alg_m)
2938     {
2939       accum = copy_to_mode_reg (mode, op0);
2940       val_so_far = 1;
2941     }
2942   else
2943     gcc_unreachable ();
2944
2945   for (opno = 1; opno < alg->ops; opno++)
2946     {
2947       int log = alg->log[opno];
2948       rtx shift_subtarget = optimize ? 0 : accum;
2949       rtx add_target
2950         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2951            && !optimize)
2952           ? target : 0;
2953       rtx accum_target = optimize ? 0 : accum;
2954       rtx accum_inner;
2955
2956       switch (alg->op[opno])
2957         {
2958         case alg_shift:
2959           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2960           /* REG_EQUAL note will be attached to the following insn.  */
2961           emit_move_insn (accum, tem);
2962           val_so_far <<= log;
2963           break;
2964
2965         case alg_add_t_m2:
2966           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2967           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2968                                  add_target ? add_target : accum_target);
2969           val_so_far += (HOST_WIDE_INT) 1 << log;
2970           break;
2971
2972         case alg_sub_t_m2:
2973           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2974           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2975                                  add_target ? add_target : accum_target);
2976           val_so_far -= (HOST_WIDE_INT) 1 << log;
2977           break;
2978
2979         case alg_add_t2_m:
2980           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2981                                 log, shift_subtarget, 0);
2982           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2983                                  add_target ? add_target : accum_target);
2984           val_so_far = (val_so_far << log) + 1;
2985           break;
2986
2987         case alg_sub_t2_m:
2988           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2989                                 log, shift_subtarget, 0);
2990           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2991                                  add_target ? add_target : accum_target);
2992           val_so_far = (val_so_far << log) - 1;
2993           break;
2994
2995         case alg_add_factor:
2996           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2997           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2998                                  add_target ? add_target : accum_target);
2999           val_so_far += val_so_far << log;
3000           break;
3001
3002         case alg_sub_factor:
3003           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3004           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3005                                  (add_target
3006                                   ? add_target : (optimize ? 0 : tem)));
3007           val_so_far = (val_so_far << log) - val_so_far;
3008           break;
3009
3010         default:
3011           gcc_unreachable ();
3012         }
3013
3014       /* Write a REG_EQUAL note on the last insn so that we can cse
3015          multiplication sequences.  Note that if ACCUM is a SUBREG,
3016          we've set the inner register and must properly indicate
3017          that.  */
3018
3019       tem = op0, nmode = mode;
3020       accum_inner = accum;
3021       if (GET_CODE (accum) == SUBREG)
3022         {
3023           accum_inner = SUBREG_REG (accum);
3024           nmode = GET_MODE (accum_inner);
3025           tem = gen_lowpart (nmode, op0);
3026         }
3027
3028       insn = get_last_insn ();
3029       set_dst_reg_note (insn, REG_EQUAL,
3030                         gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3031                         accum_inner);
3032     }
3033
3034   if (variant == negate_variant)
3035     {
3036       val_so_far = -val_so_far;
3037       accum = expand_unop (mode, neg_optab, accum, target, 0);
3038     }
3039   else if (variant == add_variant)
3040     {
3041       val_so_far = val_so_far + 1;
3042       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3043     }
3044
3045   /* Compare only the bits of val and val_so_far that are significant
3046      in the result mode, to avoid sign-/zero-extension confusion.  */
3047   val &= GET_MODE_MASK (mode);
3048   val_so_far &= GET_MODE_MASK (mode);
3049   gcc_assert (val == val_so_far);
3050
3051   return accum;
3052 }
3053
3054 /* Perform a multiplication and return an rtx for the result.
3055    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3056    TARGET is a suggestion for where to store the result (an rtx).
3057
3058    We check specially for a constant integer as OP1.
3059    If you want this check for OP0 as well, then before calling
3060    you should swap the two operands if OP0 would be constant.  */
3061
3062 rtx
3063 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3064              int unsignedp)
3065 {
3066   enum mult_variant variant;
3067   struct algorithm algorithm;
3068   int max_cost;
3069   bool speed = optimize_insn_for_speed_p ();
3070
3071   /* Handling const0_rtx here allows us to use zero as a rogue value for
3072      coeff below.  */
3073   if (op1 == const0_rtx)
3074     return const0_rtx;
3075   if (op1 == const1_rtx)
3076     return op0;
3077   if (op1 == constm1_rtx)
3078     return expand_unop (mode,
3079                         GET_MODE_CLASS (mode) == MODE_INT
3080                         && !unsignedp && flag_trapv
3081                         ? negv_optab : neg_optab,
3082                         op0, target, 0);
3083
3084   /* These are the operations that are potentially turned into a sequence
3085      of shifts and additions.  */
3086   if (SCALAR_INT_MODE_P (mode)
3087       && (unsignedp || !flag_trapv))
3088     {
3089       HOST_WIDE_INT coeff = 0;
3090       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3091
3092       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3093          less than or equal in size to `unsigned int' this doesn't matter.
3094          If the mode is larger than `unsigned int', then synth_mult works
3095          only if the constant value exactly fits in an `unsigned int' without
3096          any truncation.  This means that multiplying by negative values does
3097          not work; results are off by 2^32 on a 32 bit machine.  */
3098
3099       if (CONST_INT_P (op1))
3100         {
3101           /* Attempt to handle multiplication of DImode values by negative
3102              coefficients, by performing the multiplication by a positive
3103              multiplier and then inverting the result.  */
3104           if (INTVAL (op1) < 0
3105               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3106             {
3107               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3108                  result is interpreted as an unsigned coefficient.
3109                  Exclude cost of op0 from max_cost to match the cost
3110                  calculation of the synth_mult.  */
3111               max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3112                                         speed)
3113                           - neg_cost[speed][mode]);
3114               if (max_cost > 0
3115                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3116                                           &variant, max_cost))
3117                 {
3118                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3119                                                 NULL_RTX, &algorithm,
3120                                                 variant);
3121                   return expand_unop (mode, neg_optab, temp, target, 0);
3122                 }
3123             }
3124           else coeff = INTVAL (op1);
3125         }
3126       else if (GET_CODE (op1) == CONST_DOUBLE)
3127         {
3128           /* If we are multiplying in DImode, it may still be a win
3129              to try to work with shifts and adds.  */
3130           if (CONST_DOUBLE_HIGH (op1) == 0
3131               && CONST_DOUBLE_LOW (op1) > 0)
3132             coeff = CONST_DOUBLE_LOW (op1);
3133           else if (CONST_DOUBLE_LOW (op1) == 0
3134                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3135             {
3136               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3137                           + HOST_BITS_PER_WIDE_INT;
3138               return expand_shift (LSHIFT_EXPR, mode, op0,
3139                                    shift, target, unsignedp);
3140             }
3141         }
3142
3143       /* We used to test optimize here, on the grounds that it's better to
3144          produce a smaller program when -O is not used.  But this causes
3145          such a terrible slowdown sometimes that it seems better to always
3146          use synth_mult.  */
3147       if (coeff != 0)
3148         {
3149           /* Special case powers of two.  */
3150           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3151             return expand_shift (LSHIFT_EXPR, mode, op0,
3152                                  floor_log2 (coeff), target, unsignedp);
3153
3154           /* Exclude cost of op0 from max_cost to match the cost
3155              calculation of the synth_mult.  */
3156           max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3157           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3158                                    max_cost))
3159             return expand_mult_const (mode, op0, coeff, target,
3160                                       &algorithm, variant);
3161         }
3162     }
3163
3164   if (GET_CODE (op0) == CONST_DOUBLE)
3165     {
3166       rtx temp = op0;
3167       op0 = op1;
3168       op1 = temp;
3169     }
3170
3171   /* Expand x*2.0 as x+x.  */
3172   if (GET_CODE (op1) == CONST_DOUBLE
3173       && SCALAR_FLOAT_MODE_P (mode))
3174     {
3175       REAL_VALUE_TYPE d;
3176       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3177
3178       if (REAL_VALUES_EQUAL (d, dconst2))
3179         {
3180           op0 = force_reg (GET_MODE (op0), op0);
3181           return expand_binop (mode, add_optab, op0, op0,
3182                                target, unsignedp, OPTAB_LIB_WIDEN);
3183         }
3184     }
3185
3186   /* This used to use umul_optab if unsigned, but for non-widening multiply
3187      there is no difference between signed and unsigned.  */
3188   op0 = expand_binop (mode,
3189                       ! unsignedp
3190                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3191                       ? smulv_optab : smul_optab,
3192                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3193   gcc_assert (op0);
3194   return op0;
3195 }
3196
3197 /* Perform a widening multiplication and return an rtx for the result.
3198    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3199    TARGET is a suggestion for where to store the result (an rtx).
3200    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3201    or smul_widen_optab.
3202
3203    We check specially for a constant integer as OP1, comparing the
3204    cost of a widening multiply against the cost of a sequence of shifts
3205    and adds.  */
3206
3207 rtx
3208 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3209                       int unsignedp, optab this_optab)
3210 {
3211   bool speed = optimize_insn_for_speed_p ();
3212   rtx cop1;
3213
3214   if (CONST_INT_P (op1)
3215       && GET_MODE (op0) != VOIDmode
3216       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3217                                 this_optab == umul_widen_optab))
3218       && CONST_INT_P (cop1)
3219       && (INTVAL (cop1) >= 0
3220           || HWI_COMPUTABLE_MODE_P (mode)))
3221     {
3222       HOST_WIDE_INT coeff = INTVAL (cop1);
3223       int max_cost;
3224       enum mult_variant variant;
3225       struct algorithm algorithm;
3226
3227       /* Special case powers of two.  */
3228       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3229         {
3230           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3231           return expand_shift (LSHIFT_EXPR, mode, op0,
3232                                floor_log2 (coeff), target, unsignedp);
3233         }
3234
3235       /* Exclude cost of op0 from max_cost to match the cost
3236          calculation of the synth_mult.  */
3237       max_cost = mul_widen_cost[speed][mode];
3238       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3239                                max_cost))
3240         {
3241           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3242           return expand_mult_const (mode, op0, coeff, target,
3243                                     &algorithm, variant);
3244         }
3245     }
3246   return expand_binop (mode, this_optab, op0, op1, target,
3247                        unsignedp, OPTAB_LIB_WIDEN);
3248 }
3249 \f
3250 /* Return the smallest n such that 2**n >= X.  */
3251
3252 int
3253 ceil_log2 (unsigned HOST_WIDE_INT x)
3254 {
3255   return floor_log2 (x - 1) + 1;
3256 }
3257
3258 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3259    replace division by D, and put the least significant N bits of the result
3260    in *MULTIPLIER_PTR and return the most significant bit.
3261
3262    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3263    needed precision is in PRECISION (should be <= N).
3264
3265    PRECISION should be as small as possible so this function can choose
3266    multiplier more freely.
3267
3268    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3269    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3270
3271    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3272    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3273
3274 static
3275 unsigned HOST_WIDE_INT
3276 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3277                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3278 {
3279   HOST_WIDE_INT mhigh_hi, mlow_hi;
3280   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3281   int lgup, post_shift;
3282   int pow, pow2;
3283   unsigned HOST_WIDE_INT nl, dummy1;
3284   HOST_WIDE_INT nh, dummy2;
3285
3286   /* lgup = ceil(log2(divisor)); */
3287   lgup = ceil_log2 (d);
3288
3289   gcc_assert (lgup <= n);
3290
3291   pow = n + lgup;
3292   pow2 = n + lgup - precision;
3293
3294   /* We could handle this with some effort, but this case is much
3295      better handled directly with a scc insn, so rely on caller using
3296      that.  */
3297   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3298
3299   /* mlow = 2^(N + lgup)/d */
3300  if (pow >= HOST_BITS_PER_WIDE_INT)
3301     {
3302       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3303       nl = 0;
3304     }
3305   else
3306     {
3307       nh = 0;
3308       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3309     }
3310   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3311                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3312
3313   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3314   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3315     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3316   else
3317     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3318   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3319                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3320
3321   gcc_assert (!mhigh_hi || nh - d < d);
3322   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3323   /* Assert that mlow < mhigh.  */
3324   gcc_assert (mlow_hi < mhigh_hi
3325               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3326
3327   /* If precision == N, then mlow, mhigh exceed 2^N
3328      (but they do not exceed 2^(N+1)).  */
3329
3330   /* Reduce to lowest terms.  */
3331   for (post_shift = lgup; post_shift > 0; post_shift--)
3332     {
3333       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3334       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3335       if (ml_lo >= mh_lo)
3336         break;
3337
3338       mlow_hi = 0;
3339       mlow_lo = ml_lo;
3340       mhigh_hi = 0;
3341       mhigh_lo = mh_lo;
3342     }
3343
3344   *post_shift_ptr = post_shift;
3345   *lgup_ptr = lgup;
3346   if (n < HOST_BITS_PER_WIDE_INT)
3347     {
3348       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3349       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3350       return mhigh_lo >= mask;
3351     }
3352   else
3353     {
3354       *multiplier_ptr = GEN_INT (mhigh_lo);
3355       return mhigh_hi;
3356     }
3357 }
3358
3359 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3360    congruent to 1 (mod 2**N).  */
3361
3362 static unsigned HOST_WIDE_INT
3363 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3364 {
3365   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3366
3367   /* The algorithm notes that the choice y = x satisfies
3368      x*y == 1 mod 2^3, since x is assumed odd.
3369      Each iteration doubles the number of bits of significance in y.  */
3370
3371   unsigned HOST_WIDE_INT mask;
3372   unsigned HOST_WIDE_INT y = x;
3373   int nbit = 3;
3374
3375   mask = (n == HOST_BITS_PER_WIDE_INT
3376           ? ~(unsigned HOST_WIDE_INT) 0
3377           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3378
3379   while (nbit < n)
3380     {
3381       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3382       nbit *= 2;
3383     }
3384   return y;
3385 }
3386
3387 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3388    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3389    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3390    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3391    become signed.
3392
3393    The result is put in TARGET if that is convenient.
3394
3395    MODE is the mode of operation.  */
3396
3397 rtx
3398 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3399                              rtx op1, rtx target, int unsignedp)
3400 {
3401   rtx tem;
3402   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3403
3404   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3405                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3406   tem = expand_and (mode, tem, op1, NULL_RTX);
3407   adj_operand
3408     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3409                      adj_operand);
3410
3411   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3412                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3413   tem = expand_and (mode, tem, op0, NULL_RTX);
3414   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3415                           target);
3416
3417   return target;
3418 }
3419
3420 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3421
3422 static rtx
3423 extract_high_half (enum machine_mode mode, rtx op)
3424 {
3425   enum machine_mode wider_mode;
3426
3427   if (mode == word_mode)
3428     return gen_highpart (mode, op);
3429
3430   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3431
3432   wider_mode = GET_MODE_WIDER_MODE (mode);
3433   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3434                      GET_MODE_BITSIZE (mode), 0, 1);
3435   return convert_modes (mode, wider_mode, op, 0);
3436 }
3437
3438 /* Like expand_mult_highpart, but only consider using a multiplication
3439    optab.  OP1 is an rtx for the constant operand.  */
3440
3441 static rtx
3442 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3443                             rtx target, int unsignedp, int max_cost)
3444 {
3445   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3446   enum machine_mode wider_mode;
3447   optab moptab;
3448   rtx tem;
3449   int size;
3450   bool speed = optimize_insn_for_speed_p ();
3451
3452   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3453
3454   wider_mode = GET_MODE_WIDER_MODE (mode);
3455   size = GET_MODE_BITSIZE (mode);
3456
3457   /* Firstly, try using a multiplication insn that only generates the needed
3458      high part of the product, and in the sign flavor of unsignedp.  */
3459   if (mul_highpart_cost[speed][mode] < max_cost)
3460     {
3461       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3462       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3463                           unsignedp, OPTAB_DIRECT);
3464       if (tem)
3465         return tem;
3466     }
3467
3468   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3469      Need to adjust the result after the multiplication.  */
3470   if (size - 1 < BITS_PER_WORD
3471       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3472           + 4 * add_cost[speed][mode] < max_cost))
3473     {
3474       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3475       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3476                           unsignedp, OPTAB_DIRECT);
3477       if (tem)
3478         /* We used the wrong signedness.  Adjust the result.  */
3479         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3480                                             tem, unsignedp);
3481     }
3482
3483   /* Try widening multiplication.  */
3484   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3485   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3486       && mul_widen_cost[speed][wider_mode] < max_cost)
3487     {
3488       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3489                           unsignedp, OPTAB_WIDEN);
3490       if (tem)
3491         return extract_high_half (mode, tem);
3492     }
3493
3494   /* Try widening the mode and perform a non-widening multiplication.  */
3495   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3496       && size - 1 < BITS_PER_WORD
3497       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3498     {
3499       rtx insns, wop0, wop1;
3500
3501       /* We need to widen the operands, for example to ensure the
3502          constant multiplier is correctly sign or zero extended.
3503          Use a sequence to clean-up any instructions emitted by
3504          the conversions if things don't work out.  */
3505       start_sequence ();
3506       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3507       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3508       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3509                           unsignedp, OPTAB_WIDEN);
3510       insns = get_insns ();
3511       end_sequence ();
3512
3513       if (tem)
3514         {
3515           emit_insn (insns);
3516           return extract_high_half (mode, tem);
3517         }
3518     }
3519
3520   /* Try widening multiplication of opposite signedness, and adjust.  */
3521   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3522   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3523       && size - 1 < BITS_PER_WORD
3524       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3525           + 4 * add_cost[speed][mode] < max_cost))
3526     {
3527       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3528                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3529       if (tem != 0)
3530         {
3531           tem = extract_high_half (mode, tem);
3532           /* We used the wrong signedness.  Adjust the result.  */
3533           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3534                                               target, unsignedp);
3535         }
3536     }
3537
3538   return 0;
3539 }
3540
3541 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3542    putting the high half of the result in TARGET if that is convenient,
3543    and return where the result is.  If the operation can not be performed,
3544    0 is returned.
3545
3546    MODE is the mode of operation and result.
3547
3548    UNSIGNEDP nonzero means unsigned multiply.
3549
3550    MAX_COST is the total allowed cost for the expanded RTL.  */
3551
3552 static rtx
3553 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3554                       rtx target, int unsignedp, int max_cost)
3555 {
3556   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3557   unsigned HOST_WIDE_INT cnst1;
3558   int extra_cost;
3559   bool sign_adjust = false;
3560   enum mult_variant variant;
3561   struct algorithm alg;
3562   rtx tem;
3563   bool speed = optimize_insn_for_speed_p ();
3564
3565   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3566   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3567   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3568
3569   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3570
3571   /* We can't optimize modes wider than BITS_PER_WORD.
3572      ??? We might be able to perform double-word arithmetic if
3573      mode == word_mode, however all the cost calculations in
3574      synth_mult etc. assume single-word operations.  */
3575   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3576     return expand_mult_highpart_optab (mode, op0, op1, target,
3577                                        unsignedp, max_cost);
3578
3579   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3580
3581   /* Check whether we try to multiply by a negative constant.  */
3582   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3583     {
3584       sign_adjust = true;
3585       extra_cost += add_cost[speed][mode];
3586     }
3587
3588   /* See whether shift/add multiplication is cheap enough.  */
3589   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3590                            max_cost - extra_cost))
3591     {
3592       /* See whether the specialized multiplication optabs are
3593          cheaper than the shift/add version.  */
3594       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3595                                         alg.cost.cost + extra_cost);
3596       if (tem)
3597         return tem;
3598
3599       tem = convert_to_mode (wider_mode, op0, unsignedp);
3600       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3601       tem = extract_high_half (mode, tem);
3602
3603       /* Adjust result for signedness.  */
3604       if (sign_adjust)
3605         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3606
3607       return tem;
3608     }
3609   return expand_mult_highpart_optab (mode, op0, op1, target,
3610                                      unsignedp, max_cost);
3611 }
3612
3613
3614 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3615
3616 static rtx
3617 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3618 {
3619   unsigned HOST_WIDE_INT masklow, maskhigh;
3620   rtx result, temp, shift, label;
3621   int logd;
3622
3623   logd = floor_log2 (d);
3624   result = gen_reg_rtx (mode);
3625
3626   /* Avoid conditional branches when they're expensive.  */
3627   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3628       && optimize_insn_for_speed_p ())
3629     {
3630       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3631                                       mode, 0, -1);
3632       if (signmask)
3633         {
3634           signmask = force_reg (mode, signmask);
3635           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3636           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3637
3638           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3639              which instruction sequence to use.  If logical right shifts
3640              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3641              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3642
3643           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3644           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3645               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3646                   > COSTS_N_INSNS (2)))
3647             {
3648               temp = expand_binop (mode, xor_optab, op0, signmask,
3649                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650               temp = expand_binop (mode, sub_optab, temp, signmask,
3651                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3653                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654               temp = expand_binop (mode, xor_optab, temp, signmask,
3655                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3656               temp = expand_binop (mode, sub_optab, temp, signmask,
3657                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658             }
3659           else
3660             {
3661               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3662                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663               signmask = force_reg (mode, signmask);
3664
3665               temp = expand_binop (mode, add_optab, op0, signmask,
3666                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3668                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3669               temp = expand_binop (mode, sub_optab, temp, signmask,
3670                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671             }
3672           return temp;
3673         }
3674     }
3675
3676   /* Mask contains the mode's signbit and the significant bits of the
3677      modulus.  By including the signbit in the operation, many targets
3678      can avoid an explicit compare operation in the following comparison
3679      against zero.  */
3680
3681   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3682   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3683     {
3684       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3685       maskhigh = -1;
3686     }
3687   else
3688     maskhigh = (HOST_WIDE_INT) -1
3689                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3690
3691   temp = expand_binop (mode, and_optab, op0,
3692                        immed_double_const (masklow, maskhigh, mode),
3693                        result, 1, OPTAB_LIB_WIDEN);
3694   if (temp != result)
3695     emit_move_insn (result, temp);
3696
3697   label = gen_label_rtx ();
3698   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3699
3700   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3701                        0, OPTAB_LIB_WIDEN);
3702   masklow = (HOST_WIDE_INT) -1 << logd;
3703   maskhigh = -1;
3704   temp = expand_binop (mode, ior_optab, temp,
3705                        immed_double_const (masklow, maskhigh, mode),
3706                        result, 1, OPTAB_LIB_WIDEN);
3707   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3708                        0, OPTAB_LIB_WIDEN);
3709   if (temp != result)
3710     emit_move_insn (result, temp);
3711   emit_label (label);
3712   return result;
3713 }
3714
3715 /* Expand signed division of OP0 by a power of two D in mode MODE.
3716    This routine is only called for positive values of D.  */
3717
3718 static rtx
3719 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3720 {
3721   rtx temp, label;
3722   int logd;
3723
3724   logd = floor_log2 (d);
3725
3726   if (d == 2
3727       && BRANCH_COST (optimize_insn_for_speed_p (),
3728                       false) >= 1)
3729     {
3730       temp = gen_reg_rtx (mode);
3731       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3732       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3733                            0, OPTAB_LIB_WIDEN);
3734       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3735     }
3736
3737 #ifdef HAVE_conditional_move
3738   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3739       >= 2)
3740     {
3741       rtx temp2;
3742
3743       /* ??? emit_conditional_move forces a stack adjustment via
3744          compare_from_rtx so, if the sequence is discarded, it will
3745          be lost.  Do it now instead.  */
3746       do_pending_stack_adjust ();
3747
3748       start_sequence ();
3749       temp2 = copy_to_mode_reg (mode, op0);
3750       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3751                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3752       temp = force_reg (mode, temp);
3753
3754       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3755       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3756                                      mode, temp, temp2, mode, 0);
3757       if (temp2)
3758         {
3759           rtx seq = get_insns ();
3760           end_sequence ();
3761           emit_insn (seq);
3762           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3763         }
3764       end_sequence ();
3765     }
3766 #endif
3767
3768   if (BRANCH_COST (optimize_insn_for_speed_p (),
3769                    false) >= 2)
3770     {
3771       int ushift = GET_MODE_BITSIZE (mode) - logd;
3772
3773       temp = gen_reg_rtx (mode);
3774       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3775       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3776         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3777                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3778       else
3779         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3780                              ushift, NULL_RTX, 1);
3781       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3782                            0, OPTAB_LIB_WIDEN);
3783       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3784     }
3785
3786   label = gen_label_rtx ();
3787   temp = copy_to_mode_reg (mode, op0);
3788   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3789   expand_inc (temp, GEN_INT (d - 1));
3790   emit_label (label);
3791   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3792 }
3793 \f
3794 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3795    if that is convenient, and returning where the result is.
3796    You may request either the quotient or the remainder as the result;
3797    specify REM_FLAG nonzero to get the remainder.
3798
3799    CODE is the expression code for which kind of division this is;
3800    it controls how rounding is done.  MODE is the machine mode to use.
3801    UNSIGNEDP nonzero means do unsigned division.  */
3802
3803 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3804    and then correct it by or'ing in missing high bits
3805    if result of ANDI is nonzero.
3806    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3807    This could optimize to a bfexts instruction.
3808    But C doesn't use these operations, so their optimizations are
3809    left for later.  */
3810 /* ??? For modulo, we don't actually need the highpart of the first product,
3811    the low part will do nicely.  And for small divisors, the second multiply
3812    can also be a low-part only multiply or even be completely left out.
3813    E.g. to calculate the remainder of a division by 3 with a 32 bit
3814    multiply, multiply with 0x55555556 and extract the upper two bits;
3815    the result is exact for inputs up to 0x1fffffff.
3816    The input range can be reduced by using cross-sum rules.
3817    For odd divisors >= 3, the following table gives right shift counts
3818    so that if a number is shifted by an integer multiple of the given
3819    amount, the remainder stays the same:
3820    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3821    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3822    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3823    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3824    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3825
3826    Cross-sum rules for even numbers can be derived by leaving as many bits
3827    to the right alone as the divisor has zeros to the right.
3828    E.g. if x is an unsigned 32 bit number:
3829    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3830    */
3831
3832 rtx
3833 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3834                rtx op0, rtx op1, rtx target, int unsignedp)
3835 {
3836   enum machine_mode compute_mode;
3837   rtx tquotient;
3838   rtx quotient = 0, remainder = 0;
3839   rtx last;
3840   int size;
3841   rtx insn;
3842   optab optab1, optab2;
3843   int op1_is_constant, op1_is_pow2 = 0;
3844   int max_cost, extra_cost;
3845   static HOST_WIDE_INT last_div_const = 0;
3846   static HOST_WIDE_INT ext_op1;
3847   bool speed = optimize_insn_for_speed_p ();
3848
3849   op1_is_constant = CONST_INT_P (op1);
3850   if (op1_is_constant)
3851     {
3852       ext_op1 = INTVAL (op1);
3853       if (unsignedp)
3854         ext_op1 &= GET_MODE_MASK (mode);
3855       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3856                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3857     }
3858
3859   /*
3860      This is the structure of expand_divmod:
3861
3862      First comes code to fix up the operands so we can perform the operations
3863      correctly and efficiently.
3864
3865      Second comes a switch statement with code specific for each rounding mode.
3866      For some special operands this code emits all RTL for the desired
3867      operation, for other cases, it generates only a quotient and stores it in
3868      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3869      to indicate that it has not done anything.
3870
3871      Last comes code that finishes the operation.  If QUOTIENT is set and
3872      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3873      QUOTIENT is not set, it is computed using trunc rounding.
3874
3875      We try to generate special code for division and remainder when OP1 is a
3876      constant.  If |OP1| = 2**n we can use shifts and some other fast
3877      operations.  For other values of OP1, we compute a carefully selected
3878      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3879      by m.
3880
3881      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3882      half of the product.  Different strategies for generating the product are
3883      implemented in expand_mult_highpart.
3884
3885      If what we actually want is the remainder, we generate that by another
3886      by-constant multiplication and a subtraction.  */
3887
3888   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3889      code below will malfunction if we are, so check here and handle
3890      the special case if so.  */
3891   if (op1 == const1_rtx)
3892     return rem_flag ? const0_rtx : op0;
3893
3894     /* When dividing by -1, we could get an overflow.
3895      negv_optab can handle overflows.  */
3896   if (! unsignedp && op1 == constm1_rtx)
3897     {
3898       if (rem_flag)
3899         return const0_rtx;
3900       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3901                           ? negv_optab : neg_optab, op0, target, 0);
3902     }
3903
3904   if (target
3905       /* Don't use the function value register as a target
3906          since we have to read it as well as write it,
3907          and function-inlining gets confused by this.  */
3908       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3909           /* Don't clobber an operand while doing a multi-step calculation.  */
3910           || ((rem_flag || op1_is_constant)
3911               && (reg_mentioned_p (target, op0)
3912                   || (MEM_P (op0) && MEM_P (target))))
3913           || reg_mentioned_p (target, op1)
3914           || (MEM_P (op1) && MEM_P (target))))
3915     target = 0;
3916
3917   /* Get the mode in which to perform this computation.  Normally it will
3918      be MODE, but sometimes we can't do the desired operation in MODE.
3919      If so, pick a wider mode in which we can do the operation.  Convert
3920      to that mode at the start to avoid repeated conversions.
3921
3922      First see what operations we need.  These depend on the expression
3923      we are evaluating.  (We assume that divxx3 insns exist under the
3924      same conditions that modxx3 insns and that these insns don't normally
3925      fail.  If these assumptions are not correct, we may generate less
3926      efficient code in some cases.)
3927
3928      Then see if we find a mode in which we can open-code that operation
3929      (either a division, modulus, or shift).  Finally, check for the smallest
3930      mode for which we can do the operation with a library call.  */
3931
3932   /* We might want to refine this now that we have division-by-constant
3933      optimization.  Since expand_mult_highpart tries so many variants, it is
3934      not straightforward to generalize this.  Maybe we should make an array
3935      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3936
3937   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3938             ? (unsignedp ? lshr_optab : ashr_optab)
3939             : (unsignedp ? udiv_optab : sdiv_optab));
3940   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3941             ? optab1
3942             : (unsignedp ? udivmod_optab : sdivmod_optab));
3943
3944   for (compute_mode = mode; compute_mode != VOIDmode;
3945        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3946     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3947         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3948       break;
3949
3950   if (compute_mode == VOIDmode)
3951     for (compute_mode = mode; compute_mode != VOIDmode;
3952          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3953       if (optab_libfunc (optab1, compute_mode)
3954           || optab_libfunc (optab2, compute_mode))
3955         break;
3956
3957   /* If we still couldn't find a mode, use MODE, but expand_binop will
3958      probably die.  */
3959   if (compute_mode == VOIDmode)
3960     compute_mode = mode;
3961
3962   if (target && GET_MODE (target) == compute_mode)
3963     tquotient = target;
3964   else
3965     tquotient = gen_reg_rtx (compute_mode);
3966
3967   size = GET_MODE_BITSIZE (compute_mode);
3968 #if 0
3969   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3970      (mode), and thereby get better code when OP1 is a constant.  Do that
3971      later.  It will require going over all usages of SIZE below.  */
3972   size = GET_MODE_BITSIZE (mode);
3973 #endif
3974
3975   /* Only deduct something for a REM if the last divide done was
3976      for a different constant.   Then set the constant of the last
3977      divide.  */
3978   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3979   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3980                      && INTVAL (op1) == last_div_const))
3981     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3982
3983   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3984
3985   /* Now convert to the best mode to use.  */
3986   if (compute_mode != mode)
3987     {
3988       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3989       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3990
3991       /* convert_modes may have placed op1 into a register, so we
3992          must recompute the following.  */
3993       op1_is_constant = CONST_INT_P (op1);
3994       op1_is_pow2 = (op1_is_constant
3995                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3996                           || (! unsignedp
3997                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3998     }
3999
4000   /* If one of the operands is a volatile MEM, copy it into a register.  */
4001
4002   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4003     op0 = force_reg (compute_mode, op0);
4004   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4005     op1 = force_reg (compute_mode, op1);
4006
4007   /* If we need the remainder or if OP1 is constant, we need to
4008      put OP0 in a register in case it has any queued subexpressions.  */
4009   if (rem_flag || op1_is_constant)
4010     op0 = force_reg (compute_mode, op0);
4011
4012   last = get_last_insn ();
4013
4014   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4015   if (unsignedp)
4016     {
4017       if (code == FLOOR_DIV_EXPR)
4018         code = TRUNC_DIV_EXPR;
4019       if (code == FLOOR_MOD_EXPR)
4020         code = TRUNC_MOD_EXPR;
4021       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4022         code = TRUNC_DIV_EXPR;
4023     }
4024
4025   if (op1 != const0_rtx)
4026     switch (code)
4027       {
4028       case TRUNC_MOD_EXPR:
4029       case TRUNC_DIV_EXPR:
4030         if (op1_is_constant)
4031           {
4032             if (unsignedp)
4033               {
4034                 unsigned HOST_WIDE_INT mh;
4035                 int pre_shift, post_shift;
4036                 int dummy;
4037                 rtx ml;
4038                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4039                                             & GET_MODE_MASK (compute_mode));
4040
4041                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4042                   {
4043                     pre_shift = floor_log2 (d);
4044                     if (rem_flag)
4045                       {
4046                         remainder
4047                           = expand_binop (compute_mode, and_optab, op0,
4048                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4049                                           remainder, 1,
4050                                           OPTAB_LIB_WIDEN);
4051                         if (remainder)
4052                           return gen_lowpart (mode, remainder);
4053                       }
4054                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4055                                              pre_shift, tquotient, 1);
4056                   }
4057                 else if (size <= HOST_BITS_PER_WIDE_INT)
4058                   {
4059                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4060                       {
4061                         /* Most significant bit of divisor is set; emit an scc
4062                            insn.  */
4063                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4064                                                           compute_mode, 1, 1);
4065                       }
4066                     else
4067                       {
4068                         /* Find a suitable multiplier and right shift count
4069                            instead of multiplying with D.  */
4070
4071                         mh = choose_multiplier (d, size, size,
4072                                                 &ml, &post_shift, &dummy);
4073
4074                         /* If the suggested multiplier is more than SIZE bits,
4075                            we can do better for even divisors, using an
4076                            initial right shift.  */
4077                         if (mh != 0 && (d & 1) == 0)
4078                           {
4079                             pre_shift = floor_log2 (d & -d);
4080                             mh = choose_multiplier (d >> pre_shift, size,
4081                                                     size - pre_shift,
4082                                                     &ml, &post_shift, &dummy);
4083                             gcc_assert (!mh);
4084                           }
4085                         else
4086                           pre_shift = 0;
4087
4088                         if (mh != 0)
4089                           {
4090                             rtx t1, t2, t3, t4;
4091
4092                             if (post_shift - 1 >= BITS_PER_WORD)
4093                               goto fail1;
4094
4095                             extra_cost
4096                               = (shift_cost[speed][compute_mode][post_shift - 1]
4097                                  + shift_cost[speed][compute_mode][1]
4098                                  + 2 * add_cost[speed][compute_mode]);
4099                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4100                                                        NULL_RTX, 1,
4101                                                        max_cost - extra_cost);
4102                             if (t1 == 0)
4103                               goto fail1;
4104                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4105                                                                op0, t1),
4106                                                 NULL_RTX);
4107                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4108                                                t2, 1, NULL_RTX, 1);
4109                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4110                                                               t1, t3),
4111                                                 NULL_RTX);
4112                             quotient = expand_shift
4113                               (RSHIFT_EXPR, compute_mode, t4,
4114                                post_shift - 1, tquotient, 1);
4115                           }
4116                         else
4117                           {
4118                             rtx t1, t2;
4119
4120                             if (pre_shift >= BITS_PER_WORD
4121                                 || post_shift >= BITS_PER_WORD)
4122                               goto fail1;
4123
4124                             t1 = expand_shift
4125                               (RSHIFT_EXPR, compute_mode, op0,
4126                                pre_shift, NULL_RTX, 1);
4127                             extra_cost
4128                               = (shift_cost[speed][compute_mode][pre_shift]
4129                                  + shift_cost[speed][compute_mode][post_shift]);
4130                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4131                                                        NULL_RTX, 1,
4132                                                        max_cost - extra_cost);
4133                             if (t2 == 0)
4134                               goto fail1;
4135                             quotient = expand_shift
4136                               (RSHIFT_EXPR, compute_mode, t2,
4137                                post_shift, tquotient, 1);
4138                           }
4139                       }
4140                   }
4141                 else            /* Too wide mode to use tricky code */
4142                   break;
4143
4144                 insn = get_last_insn ();
4145                 if (insn != last)
4146                   set_dst_reg_note (insn, REG_EQUAL,
4147                                     gen_rtx_UDIV (compute_mode, op0, op1),
4148                                     quotient);
4149               }
4150             else                /* TRUNC_DIV, signed */
4151               {
4152                 unsigned HOST_WIDE_INT ml;
4153                 int lgup, post_shift;
4154                 rtx mlr;
4155                 HOST_WIDE_INT d = INTVAL (op1);
4156                 unsigned HOST_WIDE_INT abs_d;
4157
4158                 /* Since d might be INT_MIN, we have to cast to
4159                    unsigned HOST_WIDE_INT before negating to avoid
4160                    undefined signed overflow.  */
4161                 abs_d = (d >= 0
4162                          ? (unsigned HOST_WIDE_INT) d
4163                          : - (unsigned HOST_WIDE_INT) d);
4164
4165                 /* n rem d = n rem -d */
4166                 if (rem_flag && d < 0)
4167                   {
4168                     d = abs_d;
4169                     op1 = gen_int_mode (abs_d, compute_mode);
4170                   }
4171
4172                 if (d == 1)
4173                   quotient = op0;
4174                 else if (d == -1)
4175                   quotient = expand_unop (compute_mode, neg_optab, op0,
4176                                           tquotient, 0);
4177                 else if (HOST_BITS_PER_WIDE_INT >= size
4178                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4179                   {
4180                     /* This case is not handled correctly below.  */
4181                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4182                                                 compute_mode, 1, 1);
4183                     if (quotient == 0)
4184                       goto fail1;
4185                   }
4186                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4187                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4188                                       : sdiv_pow2_cheap[speed][compute_mode])
4189                          /* We assume that cheap metric is true if the
4190                             optab has an expander for this mode.  */
4191                          && ((optab_handler ((rem_flag ? smod_optab
4192                                               : sdiv_optab),
4193                                              compute_mode)
4194                               != CODE_FOR_nothing)
4195                              || (optab_handler (sdivmod_optab,
4196                                                 compute_mode)
4197                                  != CODE_FOR_nothing)))
4198                   ;
4199                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4200                   {
4201                     if (rem_flag)
4202                       {
4203                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4204                         if (remainder)
4205                           return gen_lowpart (mode, remainder);
4206                       }
4207
4208                     if (sdiv_pow2_cheap[speed][compute_mode]
4209                         && ((optab_handler (sdiv_optab, compute_mode)
4210                              != CODE_FOR_nothing)
4211                             || (optab_handler (sdivmod_optab, compute_mode)
4212                                 != CODE_FOR_nothing)))
4213                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4214                                                 compute_mode, op0,
4215                                                 gen_int_mode (abs_d,
4216                                                               compute_mode),
4217                                                 NULL_RTX, 0);
4218                     else
4219                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4220
4221                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4222                        negate the quotient.  */
4223                     if (d < 0)
4224                       {
4225                         insn = get_last_insn ();
4226                         if (insn != last
4227                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4228                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4229                           set_dst_reg_note (insn, REG_EQUAL,
4230                                             gen_rtx_DIV (compute_mode, op0,
4231                                                          gen_int_mode
4232                                                            (abs_d,
4233                                                             compute_mode)),
4234                                             quotient);
4235
4236                         quotient = expand_unop (compute_mode, neg_optab,
4237                                                 quotient, quotient, 0);
4238                       }
4239                   }
4240                 else if (size <= HOST_BITS_PER_WIDE_INT)
4241                   {
4242                     choose_multiplier (abs_d, size, size - 1,
4243                                        &mlr, &post_shift, &lgup);
4244                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4245                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4246                       {
4247                         rtx t1, t2, t3;
4248
4249                         if (post_shift >= BITS_PER_WORD
4250                             || size - 1 >= BITS_PER_WORD)
4251                           goto fail1;
4252
4253                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4254                                       + shift_cost[speed][compute_mode][size - 1]
4255                                       + add_cost[speed][compute_mode]);
4256                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4257                                                    NULL_RTX, 0,
4258                                                    max_cost - extra_cost);
4259                         if (t1 == 0)
4260                           goto fail1;
4261                         t2 = expand_shift
4262                           (RSHIFT_EXPR, compute_mode, t1,
4263                            post_shift, NULL_RTX, 0);
4264                         t3 = expand_shift
4265                           (RSHIFT_EXPR, compute_mode, op0,
4266                            size - 1, NULL_RTX, 0);
4267                         if (d < 0)
4268                           quotient
4269                             = force_operand (gen_rtx_MINUS (compute_mode,
4270                                                             t3, t2),
4271                                              tquotient);
4272                         else
4273                           quotient
4274                             = force_operand (gen_rtx_MINUS (compute_mode,
4275                                                             t2, t3),
4276                                              tquotient);
4277                       }
4278                     else
4279                       {
4280                         rtx t1, t2, t3, t4;
4281
4282                         if (post_shift >= BITS_PER_WORD
4283                             || size - 1 >= BITS_PER_WORD)
4284                           goto fail1;
4285
4286                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4287                         mlr = gen_int_mode (ml, compute_mode);
4288                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4289                                       + shift_cost[speed][compute_mode][size - 1]
4290                                       + 2 * add_cost[speed][compute_mode]);
4291                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4292                                                    NULL_RTX, 0,
4293                                                    max_cost - extra_cost);
4294                         if (t1 == 0)
4295                           goto fail1;
4296                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4297                                                           t1, op0),
4298                                             NULL_RTX);
4299                         t3 = expand_shift
4300                           (RSHIFT_EXPR, compute_mode, t2,
4301                            post_shift, NULL_RTX, 0);
4302                         t4 = expand_shift
4303                           (RSHIFT_EXPR, compute_mode, op0,
4304                            size - 1, NULL_RTX, 0);
4305                         if (d < 0)
4306                           quotient
4307                             = force_operand (gen_rtx_MINUS (compute_mode,
4308                                                             t4, t3),
4309                                              tquotient);
4310                         else
4311                           quotient
4312                             = force_operand (gen_rtx_MINUS (compute_mode,
4313                                                             t3, t4),
4314                                              tquotient);
4315                       }
4316                   }
4317                 else            /* Too wide mode to use tricky code */
4318                   break;
4319
4320                 insn = get_last_insn ();
4321                 if (insn != last)
4322                   set_dst_reg_note (insn, REG_EQUAL,
4323                                     gen_rtx_DIV (compute_mode, op0, op1),
4324                                     quotient);
4325               }
4326             break;
4327           }
4328       fail1:
4329         delete_insns_since (last);
4330         break;
4331
4332       case FLOOR_DIV_EXPR:
4333       case FLOOR_MOD_EXPR:
4334       /* We will come here only for signed operations.  */
4335         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4336           {
4337             unsigned HOST_WIDE_INT mh;
4338             int pre_shift, lgup, post_shift;
4339             HOST_WIDE_INT d = INTVAL (op1);
4340             rtx ml;
4341
4342             if (d > 0)
4343               {
4344                 /* We could just as easily deal with negative constants here,
4345                    but it does not seem worth the trouble for GCC 2.6.  */
4346                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4347                   {
4348                     pre_shift = floor_log2 (d);
4349                     if (rem_flag)
4350                       {
4351                         remainder = expand_binop (compute_mode, and_optab, op0,
4352                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4353                                                   remainder, 0, OPTAB_LIB_WIDEN);
4354                         if (remainder)
4355                           return gen_lowpart (mode, remainder);
4356                       }
4357                     quotient = expand_shift
4358                       (RSHIFT_EXPR, compute_mode, op0,
4359                        pre_shift, tquotient, 0);
4360                   }
4361                 else
4362                   {
4363                     rtx t1, t2, t3, t4;
4364
4365                     mh = choose_multiplier (d, size, size - 1,
4366                                             &ml, &post_shift, &lgup);
4367                     gcc_assert (!mh);
4368
4369                     if (post_shift < BITS_PER_WORD
4370                         && size - 1 < BITS_PER_WORD)
4371                       {
4372                         t1 = expand_shift
4373                           (RSHIFT_EXPR, compute_mode, op0,
4374                            size - 1, NULL_RTX, 0);
4375                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4376                                            NULL_RTX, 0, OPTAB_WIDEN);
4377                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4378                                       + shift_cost[speed][compute_mode][size - 1]
4379                                       + 2 * add_cost[speed][compute_mode]);
4380                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4381                                                    NULL_RTX, 1,
4382                                                    max_cost - extra_cost);
4383                         if (t3 != 0)
4384                           {
4385                             t4 = expand_shift
4386                               (RSHIFT_EXPR, compute_mode, t3,
4387                                post_shift, NULL_RTX, 1);
4388                             quotient = expand_binop (compute_mode, xor_optab,
4389                                                      t4, t1, tquotient, 0,
4390                                                      OPTAB_WIDEN);
4391                           }
4392                       }
4393                   }
4394               }
4395             else
4396               {
4397                 rtx nsign, t1, t2, t3, t4;
4398                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4399                                                   op0, constm1_rtx), NULL_RTX);
4400                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4401                                    0, OPTAB_WIDEN);
4402                 nsign = expand_shift
4403                   (RSHIFT_EXPR, compute_mode, t2,
4404                    size - 1, NULL_RTX, 0);
4405                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4406                                     NULL_RTX);
4407                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4408                                     NULL_RTX, 0);
4409                 if (t4)
4410                   {
4411                     rtx t5;
4412                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4413                                       NULL_RTX, 0);
4414                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4415                                                             t4, t5),
4416                                               tquotient);
4417                   }
4418               }
4419           }
4420
4421         if (quotient != 0)
4422           break;
4423         delete_insns_since (last);
4424
4425         /* Try using an instruction that produces both the quotient and
4426            remainder, using truncation.  We can easily compensate the quotient
4427            or remainder to get floor rounding, once we have the remainder.
4428            Notice that we compute also the final remainder value here,
4429            and return the result right away.  */
4430         if (target == 0 || GET_MODE (target) != compute_mode)
4431           target = gen_reg_rtx (compute_mode);
4432
4433         if (rem_flag)
4434           {
4435             remainder
4436               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4437             quotient = gen_reg_rtx (compute_mode);
4438           }
4439         else
4440           {
4441             quotient
4442               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4443             remainder = gen_reg_rtx (compute_mode);
4444           }
4445
4446         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4447                                  quotient, remainder, 0))
4448           {
4449             /* This could be computed with a branch-less sequence.
4450                Save that for later.  */
4451             rtx tem;
4452             rtx label = gen_label_rtx ();
4453             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4454             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4455                                 NULL_RTX, 0, OPTAB_WIDEN);
4456             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4457             expand_dec (quotient, const1_rtx);
4458             expand_inc (remainder, op1);
4459             emit_label (label);
4460             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4461           }
4462
4463         /* No luck with division elimination or divmod.  Have to do it
4464            by conditionally adjusting op0 *and* the result.  */
4465         {
4466           rtx label1, label2, label3, label4, label5;
4467           rtx adjusted_op0;
4468           rtx tem;
4469
4470           quotient = gen_reg_rtx (compute_mode);
4471           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4472           label1 = gen_label_rtx ();
4473           label2 = gen_label_rtx ();
4474           label3 = gen_label_rtx ();
4475           label4 = gen_label_rtx ();
4476           label5 = gen_label_rtx ();
4477           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4478           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4479           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4480                               quotient, 0, OPTAB_LIB_WIDEN);
4481           if (tem != quotient)
4482             emit_move_insn (quotient, tem);
4483           emit_jump_insn (gen_jump (label5));
4484           emit_barrier ();
4485           emit_label (label1);
4486           expand_inc (adjusted_op0, const1_rtx);
4487           emit_jump_insn (gen_jump (label4));
4488           emit_barrier ();
4489           emit_label (label2);
4490           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4491           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4492                               quotient, 0, OPTAB_LIB_WIDEN);
4493           if (tem != quotient)
4494             emit_move_insn (quotient, tem);
4495           emit_jump_insn (gen_jump (label5));
4496           emit_barrier ();
4497           emit_label (label3);
4498           expand_dec (adjusted_op0, const1_rtx);
4499           emit_label (label4);
4500           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501                               quotient, 0, OPTAB_LIB_WIDEN);
4502           if (tem != quotient)
4503             emit_move_insn (quotient, tem);
4504           expand_dec (quotient, const1_rtx);
4505           emit_label (label5);
4506         }
4507         break;
4508
4509       case CEIL_DIV_EXPR:
4510       case CEIL_MOD_EXPR:
4511         if (unsignedp)
4512           {
4513             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4514               {
4515                 rtx t1, t2, t3;
4516                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4517                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4518                                    floor_log2 (d), tquotient, 1);
4519                 t2 = expand_binop (compute_mode, and_optab, op0,
4520                                    GEN_INT (d - 1),
4521                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4522                 t3 = gen_reg_rtx (compute_mode);
4523                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4524                                       compute_mode, 1, 1);
4525                 if (t3 == 0)
4526                   {
4527                     rtx lab;
4528                     lab = gen_label_rtx ();
4529                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4530                     expand_inc (t1, const1_rtx);
4531                     emit_label (lab);
4532                     quotient = t1;
4533                   }
4534                 else
4535                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4536                                                           t1, t3),
4537                                             tquotient);
4538                 break;
4539               }
4540
4541             /* Try using an instruction that produces both the quotient and
4542                remainder, using truncation.  We can easily compensate the
4543                quotient or remainder to get ceiling rounding, once we have the
4544                remainder.  Notice that we compute also the final remainder
4545                value here, and return the result right away.  */
4546             if (target == 0 || GET_MODE (target) != compute_mode)
4547               target = gen_reg_rtx (compute_mode);
4548
4549             if (rem_flag)
4550               {
4551                 remainder = (REG_P (target)
4552                              ? target : gen_reg_rtx (compute_mode));
4553                 quotient = gen_reg_rtx (compute_mode);
4554               }
4555             else
4556               {
4557                 quotient = (REG_P (target)
4558                             ? target : gen_reg_rtx (compute_mode));
4559                 remainder = gen_reg_rtx (compute_mode);
4560               }
4561
4562             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4563                                      remainder, 1))
4564               {
4565                 /* This could be computed with a branch-less sequence.
4566                    Save that for later.  */
4567                 rtx label = gen_label_rtx ();
4568                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4569                                  compute_mode, label);
4570                 expand_inc (quotient, const1_rtx);
4571                 expand_dec (remainder, op1);
4572                 emit_label (label);
4573                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4574               }
4575
4576             /* No luck with division elimination or divmod.  Have to do it
4577                by conditionally adjusting op0 *and* the result.  */
4578             {
4579               rtx label1, label2;
4580               rtx adjusted_op0, tem;
4581
4582               quotient = gen_reg_rtx (compute_mode);
4583               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4584               label1 = gen_label_rtx ();
4585               label2 = gen_label_rtx ();
4586               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4587                                compute_mode, label1);
4588               emit_move_insn  (quotient, const0_rtx);
4589               emit_jump_insn (gen_jump (label2));
4590               emit_barrier ();
4591               emit_label (label1);
4592               expand_dec (adjusted_op0, const1_rtx);
4593               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4594                                   quotient, 1, OPTAB_LIB_WIDEN);
4595               if (tem != quotient)
4596                 emit_move_insn (quotient, tem);
4597               expand_inc (quotient, const1_rtx);
4598               emit_label (label2);
4599             }
4600           }
4601         else /* signed */
4602           {
4603             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4604                 && INTVAL (op1) >= 0)
4605               {
4606                 /* This is extremely similar to the code for the unsigned case
4607                    above.  For 2.7 we should merge these variants, but for
4608                    2.6.1 I don't want to touch the code for unsigned since that
4609                    get used in C.  The signed case will only be used by other
4610                    languages (Ada).  */
4611
4612                 rtx t1, t2, t3;
4613                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4614                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4615                                    floor_log2 (d), tquotient, 0);
4616                 t2 = expand_binop (compute_mode, and_optab, op0,
4617                                    GEN_INT (d - 1),
4618                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4619                 t3 = gen_reg_rtx (compute_mode);
4620                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4621                                       compute_mode, 1, 1);
4622                 if (t3 == 0)
4623                   {
4624                     rtx lab;
4625                     lab = gen_label_rtx ();
4626                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4627                     expand_inc (t1, const1_rtx);
4628                     emit_label (lab);
4629                     quotient = t1;
4630                   }
4631                 else
4632                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4633                                                           t1, t3),
4634                                             tquotient);
4635                 break;
4636               }
4637
4638             /* Try using an instruction that produces both the quotient and
4639                remainder, using truncation.  We can easily compensate the
4640                quotient or remainder to get ceiling rounding, once we have the
4641                remainder.  Notice that we compute also the final remainder
4642                value here, and return the result right away.  */
4643             if (target == 0 || GET_MODE (target) != compute_mode)
4644               target = gen_reg_rtx (compute_mode);
4645             if (rem_flag)
4646               {
4647                 remainder= (REG_P (target)
4648                             ? target : gen_reg_rtx (compute_mode));
4649                 quotient = gen_reg_rtx (compute_mode);
4650               }
4651             else
4652               {
4653                 quotient = (REG_P (target)
4654                             ? target : gen_reg_rtx (compute_mode));
4655                 remainder = gen_reg_rtx (compute_mode);
4656               }
4657
4658             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4659                                      remainder, 0))
4660               {
4661                 /* This could be computed with a branch-less sequence.
4662                    Save that for later.  */
4663                 rtx tem;
4664                 rtx label = gen_label_rtx ();
4665                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4666                                  compute_mode, label);
4667                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4668                                     NULL_RTX, 0, OPTAB_WIDEN);
4669                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4670                 expand_inc (quotient, const1_rtx);
4671                 expand_dec (remainder, op1);
4672                 emit_label (label);
4673                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4674               }
4675
4676             /* No luck with division elimination or divmod.  Have to do it
4677                by conditionally adjusting op0 *and* the result.  */
4678             {
4679               rtx label1, label2, label3, label4, label5;
4680               rtx adjusted_op0;
4681               rtx tem;
4682
4683               quotient = gen_reg_rtx (compute_mode);
4684               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4685               label1 = gen_label_rtx ();
4686               label2 = gen_label_rtx ();
4687               label3 = gen_label_rtx ();
4688               label4 = gen_label_rtx ();
4689               label5 = gen_label_rtx ();
4690               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4691               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4692                                compute_mode, label1);
4693               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4694                                   quotient, 0, OPTAB_LIB_WIDEN);
4695               if (tem != quotient)
4696                 emit_move_insn (quotient, tem);
4697               emit_jump_insn (gen_jump (label5));
4698               emit_barrier ();
4699               emit_label (label1);
4700               expand_dec (adjusted_op0, const1_rtx);
4701               emit_jump_insn (gen_jump (label4));
4702               emit_barrier ();
4703               emit_label (label2);
4704               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4705                                compute_mode, label3);
4706               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4707                                   quotient, 0, OPTAB_LIB_WIDEN);
4708               if (tem != quotient)
4709                 emit_move_insn (quotient, tem);
4710               emit_jump_insn (gen_jump (label5));
4711               emit_barrier ();
4712               emit_label (label3);
4713               expand_inc (adjusted_op0, const1_rtx);
4714               emit_label (label4);
4715               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716                                   quotient, 0, OPTAB_LIB_WIDEN);
4717               if (tem != quotient)
4718                 emit_move_insn (quotient, tem);
4719               expand_inc (quotient, const1_rtx);
4720               emit_label (label5);
4721             }
4722           }
4723         break;
4724
4725       case EXACT_DIV_EXPR:
4726         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4727           {
4728             HOST_WIDE_INT d = INTVAL (op1);
4729             unsigned HOST_WIDE_INT ml;
4730             int pre_shift;
4731             rtx t1;
4732
4733             pre_shift = floor_log2 (d & -d);
4734             ml = invert_mod2n (d >> pre_shift, size);
4735             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4736                                pre_shift, NULL_RTX, unsignedp);
4737             quotient = expand_mult (compute_mode, t1,
4738                                     gen_int_mode (ml, compute_mode),
4739                                     NULL_RTX, 1);
4740
4741             insn = get_last_insn ();
4742             set_dst_reg_note (insn, REG_EQUAL,
4743                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4744                                               compute_mode, op0, op1),
4745                               quotient);
4746           }
4747         break;
4748
4749       case ROUND_DIV_EXPR:
4750       case ROUND_MOD_EXPR:
4751         if (unsignedp)
4752           {
4753             rtx tem;
4754             rtx label;
4755             label = gen_label_rtx ();
4756             quotient = gen_reg_rtx (compute_mode);
4757             remainder = gen_reg_rtx (compute_mode);
4758             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4759               {
4760                 rtx tem;
4761                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4762                                          quotient, 1, OPTAB_LIB_WIDEN);
4763                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4764                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4765                                           remainder, 1, OPTAB_LIB_WIDEN);
4766               }
4767             tem = plus_constant (op1, -1);
4768             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4769             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4770             expand_inc (quotient, const1_rtx);
4771             expand_dec (remainder, op1);
4772             emit_label (label);
4773           }
4774         else
4775           {
4776             rtx abs_rem, abs_op1, tem, mask;
4777             rtx label;
4778             label = gen_label_rtx ();
4779             quotient = gen_reg_rtx (compute_mode);
4780             remainder = gen_reg_rtx (compute_mode);
4781             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4782               {
4783                 rtx tem;
4784                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4785                                          quotient, 0, OPTAB_LIB_WIDEN);
4786                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4787                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4788                                           remainder, 0, OPTAB_LIB_WIDEN);
4789               }
4790             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4791             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4792             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4793                                 1, NULL_RTX, 1);
4794             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4795             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4796                                 NULL_RTX, 0, OPTAB_WIDEN);
4797             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4798                                  size - 1, NULL_RTX, 0);
4799             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4800                                 NULL_RTX, 0, OPTAB_WIDEN);
4801             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4802                                 NULL_RTX, 0, OPTAB_WIDEN);
4803             expand_inc (quotient, tem);
4804             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4805                                 NULL_RTX, 0, OPTAB_WIDEN);
4806             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4807                                 NULL_RTX, 0, OPTAB_WIDEN);
4808             expand_dec (remainder, tem);
4809             emit_label (label);
4810           }
4811         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4812
4813       default:
4814         gcc_unreachable ();
4815       }
4816
4817   if (quotient == 0)
4818     {
4819       if (target && GET_MODE (target) != compute_mode)
4820         target = 0;
4821
4822       if (rem_flag)
4823         {
4824           /* Try to produce the remainder without producing the quotient.
4825              If we seem to have a divmod pattern that does not require widening,
4826              don't try widening here.  We should really have a WIDEN argument
4827              to expand_twoval_binop, since what we'd really like to do here is
4828              1) try a mod insn in compute_mode
4829              2) try a divmod insn in compute_mode
4830              3) try a div insn in compute_mode and multiply-subtract to get
4831                 remainder
4832              4) try the same things with widening allowed.  */
4833           remainder
4834             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4835                                  op0, op1, target,
4836                                  unsignedp,
4837                                  ((optab_handler (optab2, compute_mode)
4838                                    != CODE_FOR_nothing)
4839                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4840           if (remainder == 0)
4841             {
4842               /* No luck there.  Can we do remainder and divide at once
4843                  without a library call?  */
4844               remainder = gen_reg_rtx (compute_mode);
4845               if (! expand_twoval_binop ((unsignedp
4846                                           ? udivmod_optab
4847                                           : sdivmod_optab),
4848                                          op0, op1,
4849                                          NULL_RTX, remainder, unsignedp))
4850                 remainder = 0;
4851             }
4852
4853           if (remainder)
4854             return gen_lowpart (mode, remainder);
4855         }
4856
4857       /* Produce the quotient.  Try a quotient insn, but not a library call.
4858          If we have a divmod in this mode, use it in preference to widening
4859          the div (for this test we assume it will not fail). Note that optab2
4860          is set to the one of the two optabs that the call below will use.  */
4861       quotient
4862         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4863                              op0, op1, rem_flag ? NULL_RTX : target,
4864                              unsignedp,
4865                              ((optab_handler (optab2, compute_mode)
4866                                != CODE_FOR_nothing)
4867                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4868
4869       if (quotient == 0)
4870         {
4871           /* No luck there.  Try a quotient-and-remainder insn,
4872              keeping the quotient alone.  */
4873           quotient = gen_reg_rtx (compute_mode);
4874           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4875                                      op0, op1,
4876                                      quotient, NULL_RTX, unsignedp))
4877             {
4878               quotient = 0;
4879               if (! rem_flag)
4880                 /* Still no luck.  If we are not computing the remainder,
4881                    use a library call for the quotient.  */
4882                 quotient = sign_expand_binop (compute_mode,
4883                                               udiv_optab, sdiv_optab,
4884                                               op0, op1, target,
4885                                               unsignedp, OPTAB_LIB_WIDEN);
4886             }
4887         }
4888     }
4889
4890   if (rem_flag)
4891     {
4892       if (target && GET_MODE (target) != compute_mode)
4893         target = 0;
4894
4895       if (quotient == 0)
4896         {
4897           /* No divide instruction either.  Use library for remainder.  */
4898           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4899                                          op0, op1, target,
4900                                          unsignedp, OPTAB_LIB_WIDEN);
4901           /* No remainder function.  Try a quotient-and-remainder
4902              function, keeping the remainder.  */
4903           if (!remainder)
4904             {
4905               remainder = gen_reg_rtx (compute_mode);
4906               if (!expand_twoval_binop_libfunc
4907                   (unsignedp ? udivmod_optab : sdivmod_optab,
4908                    op0, op1,
4909                    NULL_RTX, remainder,
4910                    unsignedp ? UMOD : MOD))
4911                 remainder = NULL_RTX;
4912             }
4913         }
4914       else
4915         {
4916           /* We divided.  Now finish doing X - Y * (X / Y).  */
4917           remainder = expand_mult (compute_mode, quotient, op1,
4918                                    NULL_RTX, unsignedp);
4919           remainder = expand_binop (compute_mode, sub_optab, op0,
4920                                     remainder, target, unsignedp,
4921                                     OPTAB_LIB_WIDEN);
4922         }
4923     }
4924
4925   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4926 }
4927 \f
4928 /* Return a tree node with data type TYPE, describing the value of X.
4929    Usually this is an VAR_DECL, if there is no obvious better choice.
4930    X may be an expression, however we only support those expressions
4931    generated by loop.c.  */
4932
4933 tree
4934 make_tree (tree type, rtx x)
4935 {
4936   tree t;
4937
4938   switch (GET_CODE (x))
4939     {
4940     case CONST_INT:
4941       {
4942         HOST_WIDE_INT hi = 0;
4943
4944         if (INTVAL (x) < 0
4945             && !(TYPE_UNSIGNED (type)
4946                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4947                      < HOST_BITS_PER_WIDE_INT)))
4948           hi = -1;
4949
4950         t = build_int_cst_wide (type, INTVAL (x), hi);
4951
4952         return t;
4953       }
4954
4955     case CONST_DOUBLE:
4956       if (GET_MODE (x) == VOIDmode)
4957         t = build_int_cst_wide (type,
4958                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4959       else
4960         {
4961           REAL_VALUE_TYPE d;
4962
4963           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4964           t = build_real (type, d);
4965         }
4966
4967       return t;
4968
4969     case CONST_VECTOR:
4970       {
4971         int units = CONST_VECTOR_NUNITS (x);
4972         tree itype = TREE_TYPE (type);
4973         tree t = NULL_TREE;
4974         int i;
4975
4976
4977         /* Build a tree with vector elements.  */
4978         for (i = units - 1; i >= 0; --i)
4979           {
4980             rtx elt = CONST_VECTOR_ELT (x, i);
4981             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4982           }
4983
4984         return build_vector (type, t);
4985       }
4986
4987     case PLUS:
4988       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4989                           make_tree (type, XEXP (x, 1)));
4990
4991     case MINUS:
4992       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993                           make_tree (type, XEXP (x, 1)));
4994
4995     case NEG:
4996       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4997
4998     case MULT:
4999       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5000                           make_tree (type, XEXP (x, 1)));
5001
5002     case ASHIFT:
5003       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004                           make_tree (type, XEXP (x, 1)));
5005
5006     case LSHIFTRT:
5007       t = unsigned_type_for (type);
5008       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5009                                          make_tree (t, XEXP (x, 0)),
5010                                          make_tree (type, XEXP (x, 1))));
5011
5012     case ASHIFTRT:
5013       t = signed_type_for (type);
5014       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5015                                          make_tree (t, XEXP (x, 0)),
5016                                          make_tree (type, XEXP (x, 1))));
5017
5018     case DIV:
5019       if (TREE_CODE (type) != REAL_TYPE)
5020         t = signed_type_for (type);
5021       else
5022         t = type;
5023
5024       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5025                                          make_tree (t, XEXP (x, 0)),
5026                                          make_tree (t, XEXP (x, 1))));
5027     case UDIV:
5028       t = unsigned_type_for (type);
5029       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5030                                          make_tree (t, XEXP (x, 0)),
5031                                          make_tree (t, XEXP (x, 1))));
5032
5033     case SIGN_EXTEND:
5034     case ZERO_EXTEND:
5035       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5036                                           GET_CODE (x) == ZERO_EXTEND);
5037       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5038
5039     case CONST:
5040       return make_tree (type, XEXP (x, 0));
5041
5042     case SYMBOL_REF:
5043       t = SYMBOL_REF_DECL (x);
5044       if (t)
5045         return fold_convert (type, build_fold_addr_expr (t));
5046       /* else fall through.  */
5047
5048     default:
5049       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5050
5051       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5052          address mode to pointer mode.  */
5053       if (POINTER_TYPE_P (type))
5054         x = convert_memory_address_addr_space
5055               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5056
5057       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5058          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5059       t->decl_with_rtl.rtl = x;
5060
5061       return t;
5062     }
5063 }
5064 \f
5065 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5066    and returning TARGET.
5067
5068    If TARGET is 0, a pseudo-register or constant is returned.  */
5069
5070 rtx
5071 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5072 {
5073   rtx tem = 0;
5074
5075   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5076     tem = simplify_binary_operation (AND, mode, op0, op1);
5077   if (tem == 0)
5078     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5079
5080   if (target == 0)
5081     target = tem;
5082   else if (tem != target)
5083     emit_move_insn (target, tem);
5084   return target;
5085 }
5086
5087 /* Helper function for emit_store_flag.  */
5088 static rtx
5089 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5090              enum machine_mode mode, enum machine_mode compare_mode,
5091              int unsignedp, rtx x, rtx y, int normalizep,
5092              enum machine_mode target_mode)
5093 {
5094   struct expand_operand ops[4];
5095   rtx op0, last, comparison, subtarget;
5096   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5097
5098   last = get_last_insn ();
5099   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5100   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5101   if (!x || !y)
5102     {
5103       delete_insns_since (last);
5104       return NULL_RTX;
5105     }
5106
5107   if (target_mode == VOIDmode)
5108     target_mode = result_mode;
5109   if (!target)
5110     target = gen_reg_rtx (target_mode);
5111
5112   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5113
5114   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5115   create_fixed_operand (&ops[1], comparison);
5116   create_fixed_operand (&ops[2], x);
5117   create_fixed_operand (&ops[3], y);
5118   if (!maybe_expand_insn (icode, 4, ops))
5119     {
5120       delete_insns_since (last);
5121       return NULL_RTX;
5122     }
5123   subtarget = ops[0].value;
5124
5125   /* If we are converting to a wider mode, first convert to
5126      TARGET_MODE, then normalize.  This produces better combining
5127      opportunities on machines that have a SIGN_EXTRACT when we are
5128      testing a single bit.  This mostly benefits the 68k.
5129
5130      If STORE_FLAG_VALUE does not have the sign bit set when
5131      interpreted in MODE, we can do this conversion as unsigned, which
5132      is usually more efficient.  */
5133   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5134     {
5135       convert_move (target, subtarget,
5136                     val_signbit_known_clear_p (result_mode,
5137                                                STORE_FLAG_VALUE));
5138       op0 = target;
5139       result_mode = target_mode;
5140     }
5141   else
5142     op0 = subtarget;
5143
5144   /* If we want to keep subexpressions around, don't reuse our last
5145      target.  */
5146   if (optimize)
5147     subtarget = 0;
5148
5149   /* Now normalize to the proper value in MODE.  Sometimes we don't
5150      have to do anything.  */
5151   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5152     ;
5153   /* STORE_FLAG_VALUE might be the most negative number, so write
5154      the comparison this way to avoid a compiler-time warning.  */
5155   else if (- normalizep == STORE_FLAG_VALUE)
5156     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5157
5158   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5159      it hard to use a value of just the sign bit due to ANSI integer
5160      constant typing rules.  */
5161   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5162     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5163                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5164                         normalizep == 1);
5165   else
5166     {
5167       gcc_assert (STORE_FLAG_VALUE & 1);
5168
5169       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5170       if (normalizep == -1)
5171         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5172     }
5173
5174   /* If we were converting to a smaller mode, do the conversion now.  */
5175   if (target_mode != result_mode)
5176     {
5177       convert_move (target, op0, 0);
5178       return target;
5179     }
5180   else
5181     return op0;
5182 }
5183
5184
5185 /* A subroutine of emit_store_flag only including "tricks" that do not
5186    need a recursive call.  These are kept separate to avoid infinite
5187    loops.  */
5188
5189 static rtx
5190 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5191                    enum machine_mode mode, int unsignedp, int normalizep,
5192                    enum machine_mode target_mode)
5193 {
5194   rtx subtarget;
5195   enum insn_code icode;
5196   enum machine_mode compare_mode;
5197   enum mode_class mclass;
5198   enum rtx_code scode;
5199   rtx tem;
5200
5201   if (unsignedp)
5202     code = unsigned_condition (code);
5203   scode = swap_condition (code);
5204
5205   /* If one operand is constant, make it the second one.  Only do this
5206      if the other operand is not constant as well.  */
5207
5208   if (swap_commutative_operands_p (op0, op1))
5209     {
5210       tem = op0;
5211       op0 = op1;
5212       op1 = tem;
5213       code = swap_condition (code);
5214     }
5215
5216   if (mode == VOIDmode)
5217     mode = GET_MODE (op0);
5218
5219   /* For some comparisons with 1 and -1, we can convert this to
5220      comparisons with zero.  This will often produce more opportunities for
5221      store-flag insns.  */
5222
5223   switch (code)
5224     {
5225     case LT:
5226       if (op1 == const1_rtx)
5227         op1 = const0_rtx, code = LE;
5228       break;
5229     case LE:
5230       if (op1 == constm1_rtx)
5231         op1 = const0_rtx, code = LT;
5232       break;
5233     case GE:
5234       if (op1 == const1_rtx)
5235         op1 = const0_rtx, code = GT;
5236       break;
5237     case GT:
5238       if (op1 == constm1_rtx)
5239         op1 = const0_rtx, code = GE;
5240       break;
5241     case GEU:
5242       if (op1 == const1_rtx)
5243         op1 = const0_rtx, code = NE;
5244       break;
5245     case LTU:
5246       if (op1 == const1_rtx)
5247         op1 = const0_rtx, code = EQ;
5248       break;
5249     default:
5250       break;
5251     }
5252
5253   /* If we are comparing a double-word integer with zero or -1, we can
5254      convert the comparison into one involving a single word.  */
5255   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5256       && GET_MODE_CLASS (mode) == MODE_INT
5257       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5258     {
5259       if ((code == EQ || code == NE)
5260           && (op1 == const0_rtx || op1 == constm1_rtx))
5261         {
5262           rtx op00, op01;
5263
5264           /* Do a logical OR or AND of the two words and compare the
5265              result.  */
5266           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5267           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5268           tem = expand_binop (word_mode,
5269                               op1 == const0_rtx ? ior_optab : and_optab,
5270                               op00, op01, NULL_RTX, unsignedp,
5271                               OPTAB_DIRECT);
5272
5273           if (tem != 0)
5274             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5275                                    unsignedp, normalizep);
5276         }
5277       else if ((code == LT || code == GE) && op1 == const0_rtx)
5278         {
5279           rtx op0h;
5280
5281           /* If testing the sign bit, can just test on high word.  */
5282           op0h = simplify_gen_subreg (word_mode, op0, mode,
5283                                       subreg_highpart_offset (word_mode,
5284                                                               mode));
5285           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5286                                  unsignedp, normalizep);
5287         }
5288       else
5289         tem = NULL_RTX;
5290
5291       if (tem)
5292         {
5293           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5294             return tem;
5295           if (!target)
5296             target = gen_reg_rtx (target_mode);
5297
5298           convert_move (target, tem,
5299                         !val_signbit_known_set_p (word_mode,
5300                                                   (normalizep ? normalizep
5301                                                    : STORE_FLAG_VALUE)));
5302           return target;
5303         }
5304     }
5305
5306   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5307      complement of A (for GE) and shifting the sign bit to the low bit.  */
5308   if (op1 == const0_rtx && (code == LT || code == GE)
5309       && GET_MODE_CLASS (mode) == MODE_INT
5310       && (normalizep || STORE_FLAG_VALUE == 1
5311           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5312     {
5313       subtarget = target;
5314
5315       if (!target)
5316         target_mode = mode;
5317
5318       /* If the result is to be wider than OP0, it is best to convert it
5319          first.  If it is to be narrower, it is *incorrect* to convert it
5320          first.  */
5321       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5322         {
5323           op0 = convert_modes (target_mode, mode, op0, 0);
5324           mode = target_mode;
5325         }
5326
5327       if (target_mode != mode)
5328         subtarget = 0;
5329
5330       if (code == GE)
5331         op0 = expand_unop (mode, one_cmpl_optab, op0,
5332                            ((STORE_FLAG_VALUE == 1 || normalizep)
5333                             ? 0 : subtarget), 0);
5334
5335       if (STORE_FLAG_VALUE == 1 || normalizep)
5336         /* If we are supposed to produce a 0/1 value, we want to do
5337            a logical shift from the sign bit to the low-order bit; for
5338            a -1/0 value, we do an arithmetic shift.  */
5339         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5340                             GET_MODE_BITSIZE (mode) - 1,
5341                             subtarget, normalizep != -1);
5342
5343       if (mode != target_mode)
5344         op0 = convert_modes (target_mode, mode, op0, 0);
5345
5346       return op0;
5347     }
5348
5349   mclass = GET_MODE_CLASS (mode);
5350   for (compare_mode = mode; compare_mode != VOIDmode;
5351        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5352     {
5353      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5354      icode = optab_handler (cstore_optab, optab_mode);
5355      if (icode != CODE_FOR_nothing)
5356         {
5357           do_pending_stack_adjust ();
5358           tem = emit_cstore (target, icode, code, mode, compare_mode,
5359                              unsignedp, op0, op1, normalizep, target_mode);
5360           if (tem)
5361             return tem;
5362
5363           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5364             {
5365               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5366                                  unsignedp, op1, op0, normalizep, target_mode);
5367               if (tem)
5368                 return tem;
5369             }
5370           break;
5371         }
5372     }
5373
5374   return 0;
5375 }
5376
5377 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5378    and storing in TARGET.  Normally return TARGET.
5379    Return 0 if that cannot be done.
5380
5381    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5382    it is VOIDmode, they cannot both be CONST_INT.
5383
5384    UNSIGNEDP is for the case where we have to widen the operands
5385    to perform the operation.  It says to use zero-extension.
5386
5387    NORMALIZEP is 1 if we should convert the result to be either zero
5388    or one.  Normalize is -1 if we should convert the result to be
5389    either zero or -1.  If NORMALIZEP is zero, the result will be left
5390    "raw" out of the scc insn.  */
5391
5392 rtx
5393 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5394                  enum machine_mode mode, int unsignedp, int normalizep)
5395 {
5396   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5397   enum rtx_code rcode;
5398   rtx subtarget;
5399   rtx tem, last, trueval;
5400
5401   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5402                            target_mode);
5403   if (tem)
5404     return tem;
5405
5406   /* If we reached here, we can't do this with a scc insn, however there
5407      are some comparisons that can be done in other ways.  Don't do any
5408      of these cases if branches are very cheap.  */
5409   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5410     return 0;
5411
5412   /* See what we need to return.  We can only return a 1, -1, or the
5413      sign bit.  */
5414
5415   if (normalizep == 0)
5416     {
5417       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5418         normalizep = STORE_FLAG_VALUE;
5419
5420       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5421         ;
5422       else
5423         return 0;
5424     }
5425
5426   last = get_last_insn ();
5427
5428   /* If optimizing, use different pseudo registers for each insn, instead
5429      of reusing the same pseudo.  This leads to better CSE, but slows
5430      down the compiler, since there are more pseudos */
5431   subtarget = (!optimize
5432                && (target_mode == mode)) ? target : NULL_RTX;
5433   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5434
5435   /* For floating-point comparisons, try the reverse comparison or try
5436      changing the "orderedness" of the comparison.  */
5437   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5438     {
5439       enum rtx_code first_code;
5440       bool and_them;
5441
5442       rcode = reverse_condition_maybe_unordered (code);
5443       if (can_compare_p (rcode, mode, ccp_store_flag)
5444           && (code == ORDERED || code == UNORDERED
5445               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5446               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5447         {
5448           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5449                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5450
5451           /* For the reverse comparison, use either an addition or a XOR.  */
5452           if (want_add
5453               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5454                            optimize_insn_for_speed_p ()) == 0)
5455             {
5456               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5457                                        STORE_FLAG_VALUE, target_mode);
5458               if (tem)
5459                 return expand_binop (target_mode, add_optab, tem,
5460                                      GEN_INT (normalizep),
5461                                      target, 0, OPTAB_WIDEN);
5462             }
5463           else if (!want_add
5464                    && rtx_cost (trueval, XOR, 1,
5465                                 optimize_insn_for_speed_p ()) == 0)
5466             {
5467               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5468                                        normalizep, target_mode);
5469               if (tem)
5470                 return expand_binop (target_mode, xor_optab, tem, trueval,
5471                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5472             }
5473         }
5474
5475       delete_insns_since (last);
5476
5477       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5478       if (code == ORDERED || code == UNORDERED)
5479         return 0;
5480
5481       and_them = split_comparison (code, mode, &first_code, &code);
5482
5483       /* If there are no NaNs, the first comparison should always fall through.
5484          Effectively change the comparison to the other one.  */
5485       if (!HONOR_NANS (mode))
5486         {
5487           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5488           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5489                                     target_mode);
5490         }
5491
5492 #ifdef HAVE_conditional_move
5493       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5494          conditional move.  */
5495       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5496                                normalizep, target_mode);
5497       if (tem == 0)
5498         return 0;
5499
5500       if (and_them)
5501         tem = emit_conditional_move (target, code, op0, op1, mode,
5502                                      tem, const0_rtx, GET_MODE (tem), 0);
5503       else
5504         tem = emit_conditional_move (target, code, op0, op1, mode,
5505                                      trueval, tem, GET_MODE (tem), 0);
5506
5507       if (tem == 0)
5508         delete_insns_since (last);
5509       return tem;
5510 #else
5511       return 0;
5512 #endif
5513     }
5514
5515   /* The remaining tricks only apply to integer comparisons.  */
5516
5517   if (GET_MODE_CLASS (mode) != MODE_INT)
5518     return 0;
5519
5520   /* If this is an equality comparison of integers, we can try to exclusive-or
5521      (or subtract) the two operands and use a recursive call to try the
5522      comparison with zero.  Don't do any of these cases if branches are
5523      very cheap.  */
5524
5525   if ((code == EQ || code == NE) && op1 != const0_rtx)
5526     {
5527       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5528                           OPTAB_WIDEN);
5529
5530       if (tem == 0)
5531         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5532                             OPTAB_WIDEN);
5533       if (tem != 0)
5534         tem = emit_store_flag (target, code, tem, const0_rtx,
5535                                mode, unsignedp, normalizep);
5536       if (tem != 0)
5537         return tem;
5538
5539       delete_insns_since (last);
5540     }
5541
5542   /* For integer comparisons, try the reverse comparison.  However, for
5543      small X and if we'd have anyway to extend, implementing "X != 0"
5544      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5545   rcode = reverse_condition (code);
5546   if (can_compare_p (rcode, mode, ccp_store_flag)
5547       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5548             && code == NE
5549             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5550             && op1 == const0_rtx))
5551     {
5552       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5553                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5554
5555       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5556       if (want_add
5557           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5558                        optimize_insn_for_speed_p ()) == 0)
5559         {
5560           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5561                                    STORE_FLAG_VALUE, target_mode);
5562           if (tem != 0)
5563             tem = expand_binop (target_mode, add_optab, tem,
5564                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5565         }
5566       else if (!want_add
5567                && rtx_cost (trueval, XOR, 1,
5568                             optimize_insn_for_speed_p ()) == 0)
5569         {
5570           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5571                                    normalizep, target_mode);
5572           if (tem != 0)
5573             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5574                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5575         }
5576
5577       if (tem != 0)
5578         return tem;
5579       delete_insns_since (last);
5580     }
5581
5582   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5583      the constant zero.  Reject all other comparisons at this point.  Only
5584      do LE and GT if branches are expensive since they are expensive on
5585      2-operand machines.  */
5586
5587   if (op1 != const0_rtx
5588       || (code != EQ && code != NE
5589           && (BRANCH_COST (optimize_insn_for_speed_p (),
5590                            false) <= 1 || (code != LE && code != GT))))
5591     return 0;
5592
5593   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5594      do the necessary operation below.  */
5595
5596   tem = 0;
5597
5598   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5599      the sign bit set.  */
5600
5601   if (code == LE)
5602     {
5603       /* This is destructive, so SUBTARGET can't be OP0.  */
5604       if (rtx_equal_p (subtarget, op0))
5605         subtarget = 0;
5606
5607       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5608                           OPTAB_WIDEN);
5609       if (tem)
5610         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5611                             OPTAB_WIDEN);
5612     }
5613
5614   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5615      number of bits in the mode of OP0, minus one.  */
5616
5617   if (code == GT)
5618     {
5619       if (rtx_equal_p (subtarget, op0))
5620         subtarget = 0;
5621
5622       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5623                           GET_MODE_BITSIZE (mode) - 1,
5624                           subtarget, 0);
5625       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5626                           OPTAB_WIDEN);
5627     }
5628
5629   if (code == EQ || code == NE)
5630     {
5631       /* For EQ or NE, one way to do the comparison is to apply an operation
5632          that converts the operand into a positive number if it is nonzero
5633          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5634          for NE we negate.  This puts the result in the sign bit.  Then we
5635          normalize with a shift, if needed.
5636
5637          Two operations that can do the above actions are ABS and FFS, so try
5638          them.  If that doesn't work, and MODE is smaller than a full word,
5639          we can use zero-extension to the wider mode (an unsigned conversion)
5640          as the operation.  */
5641
5642       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5643          that is compensated by the subsequent overflow when subtracting
5644          one / negating.  */
5645
5646       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5647         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5648       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5649         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5650       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5651         {
5652           tem = convert_modes (word_mode, mode, op0, 1);
5653           mode = word_mode;
5654         }
5655
5656       if (tem != 0)
5657         {
5658           if (code == EQ)
5659             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5660                                 0, OPTAB_WIDEN);
5661           else
5662             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5663         }
5664
5665       /* If we couldn't do it that way, for NE we can "or" the two's complement
5666          of the value with itself.  For EQ, we take the one's complement of
5667          that "or", which is an extra insn, so we only handle EQ if branches
5668          are expensive.  */
5669
5670       if (tem == 0
5671           && (code == NE
5672               || BRANCH_COST (optimize_insn_for_speed_p (),
5673                               false) > 1))
5674         {
5675           if (rtx_equal_p (subtarget, op0))
5676             subtarget = 0;
5677
5678           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5679           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5680                               OPTAB_WIDEN);
5681
5682           if (tem && code == EQ)
5683             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5684         }
5685     }
5686
5687   if (tem && normalizep)
5688     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5689                         GET_MODE_BITSIZE (mode) - 1,
5690                         subtarget, normalizep == 1);
5691
5692   if (tem)
5693     {
5694       if (!target)
5695         ;
5696       else if (GET_MODE (tem) != target_mode)
5697         {
5698           convert_move (target, tem, 0);
5699           tem = target;
5700         }
5701       else if (!subtarget)
5702         {
5703           emit_move_insn (target, tem);
5704           tem = target;
5705         }
5706     }
5707   else
5708     delete_insns_since (last);
5709
5710   return tem;
5711 }
5712
5713 /* Like emit_store_flag, but always succeeds.  */
5714
5715 rtx
5716 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5717                        enum machine_mode mode, int unsignedp, int normalizep)
5718 {
5719   rtx tem, label;
5720   rtx trueval, falseval;
5721
5722   /* First see if emit_store_flag can do the job.  */
5723   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5724   if (tem != 0)
5725     return tem;
5726
5727   if (!target)
5728     target = gen_reg_rtx (word_mode);
5729
5730   /* If this failed, we have to do this with set/compare/jump/set code.
5731      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5732   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5733   if (code == NE
5734       && GET_MODE_CLASS (mode) == MODE_INT
5735       && REG_P (target)
5736       && op0 == target
5737       && op1 == const0_rtx)
5738     {
5739       label = gen_label_rtx ();
5740       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5741                                mode, NULL_RTX, NULL_RTX, label, -1);
5742       emit_move_insn (target, trueval);
5743       emit_label (label);
5744       return target;
5745     }
5746
5747   if (!REG_P (target)
5748       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5749     target = gen_reg_rtx (GET_MODE (target));
5750
5751   /* Jump in the right direction if the target cannot implement CODE
5752      but can jump on its reverse condition.  */
5753   falseval = const0_rtx;
5754   if (! can_compare_p (code, mode, ccp_jump)
5755       && (! FLOAT_MODE_P (mode)
5756           || code == ORDERED || code == UNORDERED
5757           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5758           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5759     {
5760       enum rtx_code rcode;
5761       if (FLOAT_MODE_P (mode))
5762         rcode = reverse_condition_maybe_unordered (code);
5763       else
5764         rcode = reverse_condition (code);
5765
5766       /* Canonicalize to UNORDERED for the libcall.  */
5767       if (can_compare_p (rcode, mode, ccp_jump)
5768           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5769         {
5770           falseval = trueval;
5771           trueval = const0_rtx;
5772           code = rcode;
5773         }
5774     }
5775
5776   emit_move_insn (target, trueval);
5777   label = gen_label_rtx ();
5778   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5779                            NULL_RTX, label, -1);
5780
5781   emit_move_insn (target, falseval);
5782   emit_label (label);
5783
5784   return target;
5785 }
5786 \f
5787 /* Perform possibly multi-word comparison and conditional jump to LABEL
5788    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5789    now a thin wrapper around do_compare_rtx_and_jump.  */
5790
5791 static void
5792 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5793                  rtx label)
5794 {
5795   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5796   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5797                            NULL_RTX, NULL_RTX, label, -1);
5798 }