[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
71 /* Test whether a value is zero or a power of two. */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
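/* A worked example of the bit trick above, purely for illustration: with
   x == 8 (binary 1000), x & (x - 1) is 1000 & 0111 == 0, so the macro is
   true; with x == 6 (binary 0110), 0110 & 0101 == 0100, so it is false.
   x == 0 also satisfies it, hence the name.  */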
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 void
97 init_expmed (void)
98 {
99 struct
100 {
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
119
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
125
126
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
128 {
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
131 }
132 memset (&all, 0, sizeof all);
133
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
137
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
141
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
144
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
148
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
152
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
156
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
160
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
164
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
167
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
171
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
174
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
177
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
180
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
183
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
187
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
191
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
195
196 for (speed = 0; speed < 2; speed++)
197 {
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
219
220 add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
221 neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
222 mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
223 sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
224 udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
225
226 sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
229 <= 4 * add_cost[speed][mode]);
230
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
233 {
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
238
239 mul_widen_cost[speed][wider_mode]
240 = rtx_cost (&all.wide_mult, SET, speed);
241 mul_highpart_cost[speed][mode]
242 = rtx_cost (&all.wide_trunc, SET, speed);
243 }
244
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
248
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
251 {
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
254
255 shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
256 shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
257 shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
258 shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
259 }
260 }
261 }
262 if (alg_hash_used_p)
263 memset (alg_hash, 0, sizeof (alg_hash));
264 else
265 alg_hash_used_p = true;
266 default_rtl_profile ();
267 }
268
269 /* Return an rtx representing minus the value of X.
270 MODE is the intended mode of the result,
271 useful if X is a CONST_INT. */
272
273 rtx
274 negate_rtx (enum machine_mode mode, rtx x)
275 {
276 rtx result = simplify_unary_operation (NEG, mode, x, mode);
277
278 if (result == 0)
279 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
280
281 return result;
282 }
283
284 /* Report on the availability of insv/extv/extzv and the desired mode
285 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
286 is false; else the mode of the specified operand. If OPNO is -1,
287 all the caller cares about is whether the insn is available. */
288 enum machine_mode
289 mode_for_extraction (enum extraction_pattern pattern, int opno)
290 {
291 const struct insn_data_d *data;
292
293 switch (pattern)
294 {
295 case EP_insv:
296 if (HAVE_insv)
297 {
298 data = &insn_data[CODE_FOR_insv];
299 break;
300 }
301 return MAX_MACHINE_MODE;
302
303 case EP_extv:
304 if (HAVE_extv)
305 {
306 data = &insn_data[CODE_FOR_extv];
307 break;
308 }
309 return MAX_MACHINE_MODE;
310
311 case EP_extzv:
312 if (HAVE_extzv)
313 {
314 data = &insn_data[CODE_FOR_extzv];
315 break;
316 }
317 return MAX_MACHINE_MODE;
318
319 default:
320 gcc_unreachable ();
321 }
322
323 if (opno == -1)
324 return VOIDmode;
325
326 /* Everyone who uses this function used to follow it with
327 if (result == VOIDmode) result = word_mode; */
328 if (data->operand[opno].mode == VOIDmode)
329 return word_mode;
330 return data->operand[opno].mode;
331 }
332 \f
333 /* A subroutine of store_bit_field, with the same arguments. Return true
334 if the operation could be implemented.
335
336 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
337 no other way of implementing the operation. If FALLBACK_P is false,
338 return false instead. */
339
340 static bool
341 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
342 unsigned HOST_WIDE_INT bitnum,
343 unsigned HOST_WIDE_INT bitregion_start,
344 unsigned HOST_WIDE_INT bitregion_end,
345 enum machine_mode fieldmode,
346 rtx value, bool fallback_p)
347 {
348 unsigned int unit
349 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
350 unsigned HOST_WIDE_INT offset, bitpos;
351 rtx op0 = str_rtx;
352 int byte_offset;
353 rtx orig_value;
354
355 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
356
357 while (GET_CODE (op0) == SUBREG)
358 {
359 /* The following line once was done only if WORDS_BIG_ENDIAN,
360 but I think that is a mistake. WORDS_BIG_ENDIAN is
361 meaningful at a much higher level; when structures are copied
362 between memory and regs, the higher-numbered regs
363 always get higher addresses. */
364 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
365 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
366
367 byte_offset = 0;
368
369 /* Paradoxical subregs need special handling on big endian machines. */
370 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
371 {
372 int difference = inner_mode_size - outer_mode_size;
373
374 if (WORDS_BIG_ENDIAN)
375 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
376 if (BYTES_BIG_ENDIAN)
377 byte_offset += difference % UNITS_PER_WORD;
378 }
379 else
380 byte_offset = SUBREG_BYTE (op0);
381
382 bitnum += byte_offset * BITS_PER_UNIT;
383 op0 = SUBREG_REG (op0);
384 }
385
386 /* No action is needed if the target is a register and if the field
387 lies completely outside that register. This can occur if the source
388 code contains an out-of-bounds access to a small array. */
389 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
390 return true;
391
392 /* Use vec_set patterns for inserting parts of vectors whenever
393 available. */
394 if (VECTOR_MODE_P (GET_MODE (op0))
395 && !MEM_P (op0)
396 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
397 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
398 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
399 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
400 {
401 struct expand_operand ops[3];
402 enum machine_mode outermode = GET_MODE (op0);
403 enum machine_mode innermode = GET_MODE_INNER (outermode);
404 enum insn_code icode = optab_handler (vec_set_optab, outermode);
405 int pos = bitnum / GET_MODE_BITSIZE (innermode);
406
407 create_fixed_operand (&ops[0], op0);
408 create_input_operand (&ops[1], value, innermode);
409 create_integer_operand (&ops[2], pos);
410 if (maybe_expand_insn (icode, 3, ops))
411 return true;
412 }
413
414 /* If the target is a register, overwriting the entire object, or storing
415 a full-word or multi-word field can be done with just a SUBREG.
416
417 If the target is memory, storing any naturally aligned field can be
418 done with a simple store. For targets that support fast unaligned
419 memory, any naturally sized, unit aligned field can be done directly. */
420
421 offset = bitnum / unit;
422 bitpos = bitnum % unit;
423 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
424 + (offset * UNITS_PER_WORD);
425
426 if (bitpos == 0
427 && bitsize == GET_MODE_BITSIZE (fieldmode)
428 && (!MEM_P (op0)
429 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
430 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
431 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
432 || validate_subreg (fieldmode, GET_MODE (op0), op0,
433 byte_offset)))
434 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
435 || (offset * BITS_PER_UNIT % bitsize == 0
436 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
437 {
438 if (MEM_P (op0))
439 op0 = adjust_address (op0, fieldmode, offset);
440 else if (GET_MODE (op0) != fieldmode)
441 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
442 byte_offset);
443 emit_move_insn (op0, value);
444 return true;
445 }
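  /* An illustrative case that takes the path above (assuming a target
     without slow unaligned accesses): storing a 32-bit value at bit 0 of
     an SImode MEM has bitpos == 0 and bitsize == GET_MODE_BITSIZE
     (fieldmode), so it reduces to adjust_address plus a single move.  */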
446
447 /* Make sure we are playing with integral modes. Pun with subregs
448 if we aren't. This must come after the entire register case above,
449 since that case is valid for any mode. The following cases are only
450 valid for integral modes. */
451 {
452 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
453 if (imode != GET_MODE (op0))
454 {
455 if (MEM_P (op0))
456 op0 = adjust_address (op0, imode, 0);
457 else
458 {
459 gcc_assert (imode != BLKmode);
460 op0 = gen_lowpart (imode, op0);
461 }
462 }
463 }
464
465 /* We may be accessing data outside the field, which means
466 we can alias adjacent data. */
467 /* ?? not always for C++0x memory model ?? */
468 if (MEM_P (op0))
469 {
470 op0 = shallow_copy_rtx (op0);
471 set_mem_alias_set (op0, 0);
472 set_mem_expr (op0, 0);
473 }
474
475 /* If OP0 is a register, BITPOS must count within a word.
476 But as we have it, it counts within whatever size OP0 now has.
477 On a bigendian machine, these are not the same, so convert. */
478 if (BYTES_BIG_ENDIAN
479 && !MEM_P (op0)
480 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
481 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
482
483 /* Storing an lsb-aligned field in a register
484 can be done with a movstrict instruction. */
485
486 if (!MEM_P (op0)
487 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
488 && bitsize == GET_MODE_BITSIZE (fieldmode)
489 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
490 {
491 struct expand_operand ops[2];
492 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
493 rtx arg0 = op0;
494 unsigned HOST_WIDE_INT subreg_off;
495
496 if (GET_CODE (arg0) == SUBREG)
497 {
498 /* Else we've got some float mode source being extracted into
499 a different float mode destination -- this combination of
500 subregs results in Severe Tire Damage. */
501 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
502 || GET_MODE_CLASS (fieldmode) == MODE_INT
503 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
504 arg0 = SUBREG_REG (arg0);
505 }
506
507 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
508 + (offset * UNITS_PER_WORD);
509 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
510 {
511 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
512
513 create_fixed_operand (&ops[0], arg0);
514 /* Shrink the source operand to FIELDMODE. */
515 create_convert_operand_to (&ops[1], value, fieldmode, false);
516 if (maybe_expand_insn (icode, 2, ops))
517 return true;
518 }
519 }
520
521 /* Handle fields bigger than a word. */
522
523 if (bitsize > BITS_PER_WORD)
524 {
525 /* Here we transfer the words of the field
526 in the order least significant first.
527 This is because the most significant word is the one which may
528 be less than full.
529 However, only do that if the value is not BLKmode. */
530
531 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
532 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
533 unsigned int i;
534 rtx last;
535
536 /* This is the mode we must force value to, so that there will be enough
537 subwords to extract. Note that fieldmode will often (always?) be
538 VOIDmode, because that is what store_field uses to indicate that this
539 is a bit field, but passing VOIDmode to operand_subword_force
540 is not allowed. */
541 fieldmode = GET_MODE (value);
542 if (fieldmode == VOIDmode)
543 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
544
545 last = get_last_insn ();
546 for (i = 0; i < nwords; i++)
547 {
548 /* If I is 0, use the low-order word in both field and target;
549 if I is 1, use the next to lowest word; and so on. */
550 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
551 unsigned int bit_offset = (backwards
552 ? MAX ((int) bitsize - ((int) i + 1)
553 * BITS_PER_WORD,
554 0)
555 : (int) i * BITS_PER_WORD);
556 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
557
558 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
559 bitsize - i * BITS_PER_WORD),
560 bitnum + bit_offset,
561 bitregion_start, bitregion_end,
562 word_mode,
563 value_word, fallback_p))
564 {
565 delete_insns_since (last);
566 return false;
567 }
568 }
569 return true;
570 }
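  /* A sketch of the ordering used by the multiword loop above: for an
     80-bit field on a 32-bit target (nwords == 3) the loop stores 32, 32
     and finally 16 bits, so the possibly partial most significant word is
     always written last.  With !WORDS_BIG_ENDIAN the iterations use
     wordnum 0, 1, 2 at bit_offset 0, 32, 64; with WORDS_BIG_ENDIAN they
     use wordnum 2, 1, 0 at bit_offset 48, 16, 0.  */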
571
572 /* From here on we can assume that the field to be stored in fits
573 within a single word, since fields wider than a word were handled above. */
574
575 /* OFFSET is the number of words or bytes (UNIT says which)
576 from STR_RTX to the first word or byte containing part of the field. */
577
578 if (!MEM_P (op0))
579 {
580 if (offset != 0
581 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
582 {
583 if (!REG_P (op0))
584 {
585 /* Since this is a destination (lvalue), we can't copy
586 it to a pseudo. We can remove a SUBREG that does not
587 change the size of the operand. Such a SUBREG may
588 have been added above. */
589 gcc_assert (GET_CODE (op0) == SUBREG
590 && (GET_MODE_SIZE (GET_MODE (op0))
591 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
592 op0 = SUBREG_REG (op0);
593 }
594 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
595 op0, (offset * UNITS_PER_WORD));
596 }
597 offset = 0;
598 }
599
600 /* If VALUE has a floating-point or complex mode, access it as an
601 integer of the corresponding size. This can occur on a machine
602 with 64 bit registers that uses SFmode for float. It can also
603 occur for unaligned float or complex fields. */
604 orig_value = value;
605 if (GET_MODE (value) != VOIDmode
606 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
607 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
608 {
609 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
610 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
611 }
612
613 /* Now OFFSET is nonzero only if OP0 is memory
614 and is therefore always measured in bytes. */
615
616 if (HAVE_insv
617 && GET_MODE (value) != BLKmode
618 && bitsize > 0
619 && GET_MODE_BITSIZE (op_mode) >= bitsize
620 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
621 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
622 {
623 struct expand_operand ops[4];
624 int xbitpos = bitpos;
625 rtx value1;
626 rtx xop0 = op0;
627 rtx last = get_last_insn ();
628 bool copy_back = false;
629
630 /* Add OFFSET into OP0's address. */
631 if (MEM_P (xop0))
632 xop0 = adjust_address (xop0, byte_mode, offset);
633
634 /* If xop0 is a register, we need it in OP_MODE
635 to make it acceptable to the format of insv. */
636 if (GET_CODE (xop0) == SUBREG)
637 /* We can't just change the mode, because this might clobber op0,
638 and we will need the original value of op0 if insv fails. */
639 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
640 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
641 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
642
643 /* If the destination is a paradoxical subreg such that we need a
644 truncate to the inner mode, perform the insertion on a temporary and
645 truncate the result to the original destination. Note that we can't
646 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
647 X) 0)) is (reg:N X). */
648 if (GET_CODE (xop0) == SUBREG
649 && REG_P (SUBREG_REG (xop0))
650 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
651 op_mode)))
652 {
653 rtx tem = gen_reg_rtx (op_mode);
654 emit_move_insn (tem, xop0);
655 xop0 = tem;
656 copy_back = true;
657 }
658
659 /* On big-endian machines, we count bits from the most significant.
660 If the bit field insn does not, we must invert. */
661
662 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
663 xbitpos = unit - bitsize - xbitpos;
664
665 /* We have been counting XBITPOS within UNIT.
666 Count instead within the size of the register. */
667 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
668 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
669
670 unit = GET_MODE_BITSIZE (op_mode);
671
672 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
673 value1 = value;
674 if (GET_MODE (value) != op_mode)
675 {
676 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
677 {
678 /* Optimization: Don't bother really extending VALUE
679 if it has all the bits we will actually use. However,
680 if we must narrow it, be sure we do it correctly. */
681
682 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
683 {
684 rtx tmp;
685
686 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
687 if (! tmp)
688 tmp = simplify_gen_subreg (op_mode,
689 force_reg (GET_MODE (value),
690 value1),
691 GET_MODE (value), 0);
692 value1 = tmp;
693 }
694 else
695 value1 = gen_lowpart (op_mode, value1);
696 }
697 else if (CONST_INT_P (value))
698 value1 = gen_int_mode (INTVAL (value), op_mode);
699 else
700 /* Parse phase is supposed to make VALUE's data type
701 match that of the component reference, which is a type
702 at least as wide as the field; so VALUE should have
703 a mode that corresponds to that type. */
704 gcc_assert (CONSTANT_P (value));
705 }
706
707 create_fixed_operand (&ops[0], xop0);
708 create_integer_operand (&ops[1], bitsize);
709 create_integer_operand (&ops[2], xbitpos);
710 create_input_operand (&ops[3], value1, op_mode);
711 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
712 {
713 if (copy_back)
714 convert_move (op0, xop0, true);
715 return true;
716 }
717 delete_insns_since (last);
718 }
719
720 /* If OP0 is a memory, try copying it to a register and seeing if a
721 cheap register alternative is available. */
722 if (HAVE_insv && MEM_P (op0))
723 {
724 enum machine_mode bestmode;
725 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
726
727 if (bitregion_end)
728 maxbits = bitregion_end - bitregion_start + 1;
729
730 /* Get the mode to use for inserting into this field. If OP0 is
731 BLKmode, get the smallest mode consistent with the alignment. If
732 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
733 mode. Otherwise, use the smallest mode containing the field. */
734
735 if (GET_MODE (op0) == BLKmode
736 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
737 || (op_mode != MAX_MACHINE_MODE
738 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
739 bestmode = get_best_mode (bitsize, bitnum,
740 bitregion_start, bitregion_end,
741 MEM_ALIGN (op0),
742 (op_mode == MAX_MACHINE_MODE
743 ? VOIDmode : op_mode),
744 MEM_VOLATILE_P (op0));
745 else
746 bestmode = GET_MODE (op0);
747
748 if (bestmode != VOIDmode
749 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
750 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
751 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
752 {
753 rtx last, tempreg, xop0;
754 unsigned HOST_WIDE_INT xoffset, xbitpos;
755
756 last = get_last_insn ();
757
758 /* Adjust address to point to the containing unit of
759 that mode. Compute the offset as a multiple of this unit,
760 counting in bytes. */
761 unit = GET_MODE_BITSIZE (bestmode);
762 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
763 xbitpos = bitnum % unit;
764 xop0 = adjust_address (op0, bestmode, xoffset);
765
766 /* Fetch that unit, store the bitfield in it, then store
767 the unit. */
768 tempreg = copy_to_reg (xop0);
769 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
770 bitregion_start, bitregion_end,
771 fieldmode, orig_value, false))
772 {
773 emit_move_insn (xop0, tempreg);
774 return true;
775 }
776 delete_insns_since (last);
777 }
778 }
779
780 if (!fallback_p)
781 return false;
782
783 store_fixed_bit_field (op0, offset, bitsize, bitpos,
784 bitregion_start, bitregion_end, value);
785 return true;
786 }
787
788 /* Generate code to store value from rtx VALUE
789 into a bit-field within structure STR_RTX
790 containing BITSIZE bits starting at bit BITNUM.
791
792 BITREGION_START is the bitpos of the first bitfield in this region.
793 BITREGION_END is the bitpos of the last bitfield in this region.
794 Both are 0 if the C++ memory model does not apply, or if we are not
795 interested in keeping track of bitfield regions.
796
797 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
798
799 void
800 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
801 unsigned HOST_WIDE_INT bitnum,
802 unsigned HOST_WIDE_INT bitregion_start,
803 unsigned HOST_WIDE_INT bitregion_end,
804 enum machine_mode fieldmode,
805 rtx value)
806 {
807 /* Under the C++0x memory model, we must not touch bits outside the
808 bit region. Adjust the address to start at the beginning of the
809 bit region. */
810 if (MEM_P (str_rtx)
811 && bitregion_start > 0)
812 {
813 enum machine_mode bestmode;
814 enum machine_mode op_mode;
815 unsigned HOST_WIDE_INT offset;
816
817 op_mode = mode_for_extraction (EP_insv, 3);
818 if (op_mode == MAX_MACHINE_MODE)
819 op_mode = VOIDmode;
820
821 offset = bitregion_start / BITS_PER_UNIT;
822 bitnum -= bitregion_start;
823 bitregion_end -= bitregion_start;
824 bitregion_start = 0;
825 bestmode = get_best_mode (bitsize, bitnum,
826 bitregion_start, bitregion_end,
827 MEM_ALIGN (str_rtx),
828 op_mode,
829 MEM_VOLATILE_P (str_rtx));
830 str_rtx = adjust_address (str_rtx, bestmode, offset);
831 }
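  /* A worked example of the adjustment above: if the bit region begins at
     bit 48 of the structure and the field to be stored begins at bit 53,
     the address is advanced by 48 / 8 == 6 bytes and we continue with
     bitnum == 5 and bitregion_start == 0, so the access chosen by
     get_best_mode starts at the region boundary rather than before it.  */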
832
833 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
834 bitregion_start, bitregion_end,
835 fieldmode, value, true))
836 gcc_unreachable ();
837 }
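/* A hypothetical call, purely for illustration (the operand names are made
   up; real callers such as store_field in expr.c construct them): storing
   an 8-bit value VAL into bits 3..10 of a byte-aligned MEM_RTX with no
   enclosing bit region would be written
     store_bit_field (mem_rtx, 8, 3, 0, 0, QImode, val);  */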
838 \f
839 /* Use shifts and boolean operations to store VALUE
840 into a bit field of width BITSIZE
841 in a memory location specified by OP0 except offset by OFFSET bytes.
842 (OFFSET must be 0 if OP0 is a register.)
843 The field starts at position BITPOS within the byte.
844 (If OP0 is a register, it may be a full word or a narrower mode,
845 but BITPOS still counts within a full word,
846 which is significant on bigendian machines.) */
847
848 static void
849 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
850 unsigned HOST_WIDE_INT bitsize,
851 unsigned HOST_WIDE_INT bitpos,
852 unsigned HOST_WIDE_INT bitregion_start,
853 unsigned HOST_WIDE_INT bitregion_end,
854 rtx value)
855 {
856 enum machine_mode mode;
857 unsigned int total_bits = BITS_PER_WORD;
858 rtx temp;
859 int all_zero = 0;
860 int all_one = 0;
861
862 /* There is a case not handled here:
863 a structure with a known alignment of just a halfword
864 and a field split across two aligned halfwords within the structure.
865 Or likewise a structure with a known alignment of just a byte
866 and a field split across two bytes.
867 Such cases are not supposed to be able to occur. */
868
869 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
870 {
871 gcc_assert (!offset);
872 /* Special treatment for a bit field split across two registers. */
873 if (bitsize + bitpos > BITS_PER_WORD)
874 {
875 store_split_bit_field (op0, bitsize, bitpos,
876 bitregion_start, bitregion_end,
877 value);
878 return;
879 }
880 }
881 else
882 {
883 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
884
885 if (bitregion_end)
886 maxbits = bitregion_end - bitregion_start + 1;
887
888 /* Get the proper mode to use for this field. We want a mode that
889 includes the entire field. If such a mode would be larger than
890 a word, we won't be doing the extraction the normal way.
891 We don't want a mode bigger than the destination. */
892
893 mode = GET_MODE (op0);
894 if (GET_MODE_BITSIZE (mode) == 0
895 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
896 mode = word_mode;
897
898 if (MEM_VOLATILE_P (op0)
899 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
900 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
901 && flag_strict_volatile_bitfields > 0)
902 mode = GET_MODE (op0);
903 else
904 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
905 bitregion_start, bitregion_end,
906 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
907
908 if (mode == VOIDmode)
909 {
910 /* The only way this should occur is if the field spans word
911 boundaries. */
912 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
913 bitregion_start, bitregion_end, value);
914 return;
915 }
916
917 total_bits = GET_MODE_BITSIZE (mode);
918
919 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
920 be in the range 0 to total_bits-1, and put any excess bytes in
921 OFFSET. */
922 if (bitpos >= total_bits)
923 {
924 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
925 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
926 * BITS_PER_UNIT);
927 }
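      /* Worked example of the adjustment above: with total_bits == 32 and
	 bitpos == 37, OFFSET grows by (37 / 32) * 4 == 4 bytes and BITPOS
	 becomes 37 - 32 == 5, i.e. the field is addressed relative to the
	 next aligned unit.  */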
928
929 /* Get ref to an aligned byte, halfword, or word containing the field.
930 Adjust BITPOS to be position within a word,
931 and OFFSET to be the offset of that word.
932 Then alter OP0 to refer to that word. */
933 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
934 offset -= (offset % (total_bits / BITS_PER_UNIT));
935 op0 = adjust_address (op0, mode, offset);
936 }
937
938 mode = GET_MODE (op0);
939
940 /* Now MODE is either some integral mode for a MEM as OP0,
941 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
942 The bit field is contained entirely within OP0.
943 BITPOS is the starting bit number within OP0.
944 (OP0's mode may actually be narrower than MODE.) */
945
946 if (BYTES_BIG_ENDIAN)
947 /* BITPOS is the distance between our msb
948 and that of the containing datum.
949 Convert it to the distance from the lsb. */
950 bitpos = total_bits - bitsize - bitpos;
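  /* For instance, with total_bits == 32, an 8-bit field whose bitpos was 4
     when counted from the msb occupies bits 27..20 counted from the lsb,
     so bitpos becomes 32 - 8 - 4 == 20.  */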
951
952 /* Now BITPOS is always the distance between our lsb
953 and that of OP0. */
954
955 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
956 we must first convert its mode to MODE. */
957
958 if (CONST_INT_P (value))
959 {
960 HOST_WIDE_INT v = INTVAL (value);
961
962 if (bitsize < HOST_BITS_PER_WIDE_INT)
963 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
964
965 if (v == 0)
966 all_zero = 1;
967 else if ((bitsize < HOST_BITS_PER_WIDE_INT
968 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
969 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
970 all_one = 1;
971
972 value = lshift_value (mode, value, bitpos, bitsize);
973 }
974 else
975 {
976 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
977 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
978
979 if (GET_MODE (value) != mode)
980 value = convert_to_mode (mode, value, 1);
981
982 if (must_and)
983 value = expand_binop (mode, and_optab, value,
984 mask_rtx (mode, 0, bitsize, 0),
985 NULL_RTX, 1, OPTAB_LIB_WIDEN);
986 if (bitpos > 0)
987 value = expand_shift (LSHIFT_EXPR, mode, value,
988 bitpos, NULL_RTX, 1);
989 }
990
991 /* Now clear the chosen bits in OP0,
992 except that if VALUE is -1 we need not bother. */
993 /* We keep the intermediates in registers to allow CSE to combine
994 consecutive bitfield assignments. */
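  /* In C terms, the sequence below computes roughly
	op0 = (op0 & ~(mask << bitpos)) | value
     where MASK has BITSIZE low-order one bits and VALUE has already been
     masked and shifted left by BITPOS; the ALL_ONE and ALL_ZERO flags let
     us omit the AND or the IOR when a constant VALUE makes it redundant.  */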
995
996 temp = force_reg (mode, op0);
997
998 if (! all_one)
999 {
1000 temp = expand_binop (mode, and_optab, temp,
1001 mask_rtx (mode, bitpos, bitsize, 1),
1002 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1003 temp = force_reg (mode, temp);
1004 }
1005
1006 /* Now logical-or VALUE into OP0, unless it is zero. */
1007
1008 if (! all_zero)
1009 {
1010 temp = expand_binop (mode, ior_optab, temp, value,
1011 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1012 temp = force_reg (mode, temp);
1013 }
1014
1015 if (op0 != temp)
1016 {
1017 op0 = copy_rtx (op0);
1018 emit_move_insn (op0, temp);
1019 }
1020 }
1021 \f
1022 /* Store a bit field that is split across multiple accessible memory objects.
1023
1024 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1025 BITSIZE is the field width; BITPOS the position of its first bit
1026 (within the word).
1027 VALUE is the value to store.
1028
1029 This does not yet handle fields wider than BITS_PER_WORD. */
1030
1031 static void
1032 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1033 unsigned HOST_WIDE_INT bitpos,
1034 unsigned HOST_WIDE_INT bitregion_start,
1035 unsigned HOST_WIDE_INT bitregion_end,
1036 rtx value)
1037 {
1038 unsigned int unit;
1039 unsigned int bitsdone = 0;
1040
1041 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
1042 that much at a time. */
1043 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1044 unit = BITS_PER_WORD;
1045 else
1046 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1047
1048 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1049 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1050 that VALUE might be a floating-point constant. */
1051 if (CONSTANT_P (value) && !CONST_INT_P (value))
1052 {
1053 rtx word = gen_lowpart_common (word_mode, value);
1054
1055 if (word && (value != word))
1056 value = word;
1057 else
1058 value = gen_lowpart_common (word_mode,
1059 force_reg (GET_MODE (value) != VOIDmode
1060 ? GET_MODE (value)
1061 : word_mode, value));
1062 }
1063
1064 while (bitsdone < bitsize)
1065 {
1066 unsigned HOST_WIDE_INT thissize;
1067 rtx part, word;
1068 unsigned HOST_WIDE_INT thispos;
1069 unsigned HOST_WIDE_INT offset;
1070
1071 offset = (bitpos + bitsdone) / unit;
1072 thispos = (bitpos + bitsdone) % unit;
1073
1074 /* THISSIZE must not overrun a word boundary. Otherwise,
1075 store_fixed_bit_field will call us again, and we will mutually
1076 recurse forever. */
1077 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1078 thissize = MIN (thissize, unit - thispos);
1079
1080 if (BYTES_BIG_ENDIAN)
1081 {
1082 int total_bits;
1083
1084 /* We must do an endian conversion exactly the same way as it is
1085 done in extract_bit_field, so that the two calls to
1086 extract_fixed_bit_field will have comparable arguments. */
1087 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1088 total_bits = BITS_PER_WORD;
1089 else
1090 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1091
1092 /* Fetch successively less significant portions. */
1093 if (CONST_INT_P (value))
1094 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1095 >> (bitsize - bitsdone - thissize))
1096 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1097 else
1098 /* The args are chosen so that the last part includes the
1099 lsb. Give extract_bit_field the value it needs (with
1100 endianness compensation) to fetch the piece we want. */
1101 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1102 total_bits - bitsize + bitsdone,
1103 NULL_RTX, 1, false);
1104 }
1105 else
1106 {
1107 /* Fetch successively more significant portions. */
1108 if (CONST_INT_P (value))
1109 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1110 >> bitsdone)
1111 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1112 else
1113 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1114 bitsdone, NULL_RTX, 1, false);
1115 }
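      /* A little-endian sketch of the loop so far: storing a 20-bit
	 constant with bitpos == 0 into a MEM aligned to 16 bits
	 (unit == 16) takes two iterations; the first extracts bits 0..15
	 of VALUE (bitsdone == 0, thissize == 16) and the second bits
	 16..19 (bitsdone == 16, thissize == 4), each piece masked with
	 ((HOST_WIDE_INT) 1 << thissize) - 1.  */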
1116
1117 /* If OP0 is a register, then handle OFFSET here.
1118
1119 When handling multiword bitfields, extract_bit_field may pass
1120 down a word_mode SUBREG of a larger REG for a bitfield that actually
1121 crosses a word boundary. Thus, for a SUBREG, we must find
1122 the current word starting from the base register. */
1123 if (GET_CODE (op0) == SUBREG)
1124 {
1125 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1126 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1127 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1128 word = word_offset ? const0_rtx : op0;
1129 else
1130 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1131 GET_MODE (SUBREG_REG (op0)));
1132 offset = 0;
1133 }
1134 else if (REG_P (op0))
1135 {
1136 enum machine_mode op0_mode = GET_MODE (op0);
1137 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1138 word = offset ? const0_rtx : op0;
1139 else
1140 word = operand_subword_force (op0, offset, GET_MODE (op0));
1141 offset = 0;
1142 }
1143 else
1144 word = op0;
1145
1146 /* OFFSET is in UNITs, and UNIT is in bits.
1147 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1148 it is just an out-of-bounds access. Ignore it. */
1149 if (word != const0_rtx)
1150 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1151 thispos, bitregion_start, bitregion_end, part);
1152 bitsdone += thissize;
1153 }
1154 }
1155 \f
1156 /* A subroutine of extract_bit_field_1 that converts return value X
1157 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1158 to extract_bit_field. */
1159
1160 static rtx
1161 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1162 enum machine_mode tmode, bool unsignedp)
1163 {
1164 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1165 return x;
1166
1167 /* If TMODE is not a scalar integral mode, first convert X to an
1168 integer mode of the same size and then reinterpret it as TMODE
1169 (e.g. a floating-point mode) via a SUBREG. */
1170 if (!SCALAR_INT_MODE_P (tmode))
1171 {
1172 enum machine_mode smode;
1173
1174 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1175 x = convert_to_mode (smode, x, unsignedp);
1176 x = force_reg (smode, x);
1177 return gen_lowpart (tmode, x);
1178 }
1179
1180 return convert_to_mode (tmode, x, unsignedp);
1181 }
1182
1183 /* A subroutine of extract_bit_field, with the same arguments.
1184 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1185 if we can find no other means of implementing the operation.
1186 If FALLBACK_P is false, return NULL instead. */
1187
1188 static rtx
1189 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1190 unsigned HOST_WIDE_INT bitnum,
1191 int unsignedp, bool packedp, rtx target,
1192 enum machine_mode mode, enum machine_mode tmode,
1193 bool fallback_p)
1194 {
1195 unsigned int unit
1196 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1197 unsigned HOST_WIDE_INT offset, bitpos;
1198 rtx op0 = str_rtx;
1199 enum machine_mode int_mode;
1200 enum machine_mode ext_mode;
1201 enum machine_mode mode1;
1202 int byte_offset;
1203
1204 if (tmode == VOIDmode)
1205 tmode = mode;
1206
1207 while (GET_CODE (op0) == SUBREG)
1208 {
1209 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1210 op0 = SUBREG_REG (op0);
1211 }
1212
1213 /* If we have an out-of-bounds access to a register, just return an
1214 uninitialized register of the required mode. This can occur if the
1215 source code contains an out-of-bounds access to a small array. */
1216 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1217 return gen_reg_rtx (tmode);
1218
1219 if (REG_P (op0)
1220 && mode == GET_MODE (op0)
1221 && bitnum == 0
1222 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1223 {
1224 /* We're trying to extract a full register from itself. */
1225 return op0;
1226 }
1227
1228 /* See if we can get a better vector mode before extracting. */
1229 if (VECTOR_MODE_P (GET_MODE (op0))
1230 && !MEM_P (op0)
1231 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1232 {
1233 enum machine_mode new_mode;
1234
1235 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1236 new_mode = MIN_MODE_VECTOR_FLOAT;
1237 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1238 new_mode = MIN_MODE_VECTOR_FRACT;
1239 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1240 new_mode = MIN_MODE_VECTOR_UFRACT;
1241 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1242 new_mode = MIN_MODE_VECTOR_ACCUM;
1243 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1244 new_mode = MIN_MODE_VECTOR_UACCUM;
1245 else
1246 new_mode = MIN_MODE_VECTOR_INT;
1247
1248 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1249 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1250 && targetm.vector_mode_supported_p (new_mode))
1251 break;
1252 if (new_mode != VOIDmode)
1253 op0 = gen_lowpart (new_mode, op0);
1254 }
1255
1256 /* Use vec_extract patterns for extracting parts of vectors whenever
1257 available. */
1258 if (VECTOR_MODE_P (GET_MODE (op0))
1259 && !MEM_P (op0)
1260 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1261 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1262 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1263 {
1264 struct expand_operand ops[3];
1265 enum machine_mode outermode = GET_MODE (op0);
1266 enum machine_mode innermode = GET_MODE_INNER (outermode);
1267 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1268 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1269
1270 create_output_operand (&ops[0], target, innermode);
1271 create_input_operand (&ops[1], op0, outermode);
1272 create_integer_operand (&ops[2], pos);
1273 if (maybe_expand_insn (icode, 3, ops))
1274 {
1275 target = ops[0].value;
1276 if (GET_MODE (target) != mode)
1277 return gen_lowpart (tmode, target);
1278 return target;
1279 }
1280 }
1281
1282 /* Make sure we are playing with integral modes. Pun with subregs
1283 if we aren't. */
1284 {
1285 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1286 if (imode != GET_MODE (op0))
1287 {
1288 if (MEM_P (op0))
1289 op0 = adjust_address (op0, imode, 0);
1290 else if (imode != BLKmode)
1291 {
1292 op0 = gen_lowpart (imode, op0);
1293
1294 /* If we got a SUBREG, force it into a register since we
1295 aren't going to be able to do another SUBREG on it. */
1296 if (GET_CODE (op0) == SUBREG)
1297 op0 = force_reg (imode, op0);
1298 }
1299 else if (REG_P (op0))
1300 {
1301 rtx reg, subreg;
1302 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1303 MODE_INT);
1304 reg = gen_reg_rtx (imode);
1305 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1306 emit_move_insn (subreg, op0);
1307 op0 = reg;
1308 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1309 }
1310 else
1311 {
1312 rtx mem = assign_stack_temp (GET_MODE (op0),
1313 GET_MODE_SIZE (GET_MODE (op0)), 0);
1314 emit_move_insn (mem, op0);
1315 op0 = adjust_address (mem, BLKmode, 0);
1316 }
1317 }
1318 }
1319
1320 /* We may be accessing data outside the field, which means
1321 we can alias adjacent data. */
1322 if (MEM_P (op0))
1323 {
1324 op0 = shallow_copy_rtx (op0);
1325 set_mem_alias_set (op0, 0);
1326 set_mem_expr (op0, 0);
1327 }
1328
1329 /* Extraction of a full-word or multi-word value from a structure
1330 in a register or aligned memory can be done with just a SUBREG.
1331 A subword value in the least significant part of a register
1332 can also be extracted with a SUBREG. For this, we need the
1333 byte offset of the value in op0. */
1334
1335 bitpos = bitnum % unit;
1336 offset = bitnum / unit;
1337 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1338
1339 /* If OP0 is a register, BITPOS must count within a word.
1340 But as we have it, it counts within whatever size OP0 now has.
1341 On a bigendian machine, these are not the same, so convert. */
1342 if (BYTES_BIG_ENDIAN
1343 && !MEM_P (op0)
1344 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1345 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1346
1347 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1348 If that's wrong, the solution is to test for it and set TARGET to 0
1349 if needed. */
1350
1351 /* Only scalar integer modes can be converted via subregs. There is an
1352 additional problem for FP modes here in that they can have a precision
1353 which is different from the size. mode_for_size uses precision, but
1354 we want a mode based on the size, so we must avoid calling it for FP
1355 modes. */
1356 mode1 = (SCALAR_INT_MODE_P (tmode)
1357 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1358 : mode);
1359
1360 /* If the bitfield is volatile, we need to make sure the access
1361 remains on a type-aligned boundary. */
1362 if (GET_CODE (op0) == MEM
1363 && MEM_VOLATILE_P (op0)
1364 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1365 && flag_strict_volatile_bitfields > 0)
1366 goto no_subreg_mode_swap;
1367
1368 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1369 && bitpos % BITS_PER_WORD == 0)
1370 || (mode1 != BLKmode
1371 /* ??? The big endian test here is wrong. This is correct
1372 if the value is in a register, and if mode_for_size is not
1373 the same mode as op0. This causes us to get unnecessarily
1374 inefficient code from the Thumb port when -mbig-endian. */
1375 && (BYTES_BIG_ENDIAN
1376 ? bitpos + bitsize == BITS_PER_WORD
1377 : bitpos == 0)))
1378 && ((!MEM_P (op0)
1379 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1380 && GET_MODE_SIZE (mode1) != 0
1381 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1382 || (MEM_P (op0)
1383 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1384 || (offset * BITS_PER_UNIT % bitsize == 0
1385 && MEM_ALIGN (op0) % bitsize == 0)))))
1386 {
1387 if (MEM_P (op0))
1388 op0 = adjust_address (op0, mode1, offset);
1389 else if (mode1 != GET_MODE (op0))
1390 {
1391 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1392 byte_offset);
1393 if (sub == NULL)
1394 goto no_subreg_mode_swap;
1395 op0 = sub;
1396 }
1397 if (mode1 != mode)
1398 return convert_to_mode (tmode, op0, unsignedp);
1399 return op0;
1400 }
1401 no_subreg_mode_swap:
1402
1403 /* Handle fields bigger than a word. */
1404
1405 if (bitsize > BITS_PER_WORD)
1406 {
1407 /* Here we transfer the words of the field
1408 in the order least significant first.
1409 This is because the most significant word is the one which may
1410 be less than full. */
1411
1412 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1413 unsigned int i;
1414
1415 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1416 target = gen_reg_rtx (mode);
1417
1418 /* Indicate for flow that the entire target reg is being set. */
1419 emit_clobber (target);
1420
1421 for (i = 0; i < nwords; i++)
1422 {
1423 /* If I is 0, use the low-order word in both field and target;
1424 if I is 1, use the next to lowest word; and so on. */
1425 /* Word number in TARGET to use. */
1426 unsigned int wordnum
1427 = (WORDS_BIG_ENDIAN
1428 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1429 : i);
1430 /* Offset from start of field in OP0. */
1431 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1432 ? MAX (0, ((int) bitsize - ((int) i + 1)
1433 * (int) BITS_PER_WORD))
1434 : (int) i * BITS_PER_WORD);
1435 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1436 rtx result_part
1437 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1438 bitsize - i * BITS_PER_WORD),
1439 bitnum + bit_offset, 1, false, target_part, mode,
1440 word_mode);
1441
1442 gcc_assert (target_part);
1443
1444 if (result_part != target_part)
1445 emit_move_insn (target_part, result_part);
1446 }
1447
1448 if (unsignedp)
1449 {
1450 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1451 need to be zero'd out. */
1452 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1453 {
1454 unsigned int i, total_words;
1455
1456 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1457 for (i = nwords; i < total_words; i++)
1458 emit_move_insn
1459 (operand_subword (target,
1460 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1461 1, VOIDmode),
1462 const0_rtx);
1463 }
1464 return target;
1465 }
1466
1467 /* Signed bit field: sign-extend with two arithmetic shifts. */
1468 target = expand_shift (LSHIFT_EXPR, mode, target,
1469 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1470 return expand_shift (RSHIFT_EXPR, mode, target,
1471 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
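  /* For example, extracting a signed 8-bit field into a 32-bit MODE: the
     left shift by 24 moves the field's sign bit into bit 31, and the
     arithmetic right shift by 24 then replicates it through bits 31..8,
     producing the sign-extended result.  */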
1472 }
1473
1474 /* From here on we know the desired field is smaller than a word. */
1475
1476 /* Check if there is a correspondingly-sized integer field, so we can
1477 safely extract it as one size of integer, if necessary; then
1478 truncate or extend to the size that is wanted; then use SUBREGs or
1479 convert_to_mode to get one of the modes we really wanted. */
1480
1481 int_mode = int_mode_for_mode (tmode);
1482 if (int_mode == BLKmode)
1483 int_mode = int_mode_for_mode (mode);
1484 /* Should probably push op0 out to memory and then do a load. */
1485 gcc_assert (int_mode != BLKmode);
1486
1487 /* OFFSET is the number of words or bytes (UNIT says which)
1488 from STR_RTX to the first word or byte containing part of the field. */
1489 if (!MEM_P (op0))
1490 {
1491 if (offset != 0
1492 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1493 {
1494 if (!REG_P (op0))
1495 op0 = copy_to_reg (op0);
1496 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1497 op0, (offset * UNITS_PER_WORD));
1498 }
1499 offset = 0;
1500 }
1501
1502 /* Now OFFSET is nonzero only for memory operands. */
1503 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1504 if (ext_mode != MAX_MACHINE_MODE
1505 && bitsize > 0
1506 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1507 /* If op0 is a register, we need it in EXT_MODE to make it
1508 acceptable to the format of ext(z)v. */
1509 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1510 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1511 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1512 {
1513 struct expand_operand ops[4];
1514 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1515 rtx xop0 = op0;
1516 rtx xtarget = target;
1517 rtx xspec_target = target;
1518 rtx xspec_target_subreg = 0;
1519
1520 /* If op0 is a register, we need it in EXT_MODE to make it
1521 acceptable to the format of ext(z)v. */
1522 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1523 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1524 if (MEM_P (xop0))
1525 /* Get ref to first byte containing part of the field. */
1526 xop0 = adjust_address (xop0, byte_mode, xoffset);
1527
1528 /* On big-endian machines, we count bits from the most significant.
1529 If the bit field insn does not, we must invert. */
1530 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1531 xbitpos = unit - bitsize - xbitpos;
1532
1533 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1534 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1535 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1536
1537 unit = GET_MODE_BITSIZE (ext_mode);
1538
1539 if (xtarget == 0)
1540 xtarget = xspec_target = gen_reg_rtx (tmode);
1541
1542 if (GET_MODE (xtarget) != ext_mode)
1543 {
1544 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1545 between the mode of the extraction (word_mode) and the target
1546 mode. Instead, create a temporary and use convert_move to set
1547 the target. */
1548 if (REG_P (xtarget)
1549 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1550 {
1551 xtarget = gen_lowpart (ext_mode, xtarget);
1552 if (GET_MODE_PRECISION (ext_mode)
1553 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1554 xspec_target_subreg = xtarget;
1555 }
1556 else
1557 xtarget = gen_reg_rtx (ext_mode);
1558 }
1559
1560 create_output_operand (&ops[0], xtarget, ext_mode);
1561 create_fixed_operand (&ops[1], xop0);
1562 create_integer_operand (&ops[2], bitsize);
1563 create_integer_operand (&ops[3], xbitpos);
1564 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1565 4, ops))
1566 {
1567 xtarget = ops[0].value;
1568 if (xtarget == xspec_target)
1569 return xtarget;
1570 if (xtarget == xspec_target_subreg)
1571 return xspec_target;
1572 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1573 }
1574 }
1575
1576 /* If OP0 is a memory, try copying it to a register and seeing if a
1577 cheap register alternative is available. */
1578 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1579 {
1580 enum machine_mode bestmode;
1581
1582 /* Get the mode to use for inserting into this field. If
1583 OP0 is BLKmode, get the smallest mode consistent with the
1584 alignment. If OP0 is a non-BLKmode object that is no
1585 wider than EXT_MODE, use its mode. Otherwise, use the
1586 smallest mode containing the field. */
1587
1588 if (GET_MODE (op0) == BLKmode
1589 || (ext_mode != MAX_MACHINE_MODE
1590 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1591 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1592 (ext_mode == MAX_MACHINE_MODE
1593 ? VOIDmode : ext_mode),
1594 MEM_VOLATILE_P (op0));
1595 else
1596 bestmode = GET_MODE (op0);
1597
1598 if (bestmode != VOIDmode
1599 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1600 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1601 {
1602 unsigned HOST_WIDE_INT xoffset, xbitpos;
1603
1604 /* Compute the offset as a multiple of this unit,
1605 counting in bytes. */
1606 unit = GET_MODE_BITSIZE (bestmode);
1607 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1608 xbitpos = bitnum % unit;
1609
1610 /* Make sure the register is big enough for the whole field. */
1611 if (xoffset * BITS_PER_UNIT + unit
1612 >= offset * BITS_PER_UNIT + bitsize)
1613 {
1614 rtx last, result, xop0;
1615
1616 last = get_last_insn ();
1617
1618 /* Fetch it to a register in that size. */
1619 xop0 = adjust_address (op0, bestmode, xoffset);
1620 xop0 = force_reg (bestmode, xop0);
1621 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1622 unsignedp, packedp, target,
1623 mode, tmode, false);
1624 if (result)
1625 return result;
1626
1627 delete_insns_since (last);
1628 }
1629 }
1630 }
1631
1632 if (!fallback_p)
1633 return NULL;
1634
1635 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1636 bitpos, target, unsignedp, packedp);
1637 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1638 }
1639
1640 /* Generate code to extract a byte-field from STR_RTX
1641 containing BITSIZE bits, starting at BITNUM,
1642 and put it in TARGET if possible (if TARGET is nonzero).
1643 Regardless of TARGET, we return the rtx for where the value is placed.
1644
1645 STR_RTX is the structure containing the byte (a REG or MEM).
1646 UNSIGNEDP is nonzero if this is an unsigned bit field.
1647 PACKEDP is nonzero if the field has the packed attribute.
1648 MODE is the natural mode of the field value once extracted.
1649 TMODE is the mode the caller would like the value to have;
1650 but the value may be returned with type MODE instead.
1651
1652 If a TARGET is specified and we can store in it at no extra cost,
1653 we do so, and return TARGET.
1654 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1655 if they are equally easy. */
1656
1657 rtx
1658 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1659 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1660 rtx target, enum machine_mode mode, enum machine_mode tmode)
1661 {
1662 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1663 target, mode, tmode, true);
1664 }
1665 \f
1666 /* Extract a bit field using shifts and boolean operations.
1667 Returns an rtx to represent the value.
1668 OP0 addresses a register (word) or memory (byte).
1669 BITPOS says which bit within the word or byte the bit field starts in.
1670 OFFSET says how many bytes farther the bit field starts;
1671 it is 0 if OP0 is a register.
1672 BITSIZE says how many bits long the bit field is.
1673 (If OP0 is a register, it may be narrower than a full word,
1674 but BITPOS still counts within a full word,
1675 which is significant on bigendian machines.)
1676
1677 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1678 PACKEDP is true if the field has the packed attribute.
1679
1680 If TARGET is nonzero, attempts to store the value there
1681 and return TARGET, but this is not guaranteed.
1682 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1683
1684 static rtx
1685 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1686 unsigned HOST_WIDE_INT offset,
1687 unsigned HOST_WIDE_INT bitsize,
1688 unsigned HOST_WIDE_INT bitpos, rtx target,
1689 int unsignedp, bool packedp)
1690 {
1691 unsigned int total_bits = BITS_PER_WORD;
1692 enum machine_mode mode;
1693
1694 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1695 {
1696 /* Special treatment for a bit field split across two registers. */
1697 if (bitsize + bitpos > BITS_PER_WORD)
1698 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1699 }
1700 else
1701 {
1702 /* Get the proper mode to use for this field. We want a mode that
1703 includes the entire field. If such a mode would be larger than
1704 a word, we won't be doing the extraction the normal way. */
1705
1706 if (MEM_VOLATILE_P (op0)
1707 && flag_strict_volatile_bitfields > 0)
1708 {
1709 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1710 mode = GET_MODE (op0);
1711 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1712 mode = GET_MODE (target);
1713 else
1714 mode = tmode;
1715 }
1716 else
1717 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1718 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1719
1720 if (mode == VOIDmode)
1721 /* The only way this should occur is if the field spans word
1722 boundaries. */
1723 return extract_split_bit_field (op0, bitsize,
1724 bitpos + offset * BITS_PER_UNIT,
1725 unsignedp);
1726
1727 total_bits = GET_MODE_BITSIZE (mode);
1728
1729 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1730 be in the range 0 to total_bits-1, and put any excess bytes in
1731 OFFSET. */
1732 if (bitpos >= total_bits)
1733 {
1734 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1735 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1736 * BITS_PER_UNIT);
1737 }
1738
1739 /* If we're accessing a volatile MEM, we can't do the next
1740 alignment step if it results in a multi-word access where we
1741 otherwise wouldn't have one. So, check for that case
1742 here. */
1743 if (MEM_P (op0)
1744 && MEM_VOLATILE_P (op0)
1745 && flag_strict_volatile_bitfields > 0
1746 && bitpos + bitsize <= total_bits
1747 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1748 {
1749 if (STRICT_ALIGNMENT)
1750 {
1751 static bool informed_about_misalignment = false;
1752 bool warned;
1753
1754 if (packedp)
1755 {
1756 if (bitsize == total_bits)
1757 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1758 "multiple accesses to volatile structure member"
1759 " because of packed attribute");
1760 else
1761 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1762 "multiple accesses to volatile structure bitfield"
1763 " because of packed attribute");
1764
1765 return extract_split_bit_field (op0, bitsize,
1766 bitpos + offset * BITS_PER_UNIT,
1767 unsignedp);
1768 }
1769
1770 if (bitsize == total_bits)
1771 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1772 "mis-aligned access used for structure member");
1773 else
1774 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1775 "mis-aligned access used for structure bitfield");
1776
1777 if (! informed_about_misalignment && warned)
1778 {
1779 informed_about_misalignment = true;
1780 inform (input_location,
1781 "when a volatile object spans multiple type-sized locations,"
1782 " the compiler must choose between using a single mis-aligned access to"
1783 " preserve the volatility, or using multiple aligned accesses to avoid"
1784 " runtime faults; this code may fail at runtime if the hardware does"
1785 " not allow this access");
1786 }
1787 }
1788 }
1789 else
1790 {
1791
1792 /* Get ref to an aligned byte, halfword, or word containing the field.
1793 Adjust BITPOS to be position within a word,
1794 and OFFSET to be the offset of that word.
1795 Then alter OP0 to refer to that word. */
1796 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1797 offset -= (offset % (total_bits / BITS_PER_UNIT));
1798 }
1799
1800 op0 = adjust_address (op0, mode, offset);
1801 }
1802
1803 mode = GET_MODE (op0);
1804
1805 if (BYTES_BIG_ENDIAN)
1806 /* BITPOS is the distance between our msb and that of OP0.
1807 Convert it to the distance from the lsb. */
1808 bitpos = total_bits - bitsize - bitpos;
1809
1810 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1811 We have reduced the big-endian case to the little-endian case. */
1812
1813 if (unsignedp)
1814 {
1815 if (bitpos)
1816 {
1817 /* If the field does not already start at the lsb,
1818 shift it so it does. */
1819 /* Maybe propagate the target for the shift. */
1820 /* But not if we will return it--could confuse integrate.c. */
1821 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1822 if (tmode != mode) subtarget = 0;
1823 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1824 }
1825 /* Convert the value to the desired mode. */
1826 if (mode != tmode)
1827 op0 = convert_to_mode (tmode, op0, 1);
1828
1829 /* Unless the msb of the field used to be the msb when we shifted,
1830 mask out the upper bits. */
1831
1832 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1833 return expand_binop (GET_MODE (op0), and_optab, op0,
1834 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1835 target, 1, OPTAB_LIB_WIDEN);
1836 return op0;
1837 }
1838
1839 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1840 then arithmetic-shift its lsb to the lsb of the word. */
1841 op0 = force_reg (mode, op0);
1842
1843 /* Find the narrowest integer mode that contains the field. */
1844
1845 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1846 mode = GET_MODE_WIDER_MODE (mode))
1847 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1848 {
1849 op0 = convert_to_mode (mode, op0, 0);
1850 break;
1851 }
1852
1853 if (mode != tmode)
1854 target = 0;
1855
1856 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1857 {
1858 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1859 /* Maybe propagate the target for the shift. */
1860 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1861 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1862 }
1863
1864 return expand_shift (RSHIFT_EXPR, mode, op0,
1865 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1866 }
1867 \f
1868 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1869 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1870 complement of that if COMPLEMENT. The mask is truncated if
1871 necessary to the width of mode MODE. The mask is zero-extended if
1872 BITSIZE+BITPOS is too small for MODE. */
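/* For example, mask_rtx (SImode, 4, 8, 0) yields the constant 0xff0
   (eight ones starting at bit 4), and mask_rtx (SImode, 4, 8, 1) yields
   its SImode complement 0xfffff00f.  */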
1873
1874 static rtx
1875 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1876 {
1877 double_int mask;
1878
1879 mask = double_int_mask (bitsize);
1880 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1881
1882 if (complement)
1883 mask = double_int_not (mask);
1884
1885 return immed_double_int_const (mask, mode);
1886 }
1887
1888 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1889 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1890
1891 static rtx
1892 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1893 {
1894 double_int val;
1895
1896 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1897 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1898
1899 return immed_double_int_const (val, mode);
1900 }
1901 \f
1902 /* Extract a bit field that is split across two words
1903 and return an RTX for the result.
1904
1905 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1906 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1907 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1908
1909 static rtx
1910 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1911 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1912 {
1913 unsigned int unit;
1914 unsigned int bitsdone = 0;
1915 rtx result = NULL_RTX;
1916 int first = 1;
1917
1918 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1919 much at a time. */
1920 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1921 unit = BITS_PER_WORD;
1922 else
1923 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1924
1925 while (bitsdone < bitsize)
1926 {
1927 unsigned HOST_WIDE_INT thissize;
1928 rtx part, word;
1929 unsigned HOST_WIDE_INT thispos;
1930 unsigned HOST_WIDE_INT offset;
1931
1932 offset = (bitpos + bitsdone) / unit;
1933 thispos = (bitpos + bitsdone) % unit;
1934
1935 /* THISSIZE must not overrun a word boundary. Otherwise,
1936 extract_fixed_bit_field will call us again, and we will mutually
1937 recurse forever. */
1938 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1939 thissize = MIN (thissize, unit - thispos);
1940
1941 /* If OP0 is a register, then handle OFFSET here.
1942
1943 When handling multiword bitfields, extract_bit_field may pass
1944 down a word_mode SUBREG of a larger REG for a bitfield that actually
1945 crosses a word boundary. Thus, for a SUBREG, we must find
1946 the current word starting from the base register. */
1947 if (GET_CODE (op0) == SUBREG)
1948 {
1949 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1950 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1951 GET_MODE (SUBREG_REG (op0)));
1952 offset = 0;
1953 }
1954 else if (REG_P (op0))
1955 {
1956 word = operand_subword_force (op0, offset, GET_MODE (op0));
1957 offset = 0;
1958 }
1959 else
1960 word = op0;
1961
1962 /* Extract the parts in bit-counting order,
1963 whose meaning is determined by BYTES_BIG_ENDIAN.
1964 OFFSET is in UNITs, and UNIT is in bits.
1965 extract_fixed_bit_field wants offset in bytes. */
1966 part = extract_fixed_bit_field (word_mode, word,
1967 offset * unit / BITS_PER_UNIT,
1968 thissize, thispos, 0, 1, false);
1969 bitsdone += thissize;
1970
1971 /* Shift this part into place for the result. */
1972 if (BYTES_BIG_ENDIAN)
1973 {
1974 if (bitsize != bitsdone)
1975 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1976 bitsize - bitsdone, 0, 1);
1977 }
1978 else
1979 {
1980 if (bitsdone != thissize)
1981 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1982 bitsdone - thissize, 0, 1);
1983 }
1984
1985 if (first)
1986 result = part;
1987 else
1988 /* Combine the parts with bitwise or. This works
1989 because we extracted each part as an unsigned bit field. */
1990 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1991 OPTAB_LIB_WIDEN);
1992
1993 first = 0;
1994 }
1995
1996 /* Unsigned bit field: we are done. */
1997 if (unsignedp)
1998 return result;
1999 /* Signed bit field: sign-extend with two arithmetic shifts. */
2000 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2001 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2002 return expand_shift (RSHIFT_EXPR, word_mode, result,
2003 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2004 }
2005 \f
2006 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2007 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2008 MODE, fill the upper bits with zeros. Fail if the layout of either
2009 mode is unknown (as for CC modes) or if the extraction would involve
2010 unprofitable mode punning. Return the value on success, otherwise
2011 return null.
2012
2013 This is different from gen_lowpart* in these respects:
2014
2015 - the returned value must always be considered an rvalue
2016
2017 - when MODE is wider than SRC_MODE, the extraction involves
2018 a zero extension
2019
2020 - when MODE is smaller than SRC_MODE, the extraction involves
2021 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2022
2023 In other words, this routine performs a computation, whereas the
2024 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2025 operations. */
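/* For example, extract_low_bits (QImode, SImode, x) reads the low byte of
   X as a QImode rvalue, while extract_low_bits (DImode, SImode, x)
   zero-extends X to DImode; either call may return null if the extraction
   is not possible or not profitable.  */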
2026
2027 rtx
2028 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2029 {
2030 enum machine_mode int_mode, src_int_mode;
2031
2032 if (mode == src_mode)
2033 return src;
2034
2035 if (CONSTANT_P (src))
2036 {
2037 /* simplify_gen_subreg can't be used here, because if simplify_subreg
2038 fails, it will happily create (subreg (symbol_ref)) or similar
2039 invalid SUBREGs. */
2040 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2041 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2042 if (ret)
2043 return ret;
2044
2045 if (GET_MODE (src) == VOIDmode
2046 || !validate_subreg (mode, src_mode, src, byte))
2047 return NULL_RTX;
2048
2049 src = force_reg (GET_MODE (src), src);
2050 return gen_rtx_SUBREG (mode, src, byte);
2051 }
2052
2053 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2054 return NULL_RTX;
2055
2056 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2057 && MODES_TIEABLE_P (mode, src_mode))
2058 {
2059 rtx x = gen_lowpart_common (mode, src);
2060 if (x)
2061 return x;
2062 }
2063
2064 src_int_mode = int_mode_for_mode (src_mode);
2065 int_mode = int_mode_for_mode (mode);
2066 if (src_int_mode == BLKmode || int_mode == BLKmode)
2067 return NULL_RTX;
2068
2069 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2070 return NULL_RTX;
2071 if (!MODES_TIEABLE_P (int_mode, mode))
2072 return NULL_RTX;
2073
2074 src = gen_lowpart (src_int_mode, src);
2075 src = convert_modes (int_mode, src_int_mode, src, true);
2076 src = gen_lowpart (mode, src);
2077 return src;
2078 }
2079 \f
2080 /* Add INC into TARGET. */
2081
2082 void
2083 expand_inc (rtx target, rtx inc)
2084 {
2085 rtx value = expand_binop (GET_MODE (target), add_optab,
2086 target, inc,
2087 target, 0, OPTAB_LIB_WIDEN);
2088 if (value != target)
2089 emit_move_insn (target, value);
2090 }
2091
2092 /* Subtract DEC from TARGET. */
2093
2094 void
2095 expand_dec (rtx target, rtx dec)
2096 {
2097 rtx value = expand_binop (GET_MODE (target), sub_optab,
2098 target, dec,
2099 target, 0, OPTAB_LIB_WIDEN);
2100 if (value != target)
2101 emit_move_insn (target, value);
2102 }
2103 \f
2104 /* Output a shift instruction for expression code CODE,
2105 with SHIFTED being the rtx for the value to shift,
2106 and AMOUNT the rtx for the amount to shift by.
2107 Store the result in the rtx TARGET, if that is convenient.
2108 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2109 Return the rtx for where the value is. */
2110
2111 static rtx
2112 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2113 rtx amount, rtx target, int unsignedp)
2114 {
2115 rtx op1, temp = 0;
2116 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2117 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2118 optab lshift_optab = ashl_optab;
2119 optab rshift_arith_optab = ashr_optab;
2120 optab rshift_uns_optab = lshr_optab;
2121 optab lrotate_optab = rotl_optab;
2122 optab rrotate_optab = rotr_optab;
2123 enum machine_mode op1_mode;
2124 int attempt;
2125 bool speed = optimize_insn_for_speed_p ();
2126
2127 op1 = amount;
2128 op1_mode = GET_MODE (op1);
2129
2130 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2131 shift amount is a vector, use the vector/vector shift patterns. */
2132 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2133 {
2134 lshift_optab = vashl_optab;
2135 rshift_arith_optab = vashr_optab;
2136 rshift_uns_optab = vlshr_optab;
2137 lrotate_optab = vrotl_optab;
2138 rrotate_optab = vrotr_optab;
2139 }
2140
2141 /* Previously, shift counts computed by NEGATE_EXPR were detected here
2142 and shifted in the other direction; but that does not work
2143 on all machines. */
2144
2145 if (SHIFT_COUNT_TRUNCATED)
2146 {
2147 if (CONST_INT_P (op1)
2148 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2149 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2150 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2151 % GET_MODE_BITSIZE (mode));
2152 else if (GET_CODE (op1) == SUBREG
2153 && subreg_lowpart_p (op1)
2154 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2155 op1 = SUBREG_REG (op1);
2156 }
2157
2158 if (op1 == const0_rtx)
2159 return shifted;
2160
2161 /* Check whether it's cheaper to implement a left shift by a constant
2162 bit count as a sequence of additions. */
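/* For example, a left shift by 3 is then expanded as three successive
   doublings: t = x + x; t = t + t; t = t + t.  */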
2163 if (code == LSHIFT_EXPR
2164 && CONST_INT_P (op1)
2165 && INTVAL (op1) > 0
2166 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2167 && INTVAL (op1) < MAX_BITS_PER_WORD
2168 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2169 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2170 {
2171 int i;
2172 for (i = 0; i < INTVAL (op1); i++)
2173 {
2174 temp = force_reg (mode, shifted);
2175 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2176 unsignedp, OPTAB_LIB_WIDEN);
2177 }
2178 return shifted;
2179 }
2180
2181 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2182 {
2183 enum optab_methods methods;
2184
2185 if (attempt == 0)
2186 methods = OPTAB_DIRECT;
2187 else if (attempt == 1)
2188 methods = OPTAB_WIDEN;
2189 else
2190 methods = OPTAB_LIB_WIDEN;
2191
2192 if (rotate)
2193 {
2194 /* Widening does not work for rotation. */
2195 if (methods == OPTAB_WIDEN)
2196 continue;
2197 else if (methods == OPTAB_LIB_WIDEN)
2198 {
2199 /* If we have been unable to open-code this by a rotation,
2200 do it as the IOR of two shifts. I.e., to rotate A
2201 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2202 where C is the bitsize of A.
2203
2204 It is theoretically possible that the target machine might
2205 not be able to perform either shift and hence we would
2206 be making two libcalls rather than just the one for the
2207 shift (similarly if IOR could not be done). We will allow
2208 this extremely unlikely lossage to avoid complicating the
2209 code below. */
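/* For example, rotating a 32-bit value A left by 10 becomes
   (A << 10) | ((unsigned) A >> 22).  */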
2210
2211 rtx subtarget = target == shifted ? 0 : target;
2212 rtx new_amount, other_amount;
2213 rtx temp1;
2214
2215 new_amount = op1;
2216 if (CONST_INT_P (op1))
2217 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2218 - INTVAL (op1));
2219 else
2220 other_amount
2221 = simplify_gen_binary (MINUS, GET_MODE (op1),
2222 GEN_INT (GET_MODE_PRECISION (mode)),
2223 op1);
2224
2225 shifted = force_reg (mode, shifted);
2226
2227 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2228 mode, shifted, new_amount, 0, 1);
2229 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2230 mode, shifted, other_amount,
2231 subtarget, 1);
2232 return expand_binop (mode, ior_optab, temp, temp1, target,
2233 unsignedp, methods);
2234 }
2235
2236 temp = expand_binop (mode,
2237 left ? lrotate_optab : rrotate_optab,
2238 shifted, op1, target, unsignedp, methods);
2239 }
2240 else if (unsignedp)
2241 temp = expand_binop (mode,
2242 left ? lshift_optab : rshift_uns_optab,
2243 shifted, op1, target, unsignedp, methods);
2244
2245 /* Do arithmetic shifts.
2246 Also, if we are going to widen the operand, we can just as well
2247 use an arithmetic right-shift instead of a logical one. */
2248 if (temp == 0 && ! rotate
2249 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2250 {
2251 enum optab_methods methods1 = methods;
2252
2253 /* If trying to widen a log shift to an arithmetic shift,
2254 don't accept an arithmetic shift of the same size. */
2255 if (unsignedp)
2256 methods1 = OPTAB_MUST_WIDEN;
2257
2258 /* Arithmetic shift */
2259
2260 temp = expand_binop (mode,
2261 left ? lshift_optab : rshift_arith_optab,
2262 shifted, op1, target, unsignedp, methods1);
2263 }
2264
2265 /* We used to try extzv here for logical right shifts, but that was
2266 only useful for one machine, the VAX, and caused poor code
2267 generation there for lshrdi3, so the code was deleted and a
2268 define_expand for lshrsi3 was added to vax.md. */
2269 }
2270
2271 gcc_assert (temp);
2272 return temp;
2273 }
2274
2275 /* Output a shift instruction for expression code CODE,
2276 with SHIFTED being the rtx for the value to shift,
2277 and AMOUNT the amount to shift by.
2278 Store the result in the rtx TARGET, if that is convenient.
2279 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2280 Return the rtx for where the value is. */
2281
2282 rtx
2283 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2284 int amount, rtx target, int unsignedp)
2285 {
2286 return expand_shift_1 (code, mode,
2287 shifted, GEN_INT (amount), target, unsignedp);
2288 }
2289
2290 /* Output a shift instruction for expression code CODE,
2291 with SHIFTED being the rtx for the value to shift,
2292 and AMOUNT the tree for the amount to shift by.
2293 Store the result in the rtx TARGET, if that is convenient.
2294 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2295 Return the rtx for where the value is. */
2296
2297 rtx
2298 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2299 tree amount, rtx target, int unsignedp)
2300 {
2301 return expand_shift_1 (code, mode,
2302 shifted, expand_normal (amount), target, unsignedp);
2303 }
2304
2305 \f
2306 /* Indicates the type of fixup needed after a constant multiplication.
2307 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2308 the result should be negated, and ADD_VARIANT means that the
2309 multiplicand should be added to the result. */
2310 enum mult_variant {basic_variant, negate_variant, add_variant};
2311
2312 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2313 const struct mult_cost *, enum machine_mode mode);
2314 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2315 struct algorithm *, enum mult_variant *, int);
2316 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2317 const struct algorithm *, enum mult_variant);
2318 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2319 int, rtx *, int *, int *);
2320 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2321 static rtx extract_high_half (enum machine_mode, rtx);
2322 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2323 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2324 int, int);
2325 /* Compute and return the best algorithm for multiplying by T.
2326 The algorithm must cost less than COST_LIMIT.
2327 If retval.cost >= COST_LIMIT, no algorithm was found and all
2328 other fields of the returned struct are undefined.
2329 MODE is the machine mode of the multiplication. */
2330
2331 static void
2332 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2333 const struct mult_cost *cost_limit, enum machine_mode mode)
2334 {
2335 int m;
2336 struct algorithm *alg_in, *best_alg;
2337 struct mult_cost best_cost;
2338 struct mult_cost new_limit;
2339 int op_cost, op_latency;
2340 unsigned HOST_WIDE_INT orig_t = t;
2341 unsigned HOST_WIDE_INT q;
2342 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2343 int hash_index;
2344 bool cache_hit = false;
2345 enum alg_code cache_alg = alg_zero;
2346 bool speed = optimize_insn_for_speed_p ();
2347
2348 /* Indicate that no algorithm is yet found. If no algorithm
2349 is found, this value will be returned and indicate failure. */
2350 alg_out->cost.cost = cost_limit->cost + 1;
2351 alg_out->cost.latency = cost_limit->latency + 1;
2352
2353 if (cost_limit->cost < 0
2354 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2355 return;
2356
2357 /* Restrict the bits of "t" to the multiplication's mode. */
2358 t &= GET_MODE_MASK (mode);
2359
2360 /* t == 1 can be done at zero cost. */
2361 if (t == 1)
2362 {
2363 alg_out->ops = 1;
2364 alg_out->cost.cost = 0;
2365 alg_out->cost.latency = 0;
2366 alg_out->op[0] = alg_m;
2367 return;
2368 }
2369
2370 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2371 fail now. */
2372 if (t == 0)
2373 {
2374 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2375 return;
2376 else
2377 {
2378 alg_out->ops = 1;
2379 alg_out->cost.cost = zero_cost[speed];
2380 alg_out->cost.latency = zero_cost[speed];
2381 alg_out->op[0] = alg_zero;
2382 return;
2383 }
2384 }
2385
2386 /* We'll be needing a couple extra algorithm structures now. */
2387
2388 alg_in = XALLOCA (struct algorithm);
2389 best_alg = XALLOCA (struct algorithm);
2390 best_cost = *cost_limit;
2391
2392 /* Compute the hash index. */
2393 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2394
2395 /* See if we already know what to do for T. */
2396 if (alg_hash[hash_index].t == t
2397 && alg_hash[hash_index].mode == mode
2399 && alg_hash[hash_index].speed == speed
2400 && alg_hash[hash_index].alg != alg_unknown)
2401 {
2402 cache_alg = alg_hash[hash_index].alg;
2403
2404 if (cache_alg == alg_impossible)
2405 {
2406 /* The cache tells us that it's impossible to synthesize
2407 multiplication by T within alg_hash[hash_index].cost. */
2408 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2409 /* COST_LIMIT is at least as restrictive as the one
2410 recorded in the hash table, in which case we have no
2411 hope of synthesizing a multiplication. Just
2412 return. */
2413 return;
2414
2415 /* If we get here, COST_LIMIT is less restrictive than the
2416 one recorded in the hash table, so we may be able to
2417 synthesize a multiplication. Proceed as if we didn't
2418 have the cache entry. */
2419 }
2420 else
2421 {
2422 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2423 /* The cached algorithm shows that this multiplication
2424 requires more cost than COST_LIMIT. Just return. This
2425 way, we don't clobber this cache entry with
2426 alg_impossible but retain useful information. */
2427 return;
2428
2429 cache_hit = true;
2430
2431 switch (cache_alg)
2432 {
2433 case alg_shift:
2434 goto do_alg_shift;
2435
2436 case alg_add_t_m2:
2437 case alg_sub_t_m2:
2438 goto do_alg_addsub_t_m2;
2439
2440 case alg_add_factor:
2441 case alg_sub_factor:
2442 goto do_alg_addsub_factor;
2443
2444 case alg_add_t2_m:
2445 goto do_alg_add_t2_m;
2446
2447 case alg_sub_t2_m:
2448 goto do_alg_sub_t2_m;
2449
2450 default:
2451 gcc_unreachable ();
2452 }
2453 }
2454 }
2455
2456 /* If we have a group of zero bits at the low-order part of T, try
2457 multiplying by the remaining bits and then doing a shift. */
2458
2459 if ((t & 1) == 0)
2460 {
2461 do_alg_shift:
2462 m = floor_log2 (t & -t); /* m = number of low zero bits */
2463 if (m < maxm)
2464 {
2465 q = t >> m;
2466 /* The function expand_shift will choose between a shift and
2467 a sequence of additions, so the observed cost is given as
2468 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2469 op_cost = m * add_cost[speed][mode];
2470 if (shift_cost[speed][mode][m] < op_cost)
2471 op_cost = shift_cost[speed][mode][m];
2472 new_limit.cost = best_cost.cost - op_cost;
2473 new_limit.latency = best_cost.latency - op_cost;
2474 synth_mult (alg_in, q, &new_limit, mode);
2475
2476 alg_in->cost.cost += op_cost;
2477 alg_in->cost.latency += op_cost;
2478 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2479 {
2480 struct algorithm *x;
2481 best_cost = alg_in->cost;
2482 x = alg_in, alg_in = best_alg, best_alg = x;
2483 best_alg->log[best_alg->ops] = m;
2484 best_alg->op[best_alg->ops] = alg_shift;
2485 }
2486
2487 /* See if treating ORIG_T as a signed number yields a better
2488 sequence. Try this sequence only for a negative ORIG_T
2489 as it would be useless for a non-negative ORIG_T. */
2490 if ((HOST_WIDE_INT) orig_t < 0)
2491 {
2492 /* Shift ORIG_T as follows because a right shift of a
2493 negative-valued signed type is implementation
2494 defined. */
2495 q = ~(~orig_t >> m);
2496 /* The function expand_shift will choose between a shift
2497 and a sequence of additions, so the observed cost is
2498 given as MIN (m * add_cost[speed][mode],
2499 shift_cost[speed][mode][m]). */
2500 op_cost = m * add_cost[speed][mode];
2501 if (shift_cost[speed][mode][m] < op_cost)
2502 op_cost = shift_cost[speed][mode][m];
2503 new_limit.cost = best_cost.cost - op_cost;
2504 new_limit.latency = best_cost.latency - op_cost;
2505 synth_mult (alg_in, q, &new_limit, mode);
2506
2507 alg_in->cost.cost += op_cost;
2508 alg_in->cost.latency += op_cost;
2509 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2510 {
2511 struct algorithm *x;
2512 best_cost = alg_in->cost;
2513 x = alg_in, alg_in = best_alg, best_alg = x;
2514 best_alg->log[best_alg->ops] = m;
2515 best_alg->op[best_alg->ops] = alg_shift;
2516 }
2517 }
2518 }
2519 if (cache_hit)
2520 goto done;
2521 }
2522
2523 /* If we have an odd number, add or subtract one. */
2524 if ((t & 1) != 0)
2525 {
2526 unsigned HOST_WIDE_INT w;
2527
2528 do_alg_addsub_t_m2:
2529 for (w = 1; (w & t) != 0; w <<= 1)
2530 ;
2531 /* If T was -1, then W will be zero after the loop. This is another
2532 case where T ends with ...111. Handling this by multiplying by
2533 (T + 1) and subtracting 1 produces slightly better code and results
2534 in much faster algorithm selection than treating it like the
2535 ...0111 case below. */
2536 if (w == 0
2537 || (w > 2
2538 /* Reject the case where t is 3.
2539 Thus we prefer addition in that case. */
2540 && t != 3))
2541 {
2542 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2543
2544 op_cost = add_cost[speed][mode];
2545 new_limit.cost = best_cost.cost - op_cost;
2546 new_limit.latency = best_cost.latency - op_cost;
2547 synth_mult (alg_in, t + 1, &new_limit, mode);
2548
2549 alg_in->cost.cost += op_cost;
2550 alg_in->cost.latency += op_cost;
2551 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2552 {
2553 struct algorithm *x;
2554 best_cost = alg_in->cost;
2555 x = alg_in, alg_in = best_alg, best_alg = x;
2556 best_alg->log[best_alg->ops] = 0;
2557 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2558 }
2559 }
2560 else
2561 {
2562 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2563
2564 op_cost = add_cost[speed][mode];
2565 new_limit.cost = best_cost.cost - op_cost;
2566 new_limit.latency = best_cost.latency - op_cost;
2567 synth_mult (alg_in, t - 1, &new_limit, mode);
2568
2569 alg_in->cost.cost += op_cost;
2570 alg_in->cost.latency += op_cost;
2571 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2572 {
2573 struct algorithm *x;
2574 best_cost = alg_in->cost;
2575 x = alg_in, alg_in = best_alg, best_alg = x;
2576 best_alg->log[best_alg->ops] = 0;
2577 best_alg->op[best_alg->ops] = alg_add_t_m2;
2578 }
2579 }
2580
2581 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2582 quickly with a - a * n for some appropriate constant n. */
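/* For example, a * -7 can be computed as a - (a << 3).  */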
2583 m = exact_log2 (-orig_t + 1);
2584 if (m >= 0 && m < maxm)
2585 {
2586 op_cost = shiftsub1_cost[speed][mode][m];
2587 new_limit.cost = best_cost.cost - op_cost;
2588 new_limit.latency = best_cost.latency - op_cost;
2589 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2590
2591 alg_in->cost.cost += op_cost;
2592 alg_in->cost.latency += op_cost;
2593 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594 {
2595 struct algorithm *x;
2596 best_cost = alg_in->cost;
2597 x = alg_in, alg_in = best_alg, best_alg = x;
2598 best_alg->log[best_alg->ops] = m;
2599 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2600 }
2601 }
2602
2603 if (cache_hit)
2604 goto done;
2605 }
2606
2607 /* Look for factors of t of the form
2608 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2609 If we find such a factor, we can multiply by t using an algorithm that
2610 multiplies by q, shift the result by m and add/subtract it to itself.
2611
2612 We search for large factors first and loop down, even if large factors
2613 are less probable than small; if we find a large factor we will find a
2614 good sequence quickly, and therefore be able to prune (by decreasing
2615 COST_LIMIT) the search. */
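/* For example, t = 45 factors as 5 * (2**3 + 1); a multiplication by 45
   can therefore be done by first computing q = x * 5 and then forming
   q + (q << 3).  */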
2616
2617 do_alg_addsub_factor:
2618 for (m = floor_log2 (t - 1); m >= 2; m--)
2619 {
2620 unsigned HOST_WIDE_INT d;
2621
2622 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2623 if (t % d == 0 && t > d && m < maxm
2624 && (!cache_hit || cache_alg == alg_add_factor))
2625 {
2626 /* If the target has a cheap shift-and-add instruction use
2627 that in preference to a shift insn followed by an add insn.
2628 Assume that the shift-and-add is "atomic" with a latency
2629 equal to its cost, otherwise assume that on superscalar
2630 hardware the shift may be executed concurrently with the
2631 earlier steps in the algorithm. */
2632 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2633 if (shiftadd_cost[speed][mode][m] < op_cost)
2634 {
2635 op_cost = shiftadd_cost[speed][mode][m];
2636 op_latency = op_cost;
2637 }
2638 else
2639 op_latency = add_cost[speed][mode];
2640
2641 new_limit.cost = best_cost.cost - op_cost;
2642 new_limit.latency = best_cost.latency - op_latency;
2643 synth_mult (alg_in, t / d, &new_limit, mode);
2644
2645 alg_in->cost.cost += op_cost;
2646 alg_in->cost.latency += op_latency;
2647 if (alg_in->cost.latency < op_cost)
2648 alg_in->cost.latency = op_cost;
2649 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2650 {
2651 struct algorithm *x;
2652 best_cost = alg_in->cost;
2653 x = alg_in, alg_in = best_alg, best_alg = x;
2654 best_alg->log[best_alg->ops] = m;
2655 best_alg->op[best_alg->ops] = alg_add_factor;
2656 }
2657 /* Other factors will have been taken care of in the recursion. */
2658 break;
2659 }
2660
2661 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2662 if (t % d == 0 && t > d && m < maxm
2663 && (!cache_hit || cache_alg == alg_sub_factor))
2664 {
2665 /* If the target has a cheap shift-and-subtract insn use
2666 that in preference to a shift insn followed by a sub insn.
2667 Assume that the shift-and-sub is "atomic" with a latency
2668 equal to its cost, otherwise assume that on superscalar
2669 hardware the shift may be executed concurrently with the
2670 earlier steps in the algorithm. */
2671 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2672 if (shiftsub0_cost[speed][mode][m] < op_cost)
2673 {
2674 op_cost = shiftsub0_cost[speed][mode][m];
2675 op_latency = op_cost;
2676 }
2677 else
2678 op_latency = add_cost[speed][mode];
2679
2680 new_limit.cost = best_cost.cost - op_cost;
2681 new_limit.latency = best_cost.latency - op_latency;
2682 synth_mult (alg_in, t / d, &new_limit, mode);
2683
2684 alg_in->cost.cost += op_cost;
2685 alg_in->cost.latency += op_latency;
2686 if (alg_in->cost.latency < op_cost)
2687 alg_in->cost.latency = op_cost;
2688 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2689 {
2690 struct algorithm *x;
2691 best_cost = alg_in->cost;
2692 x = alg_in, alg_in = best_alg, best_alg = x;
2693 best_alg->log[best_alg->ops] = m;
2694 best_alg->op[best_alg->ops] = alg_sub_factor;
2695 }
2696 break;
2697 }
2698 }
2699 if (cache_hit)
2700 goto done;
2701
2702 /* Try shift-and-add (load effective address) instructions,
2703 i.e. do a*3, a*5, a*9. */
2704 if ((t & 1) != 0)
2705 {
2706 do_alg_add_t2_m:
2707 q = t - 1;
2708 q = q & -q;
2709 m = exact_log2 (q);
2710 if (m >= 0 && m < maxm)
2711 {
2712 op_cost = shiftadd_cost[speed][mode][m];
2713 new_limit.cost = best_cost.cost - op_cost;
2714 new_limit.latency = best_cost.latency - op_cost;
2715 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2716
2717 alg_in->cost.cost += op_cost;
2718 alg_in->cost.latency += op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 {
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_t2_m;
2726 }
2727 }
2728 if (cache_hit)
2729 goto done;
2730
2731 do_alg_sub_t2_m:
2732 q = t + 1;
2733 q = q & -q;
2734 m = exact_log2 (q);
2735 if (m >= 0 && m < maxm)
2736 {
2737 op_cost = shiftsub0_cost[speed][mode][m];
2738 new_limit.cost = best_cost.cost - op_cost;
2739 new_limit.latency = best_cost.latency - op_cost;
2740 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2741
2742 alg_in->cost.cost += op_cost;
2743 alg_in->cost.latency += op_cost;
2744 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745 {
2746 struct algorithm *x;
2747 best_cost = alg_in->cost;
2748 x = alg_in, alg_in = best_alg, best_alg = x;
2749 best_alg->log[best_alg->ops] = m;
2750 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2751 }
2752 }
2753 if (cache_hit)
2754 goto done;
2755 }
2756
2757 done:
2758 /* If best_cost has not decreased, we have not found any algorithm. */
2759 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2760 {
2761 /* We failed to find an algorithm. Record alg_impossible for
2762 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2763 we are asked to find an algorithm for T within the same or
2764 lower COST_LIMIT, we can immediately return to the
2765 caller. */
2766 alg_hash[hash_index].t = t;
2767 alg_hash[hash_index].mode = mode;
2768 alg_hash[hash_index].speed = speed;
2769 alg_hash[hash_index].alg = alg_impossible;
2770 alg_hash[hash_index].cost = *cost_limit;
2771 return;
2772 }
2773
2774 /* Cache the result. */
2775 if (!cache_hit)
2776 {
2777 alg_hash[hash_index].t = t;
2778 alg_hash[hash_index].mode = mode;
2779 alg_hash[hash_index].speed = speed;
2780 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2781 alg_hash[hash_index].cost.cost = best_cost.cost;
2782 alg_hash[hash_index].cost.latency = best_cost.latency;
2783 }
2784
2785 /* If the sequence is too long for `struct algorithm'
2786 to record, make this search fail.
2787 if (best_alg->ops == MAX_BITS_PER_WORD)
2788 return;
2789
2790 /* Copy the algorithm from temporary space to the space at alg_out.
2791 We avoid using structure assignment because the majority of
2792 best_alg is normally undefined, and this is a critical function. */
2793 alg_out->ops = best_alg->ops + 1;
2794 alg_out->cost = best_cost;
2795 memcpy (alg_out->op, best_alg->op,
2796 alg_out->ops * sizeof *alg_out->op);
2797 memcpy (alg_out->log, best_alg->log,
2798 alg_out->ops * sizeof *alg_out->log);
2799 }
2800 \f
2801 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2802 Try three variations:
2803
2804 - a shift/add sequence based on VAL itself
2805 - a shift/add sequence based on -VAL, followed by a negation
2806 - a shift/add sequence based on VAL - 1, followed by an addition.
2807
2808 Return true if the cheapest of these cost less than MULT_COST,
2809 describing the algorithm in *ALG and final fixup in *VARIANT. */
2810
2811 static bool
2812 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2813 struct algorithm *alg, enum mult_variant *variant,
2814 int mult_cost)
2815 {
2816 struct algorithm alg2;
2817 struct mult_cost limit;
2818 int op_cost;
2819 bool speed = optimize_insn_for_speed_p ();
2820
2821 /* Fail quickly for impossible bounds. */
2822 if (mult_cost < 0)
2823 return false;
2824
2825 /* Ensure that mult_cost provides a reasonable upper bound.
2826 Any constant multiplication can be performed with less
2827 than 2 * bits additions. */
2828 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2829 if (mult_cost > op_cost)
2830 mult_cost = op_cost;
2831
2832 *variant = basic_variant;
2833 limit.cost = mult_cost;
2834 limit.latency = mult_cost;
2835 synth_mult (alg, val, &limit, mode);
2836
2837 /* This works only if the inverted value actually fits in an
2838 `unsigned int'. */
2839 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2840 {
2841 op_cost = neg_cost[speed][mode];
2842 if (MULT_COST_LESS (&alg->cost, mult_cost))
2843 {
2844 limit.cost = alg->cost.cost - op_cost;
2845 limit.latency = alg->cost.latency - op_cost;
2846 }
2847 else
2848 {
2849 limit.cost = mult_cost - op_cost;
2850 limit.latency = mult_cost - op_cost;
2851 }
2852
2853 synth_mult (&alg2, -val, &limit, mode);
2854 alg2.cost.cost += op_cost;
2855 alg2.cost.latency += op_cost;
2856 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2857 *alg = alg2, *variant = negate_variant;
2858 }
2859
2860 /* This proves very useful for division-by-constant. */
2861 op_cost = add_cost[speed][mode];
2862 if (MULT_COST_LESS (&alg->cost, mult_cost))
2863 {
2864 limit.cost = alg->cost.cost - op_cost;
2865 limit.latency = alg->cost.latency - op_cost;
2866 }
2867 else
2868 {
2869 limit.cost = mult_cost - op_cost;
2870 limit.latency = mult_cost - op_cost;
2871 }
2872
2873 synth_mult (&alg2, val - 1, &limit, mode);
2874 alg2.cost.cost += op_cost;
2875 alg2.cost.latency += op_cost;
2876 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2877 *alg = alg2, *variant = add_variant;
2878
2879 return MULT_COST_LESS (&alg->cost, mult_cost);
2880 }
2881
2882 /* A subroutine of expand_mult, used for constant multiplications.
2883 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2884 convenient. Use the shift/add sequence described by ALG and apply
2885 the final fixup specified by VARIANT. */
2886
2887 static rtx
2888 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2889 rtx target, const struct algorithm *alg,
2890 enum mult_variant variant)
2891 {
2892 HOST_WIDE_INT val_so_far;
2893 rtx insn, accum, tem;
2894 int opno;
2895 enum machine_mode nmode;
2896
2897 /* Avoid referencing memory over and over and invalid sharing
2898 on SUBREGs. */
2899 op0 = force_reg (mode, op0);
2900
2901 /* ACCUM starts out either as OP0 or as a zero, depending on
2902 the first operation. */
2903
2904 if (alg->op[0] == alg_zero)
2905 {
2906 accum = copy_to_mode_reg (mode, const0_rtx);
2907 val_so_far = 0;
2908 }
2909 else if (alg->op[0] == alg_m)
2910 {
2911 accum = copy_to_mode_reg (mode, op0);
2912 val_so_far = 1;
2913 }
2914 else
2915 gcc_unreachable ();
2916
2917 for (opno = 1; opno < alg->ops; opno++)
2918 {
2919 int log = alg->log[opno];
2920 rtx shift_subtarget = optimize ? 0 : accum;
2921 rtx add_target
2922 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2923 && !optimize)
2924 ? target : 0;
2925 rtx accum_target = optimize ? 0 : accum;
2926
2927 switch (alg->op[opno])
2928 {
2929 case alg_shift:
2930 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2931 /* REG_EQUAL note will be attached to the following insn. */
2932 emit_move_insn (accum, tem);
2933 val_so_far <<= log;
2934 break;
2935
2936 case alg_add_t_m2:
2937 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2938 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939 add_target ? add_target : accum_target);
2940 val_so_far += (HOST_WIDE_INT) 1 << log;
2941 break;
2942
2943 case alg_sub_t_m2:
2944 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2945 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2946 add_target ? add_target : accum_target);
2947 val_so_far -= (HOST_WIDE_INT) 1 << log;
2948 break;
2949
2950 case alg_add_t2_m:
2951 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2952 log, shift_subtarget, 0);
2953 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2954 add_target ? add_target : accum_target);
2955 val_so_far = (val_so_far << log) + 1;
2956 break;
2957
2958 case alg_sub_t2_m:
2959 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2960 log, shift_subtarget, 0);
2961 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2962 add_target ? add_target : accum_target);
2963 val_so_far = (val_so_far << log) - 1;
2964 break;
2965
2966 case alg_add_factor:
2967 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2968 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2969 add_target ? add_target : accum_target);
2970 val_so_far += val_so_far << log;
2971 break;
2972
2973 case alg_sub_factor:
2974 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2975 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2976 (add_target
2977 ? add_target : (optimize ? 0 : tem)));
2978 val_so_far = (val_so_far << log) - val_so_far;
2979 break;
2980
2981 default:
2982 gcc_unreachable ();
2983 }
2984
2985 /* Write a REG_EQUAL note on the last insn so that we can cse
2986 multiplication sequences. Note that if ACCUM is a SUBREG,
2987 we've set the inner register and must properly indicate
2988 that. */
2989
2990 tem = op0, nmode = mode;
2991 if (GET_CODE (accum) == SUBREG)
2992 {
2993 nmode = GET_MODE (SUBREG_REG (accum));
2994 tem = gen_lowpart (nmode, op0);
2995 }
2996
2997 insn = get_last_insn ();
2998 set_unique_reg_note (insn, REG_EQUAL,
2999 gen_rtx_MULT (nmode, tem,
3000 GEN_INT (val_so_far)));
3001 }
3002
3003 if (variant == negate_variant)
3004 {
3005 val_so_far = -val_so_far;
3006 accum = expand_unop (mode, neg_optab, accum, target, 0);
3007 }
3008 else if (variant == add_variant)
3009 {
3010 val_so_far = val_so_far + 1;
3011 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3012 }
3013
3014 /* Compare only the bits of val and val_so_far that are significant
3015 in the result mode, to avoid sign-/zero-extension confusion. */
3016 val &= GET_MODE_MASK (mode);
3017 val_so_far &= GET_MODE_MASK (mode);
3018 gcc_assert (val == val_so_far);
3019
3020 return accum;
3021 }
3022
3023 /* Perform a multiplication and return an rtx for the result.
3024 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3025 TARGET is a suggestion for where to store the result (an rtx).
3026
3027 We check specially for a constant integer as OP1.
3028 If you want this check for OP0 as well, then before calling
3029 you should swap the two operands if OP0 would be constant. */
3030
3031 rtx
3032 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3033 int unsignedp)
3034 {
3035 enum mult_variant variant;
3036 struct algorithm algorithm;
3037 int max_cost;
3038 bool speed = optimize_insn_for_speed_p ();
3039
3040 /* Handling const0_rtx here allows us to use zero as a rogue value for
3041 coeff below. */
3042 if (op1 == const0_rtx)
3043 return const0_rtx;
3044 if (op1 == const1_rtx)
3045 return op0;
3046 if (op1 == constm1_rtx)
3047 return expand_unop (mode,
3048 GET_MODE_CLASS (mode) == MODE_INT
3049 && !unsignedp && flag_trapv
3050 ? negv_optab : neg_optab,
3051 op0, target, 0);
3052
3053 /* These are the operations that are potentially turned into a sequence
3054 of shifts and additions. */
3055 if (SCALAR_INT_MODE_P (mode)
3056 && (unsignedp || !flag_trapv))
3057 {
3058 HOST_WIDE_INT coeff = 0;
3059 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3060
3061 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3062 less than or equal in size to `unsigned int' this doesn't matter.
3063 If the mode is larger than `unsigned int', then synth_mult works
3064 only if the constant value exactly fits in an `unsigned int' without
3065 any truncation. This means that multiplying by negative values does
3066 not work; results are off by 2^32 on a 32 bit machine. */
3067
3068 if (CONST_INT_P (op1))
3069 {
3070 /* Attempt to handle multiplication of DImode values by negative
3071 coefficients, by performing the multiplication by a positive
3072 multiplier and then inverting the result. */
3073 if (INTVAL (op1) < 0
3074 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3075 {
3076 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3077 result is interpreted as an unsigned coefficient.
3078 Exclude cost of op0 from max_cost to match the cost
3079 calculation of the synth_mult. */
3080 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3081 - neg_cost[speed][mode];
3082 if (max_cost > 0
3083 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3084 &variant, max_cost))
3085 {
3086 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3087 NULL_RTX, &algorithm,
3088 variant);
3089 return expand_unop (mode, neg_optab, temp, target, 0);
3090 }
3091 }
3092 else coeff = INTVAL (op1);
3093 }
3094 else if (GET_CODE (op1) == CONST_DOUBLE)
3095 {
3096 /* If we are multiplying in DImode, it may still be a win
3097 to try to work with shifts and adds. */
3098 if (CONST_DOUBLE_HIGH (op1) == 0
3099 && CONST_DOUBLE_LOW (op1) > 0)
3100 coeff = CONST_DOUBLE_LOW (op1);
3101 else if (CONST_DOUBLE_LOW (op1) == 0
3102 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3103 {
3104 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3105 + HOST_BITS_PER_WIDE_INT;
3106 return expand_shift (LSHIFT_EXPR, mode, op0,
3107 shift, target, unsignedp);
3108 }
3109 }
3110
3111 /* We used to test optimize here, on the grounds that it's better to
3112 produce a smaller program when -O is not used. But this causes
3113 such a terrible slowdown sometimes that it seems better to always
3114 use synth_mult. */
3115 if (coeff != 0)
3116 {
3117 /* Special case powers of two. */
3118 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3119 return expand_shift (LSHIFT_EXPR, mode, op0,
3120 floor_log2 (coeff), target, unsignedp);
3121
3122 /* Exclude cost of op0 from max_cost to match the cost
3123 calculation of the synth_mult. */
3124 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3125 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3126 max_cost))
3127 return expand_mult_const (mode, op0, coeff, target,
3128 &algorithm, variant);
3129 }
3130 }
3131
3132 if (GET_CODE (op0) == CONST_DOUBLE)
3133 {
3134 rtx temp = op0;
3135 op0 = op1;
3136 op1 = temp;
3137 }
3138
3139 /* Expand x*2.0 as x+x. */
3140 if (GET_CODE (op1) == CONST_DOUBLE
3141 && SCALAR_FLOAT_MODE_P (mode))
3142 {
3143 REAL_VALUE_TYPE d;
3144 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3145
3146 if (REAL_VALUES_EQUAL (d, dconst2))
3147 {
3148 op0 = force_reg (GET_MODE (op0), op0);
3149 return expand_binop (mode, add_optab, op0, op0,
3150 target, unsignedp, OPTAB_LIB_WIDEN);
3151 }
3152 }
3153
3154 /* This used to use umul_optab if unsigned, but for non-widening multiply
3155 there is no difference between signed and unsigned. */
3156 op0 = expand_binop (mode,
3157 ! unsignedp
3158 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3159 ? smulv_optab : smul_optab,
3160 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3161 gcc_assert (op0);
3162 return op0;
3163 }
3164
3165 /* Perform a widening multiplication and return an rtx for the result.
3166 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3167 TARGET is a suggestion for where to store the result (an rtx).
3168 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3169 or smul_widen_optab.
3170
3171 We check specially for a constant integer as OP1, comparing the
3172 cost of a widening multiply against the cost of a sequence of shifts
3173 and adds. */
3174
3175 rtx
3176 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3177 int unsignedp, optab this_optab)
3178 {
3179 bool speed = optimize_insn_for_speed_p ();
3180 rtx cop1;
3181
3182 if (CONST_INT_P (op1)
3183 && GET_MODE (op0) != VOIDmode
3184 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3185 this_optab == umul_widen_optab))
3186 && CONST_INT_P (cop1)
3187 && (INTVAL (cop1) >= 0
3188 || HWI_COMPUTABLE_MODE_P (mode)))
3189 {
3190 HOST_WIDE_INT coeff = INTVAL (cop1);
3191 int max_cost;
3192 enum mult_variant variant;
3193 struct algorithm algorithm;
3194
3195 /* Special case powers of two. */
3196 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3197 {
3198 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3199 return expand_shift (LSHIFT_EXPR, mode, op0,
3200 floor_log2 (coeff), target, unsignedp);
3201 }
3202
3203 /* Exclude cost of op0 from max_cost to match the cost
3204 calculation of the synth_mult. */
3205 max_cost = mul_widen_cost[speed][mode];
3206 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3207 max_cost))
3208 {
3209 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3210 return expand_mult_const (mode, op0, coeff, target,
3211 &algorithm, variant);
3212 }
3213 }
3214 return expand_binop (mode, this_optab, op0, op1, target,
3215 unsignedp, OPTAB_LIB_WIDEN);
3216 }
3217 \f
3218 /* Return the smallest n such that 2**n >= X. */
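/* For example, ceil_log2 (1) == 0, ceil_log2 (5) == 3 and
   ceil_log2 (8) == 3.  */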
3219
3220 int
3221 ceil_log2 (unsigned HOST_WIDE_INT x)
3222 {
3223 return floor_log2 (x - 1) + 1;
3224 }
3225
3226 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3227 replace division by D, and put the least significant N bits of the result
3228 in *MULTIPLIER_PTR and return the most significant bit.
3229
3230 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3231 needed precision is in PRECISION (should be <= N).
3232
3233 PRECISION should be as small as possible so this function can choose
3234 multiplier more freely.
3235
3236 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3237 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3238
3239 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3240 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
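/* For example, for 32-bit unsigned division by 5 (N == PRECISION == 32)
   this chooses the multiplier 0xcccccccd with *POST_SHIFT_PTR == 2, so
   that x / 5 == (x * 0xcccccccd) >> 34 when the product is computed in
   64 bits.  */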
3241
3242 static
3243 unsigned HOST_WIDE_INT
3244 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3245 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3246 {
3247 HOST_WIDE_INT mhigh_hi, mlow_hi;
3248 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3249 int lgup, post_shift;
3250 int pow, pow2;
3251 unsigned HOST_WIDE_INT nl, dummy1;
3252 HOST_WIDE_INT nh, dummy2;
3253
3254 /* lgup = ceil(log2(divisor)); */
3255 lgup = ceil_log2 (d);
3256
3257 gcc_assert (lgup <= n);
3258
3259 pow = n + lgup;
3260 pow2 = n + lgup - precision;
3261
3262 /* We could handle this with some effort, but this case is much
3263 better handled directly with a scc insn, so rely on caller using
3264 that. */
3265 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3266
3267 /* mlow = 2^(N + lgup)/d */
3268 if (pow >= HOST_BITS_PER_WIDE_INT)
3269 {
3270 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3271 nl = 0;
3272 }
3273 else
3274 {
3275 nh = 0;
3276 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3277 }
3278 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3279 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3280
3281 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3282 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3283 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3284 else
3285 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3286 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3287 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3288
3289 gcc_assert (!mhigh_hi || nh - d < d);
3290 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3291 /* Assert that mlow < mhigh. */
3292 gcc_assert (mlow_hi < mhigh_hi
3293 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3294
3295 /* If precision == N, then mlow, mhigh exceed 2^N
3296 (but they do not exceed 2^(N+1)). */
3297
3298 /* Reduce to lowest terms. */
3299 for (post_shift = lgup; post_shift > 0; post_shift--)
3300 {
3301 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3302 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3303 if (ml_lo >= mh_lo)
3304 break;
3305
3306 mlow_hi = 0;
3307 mlow_lo = ml_lo;
3308 mhigh_hi = 0;
3309 mhigh_lo = mh_lo;
3310 }
3311
3312 *post_shift_ptr = post_shift;
3313 *lgup_ptr = lgup;
3314 if (n < HOST_BITS_PER_WIDE_INT)
3315 {
3316 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3317 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3318 return mhigh_lo >= mask;
3319 }
3320 else
3321 {
3322 *multiplier_ptr = GEN_INT (mhigh_lo);
3323 return mhigh_hi;
3324 }
3325 }
3326
3327 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3328 congruent to 1 (mod 2**N). */
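/* For example, invert_mod2n (3, 8) == 171, since 3 * 171 == 513
   == 2 * 256 + 1.  */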
3329
3330 static unsigned HOST_WIDE_INT
3331 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3332 {
3333 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3334
3335 /* The algorithm notes that the choice y = x satisfies
3336 x*y == 1 mod 2^3, since x is assumed odd.
3337 Each iteration doubles the number of bits of significance in y. */
3338
3339 unsigned HOST_WIDE_INT mask;
3340 unsigned HOST_WIDE_INT y = x;
3341 int nbit = 3;
3342
3343 mask = (n == HOST_BITS_PER_WIDE_INT
3344 ? ~(unsigned HOST_WIDE_INT) 0
3345 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3346
3347 while (nbit < n)
3348 {
3349 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3350 nbit *= 2;
3351 }
3352 return y;
3353 }
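/* Worked illustration (added commentary, not part of the original code):
   for X = 7 and N = 8 the iteration yields Y = 183, and indeed
   7 * 183 == 1281 == 5 * 256 + 1.  The EXACT_DIV_EXPR case of
   expand_divmod uses such inverses: a value known to be a multiple of 7,
   say 42, can be divided exactly as (42 * 183) & 0xff == 6.  */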
3354
3355 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3356 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3357 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3358 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3359 become signed.
3360
3361 The result is put in TARGET if that is convenient.
3362
3363 MODE is the mode of operation. */
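/* Added commentary (not part of the original code): the adjustment below
   implements the identity relating the two high halves.  If uhigh and
   shigh denote the high halves of the unsigned and signed N-bit products
   of a and b, then

     uhigh == shigh + (a < 0 ? b : 0) + (b < 0 ? a : 0)   (mod 2**N)

   and the unsigned-to-signed direction subtracts the same two terms.
   Each conditional term is formed with an arithmetic right shift by
   N - 1 bits followed by an AND.  */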
3364
3365 rtx
3366 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3367 rtx op1, rtx target, int unsignedp)
3368 {
3369 rtx tem;
3370 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3371
3372 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3373 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3374 tem = expand_and (mode, tem, op1, NULL_RTX);
3375 adj_operand
3376 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3377 adj_operand);
3378
3379 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3380 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3381 tem = expand_and (mode, tem, op0, NULL_RTX);
3382 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3383 target);
3384
3385 return target;
3386 }
3387
3388 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3389
3390 static rtx
3391 extract_high_half (enum machine_mode mode, rtx op)
3392 {
3393 enum machine_mode wider_mode;
3394
3395 if (mode == word_mode)
3396 return gen_highpart (mode, op);
3397
3398 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3399
3400 wider_mode = GET_MODE_WIDER_MODE (mode);
3401 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3402 GET_MODE_BITSIZE (mode), 0, 1);
3403 return convert_modes (mode, wider_mode, op, 0);
3404 }
3405
3406 /* Like expand_mult_highpart, but only consider using a multiplication
3407 optab. OP1 is an rtx for the constant operand. */
3408
3409 static rtx
3410 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3411 rtx target, int unsignedp, int max_cost)
3412 {
3413 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3414 enum machine_mode wider_mode;
3415 optab moptab;
3416 rtx tem;
3417 int size;
3418 bool speed = optimize_insn_for_speed_p ();
3419
3420 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3421
3422 wider_mode = GET_MODE_WIDER_MODE (mode);
3423 size = GET_MODE_BITSIZE (mode);
3424
3425 /* Firstly, try using a multiplication insn that only generates the needed
3426 high part of the product, and in the sign flavor of unsignedp. */
3427 if (mul_highpart_cost[speed][mode] < max_cost)
3428 {
3429 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3430 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3431 unsignedp, OPTAB_DIRECT);
3432 if (tem)
3433 return tem;
3434 }
3435
3436 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3437 Need to adjust the result after the multiplication. */
3438 if (size - 1 < BITS_PER_WORD
3439 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3440 + 4 * add_cost[speed][mode] < max_cost))
3441 {
3442 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3443 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3444 unsignedp, OPTAB_DIRECT);
3445 if (tem)
3446 /* We used the wrong signedness. Adjust the result. */
3447 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3448 tem, unsignedp);
3449 }
3450
3451 /* Try widening multiplication. */
3452 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3453 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3454 && mul_widen_cost[speed][wider_mode] < max_cost)
3455 {
3456 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3457 unsignedp, OPTAB_WIDEN);
3458 if (tem)
3459 return extract_high_half (mode, tem);
3460 }
3461
3462 /* Try widening the mode and performing a non-widening multiplication. */
3463 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3464 && size - 1 < BITS_PER_WORD
3465 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3466 {
3467 rtx insns, wop0, wop1;
3468
3469 /* We need to widen the operands, for example to ensure the
3470 constant multiplier is correctly sign or zero extended.
3471 Use a sequence to clean up any instructions emitted by
3472 the conversions if things don't work out. */
3473 start_sequence ();
3474 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3475 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3476 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3477 unsignedp, OPTAB_WIDEN);
3478 insns = get_insns ();
3479 end_sequence ();
3480
3481 if (tem)
3482 {
3483 emit_insn (insns);
3484 return extract_high_half (mode, tem);
3485 }
3486 }
3487
3488 /* Try widening multiplication of opposite signedness, and adjust. */
3489 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3490 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3491 && size - 1 < BITS_PER_WORD
3492 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3493 + 4 * add_cost[speed][mode] < max_cost))
3494 {
3495 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3496 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3497 if (tem != 0)
3498 {
3499 tem = extract_high_half (mode, tem);
3500 /* We used the wrong signedness. Adjust the result. */
3501 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3502 target, unsignedp);
3503 }
3504 }
3505
3506 return 0;
3507 }
3508
3509 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3510 putting the high half of the result in TARGET if that is convenient,
3511 and return where the result is. If the operation cannot be performed,
3512 0 is returned.
3513
3514 MODE is the mode of operation and result.
3515
3516 UNSIGNEDP nonzero means unsigned multiply.
3517
3518 MAX_COST is the total allowed cost for the expanded RTL. */
3519
3520 static rtx
3521 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3522 rtx target, int unsignedp, int max_cost)
3523 {
3524 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3525 unsigned HOST_WIDE_INT cnst1;
3526 int extra_cost;
3527 bool sign_adjust = false;
3528 enum mult_variant variant;
3529 struct algorithm alg;
3530 rtx tem;
3531 bool speed = optimize_insn_for_speed_p ();
3532
3533 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3534 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3535 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3536
3537 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3538
3539 /* We can't optimize modes wider than BITS_PER_WORD.
3540 ??? We might be able to perform double-word arithmetic if
3541 mode == word_mode, however all the cost calculations in
3542 synth_mult etc. assume single-word operations. */
3543 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3544 return expand_mult_highpart_optab (mode, op0, op1, target,
3545 unsignedp, max_cost);
3546
3547 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3548
3549 /* Check whether we try to multiply by a negative constant. */
3550 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3551 {
3552 sign_adjust = true;
3553 extra_cost += add_cost[speed][mode];
3554 }
3555
3556 /* See whether shift/add multiplication is cheap enough. */
3557 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3558 max_cost - extra_cost))
3559 {
3560 /* See whether the specialized multiplication optabs are
3561 cheaper than the shift/add version. */
3562 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3563 alg.cost.cost + extra_cost);
3564 if (tem)
3565 return tem;
3566
3567 tem = convert_to_mode (wider_mode, op0, unsignedp);
3568 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3569 tem = extract_high_half (mode, tem);
3570
3571 /* Adjust result for signedness. */
3572 if (sign_adjust)
3573 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3574
3575 return tem;
3576 }
3577 return expand_mult_highpart_optab (mode, op0, op1, target,
3578 unsignedp, max_cost);
3579 }
3580
3581
3582 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3583
3584 static rtx
3585 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3586 {
3587 unsigned HOST_WIDE_INT masklow, maskhigh;
3588 rtx result, temp, shift, label;
3589 int logd;
3590
3591 logd = floor_log2 (d);
3592 result = gen_reg_rtx (mode);
3593
3594 /* Avoid conditional branches when they're expensive. */
3595 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3596 && optimize_insn_for_speed_p ())
3597 {
3598 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3599 mode, 0, -1);
3600 if (signmask)
3601 {
3602 signmask = force_reg (mode, signmask);
3603 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3604 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3605
3606 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3607 which instruction sequence to use. If logical right shifts
3608 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3609 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
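/* Added commentary (not part of the original code): with
   signmask == (op0 < 0 ? -1 : 0), both sequences compute the truncating
   remainder op0 % d.  The XOR/SUB variant evaluates

     ((((op0 ^ signmask) - signmask) & (d - 1)) ^ signmask) - signmask

   and the shift variant, after signmask has been shifted down to
   (op0 < 0 ? d - 1 : 0), evaluates

     ((op0 + signmask) & (d - 1)) - signmask

   e.g. for op0 = -5 and d = 4 both give -1.  */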
3610
3611 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3612 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3613 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3614 {
3615 temp = expand_binop (mode, xor_optab, op0, signmask,
3616 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3617 temp = expand_binop (mode, sub_optab, temp, signmask,
3618 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3619 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3621 temp = expand_binop (mode, xor_optab, temp, signmask,
3622 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3623 temp = expand_binop (mode, sub_optab, temp, signmask,
3624 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625 }
3626 else
3627 {
3628 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3630 signmask = force_reg (mode, signmask);
3631
3632 temp = expand_binop (mode, add_optab, op0, signmask,
3633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3635 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3636 temp = expand_binop (mode, sub_optab, temp, signmask,
3637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3638 }
3639 return temp;
3640 }
3641 }
3642
3643 /* Mask contains the mode's signbit and the significant bits of the
3644 modulus. By including the signbit in the operation, many targets
3645 can avoid an explicit compare operation in the following comparison
3646 against zero. */
3647
3648 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3649 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3650 {
3651 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3652 maskhigh = -1;
3653 }
3654 else
3655 maskhigh = (HOST_WIDE_INT) -1
3656 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3657
3658 temp = expand_binop (mode, and_optab, op0,
3659 immed_double_const (masklow, maskhigh, mode),
3660 result, 1, OPTAB_LIB_WIDEN);
3661 if (temp != result)
3662 emit_move_insn (result, temp);
3663
3664 label = gen_label_rtx ();
3665 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3666
3667 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3668 0, OPTAB_LIB_WIDEN);
3669 masklow = (HOST_WIDE_INT) -1 << logd;
3670 maskhigh = -1;
3671 temp = expand_binop (mode, ior_optab, temp,
3672 immed_double_const (masklow, maskhigh, mode),
3673 result, 1, OPTAB_LIB_WIDEN);
3674 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3675 0, OPTAB_LIB_WIDEN);
3676 if (temp != result)
3677 emit_move_insn (result, temp);
3678 emit_label (label);
3679 return result;
3680 }
3681
3682 /* Expand signed division of OP0 by a power of two D in mode MODE.
3683 This routine is only called for positive values of D. */
3684
3685 static rtx
3686 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3687 {
3688 rtx temp, label;
3689 int logd;
3690
3691 logd = floor_log2 (d);
3692
3693 if (d == 2
3694 && BRANCH_COST (optimize_insn_for_speed_p (),
3695 false) >= 1)
3696 {
3697 temp = gen_reg_rtx (mode);
3698 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3699 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3700 0, OPTAB_LIB_WIDEN);
3701 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3702 }
3703
3704 #ifdef HAVE_conditional_move
3705 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3706 >= 2)
3707 {
3708 rtx temp2;
3709
3710 /* ??? emit_conditional_move forces a stack adjustment via
3711 compare_from_rtx so, if the sequence is discarded, it will
3712 be lost. Do it now instead. */
3713 do_pending_stack_adjust ();
3714
3715 start_sequence ();
3716 temp2 = copy_to_mode_reg (mode, op0);
3717 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3718 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3719 temp = force_reg (mode, temp);
3720
3721 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3722 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3723 mode, temp, temp2, mode, 0);
3724 if (temp2)
3725 {
3726 rtx seq = get_insns ();
3727 end_sequence ();
3728 emit_insn (seq);
3729 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3730 }
3731 end_sequence ();
3732 }
3733 #endif
3734
3735 if (BRANCH_COST (optimize_insn_for_speed_p (),
3736 false) >= 2)
3737 {
3738 int ushift = GET_MODE_BITSIZE (mode) - logd;
3739
3740 temp = gen_reg_rtx (mode);
3741 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3742 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3743 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3744 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3745 else
3746 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3747 ushift, NULL_RTX, 1);
3748 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3749 0, OPTAB_LIB_WIDEN);
3750 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3751 }
3752
3753 label = gen_label_rtx ();
3754 temp = copy_to_mode_reg (mode, op0);
3755 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3756 expand_inc (temp, GEN_INT (d - 1));
3757 emit_label (label);
3758 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3759 }
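/* Added commentary (not part of the original code): each branch above
   computes the truncating signed quotient as

     (op0 + (op0 < 0 ? d - 1 : 0)) >> logd      (arithmetic shift)

   e.g. for op0 = -5 and d = 4 this gives (-5 + 3) >> 2 == -1, whereas the
   plain arithmetic shift alone would give -2 (flooring instead of
   truncating).  */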
3760 \f
3761 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3762 if that is convenient, and returning where the result is.
3763 You may request either the quotient or the remainder as the result;
3764 specify REM_FLAG nonzero to get the remainder.
3765
3766 CODE is the expression code for which kind of division this is;
3767 it controls how rounding is done. MODE is the machine mode to use.
3768 UNSIGNEDP nonzero means do unsigned division. */
3769
3770 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3771 and then correct it by or'ing in missing high bits
3772 if result of ANDI is nonzero.
3773 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3774 This could optimize to a bfexts instruction.
3775 But C doesn't use these operations, so their optimizations are
3776 left for later. */
3777 /* ??? For modulo, we don't actually need the highpart of the first product,
3778 the low part will do nicely. And for small divisors, the second multiply
3779 can also be a low-part only multiply or even be completely left out.
3780 E.g. to calculate the remainder of a division by 3 with a 32 bit
3781 multiply, multiply with 0x55555556 and extract the upper two bits;
3782 the result is exact for inputs up to 0x1fffffff.
3783 The input range can be reduced by using cross-sum rules.
3784 For odd divisors >= 3, the following table gives right shift counts
3785 so that if a number is shifted by an integer multiple of the given
3786 amount, the remainder stays the same:
3787 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3788 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3789 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3790 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3791 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3792
3793 Cross-sum rules for even numbers can be derived by leaving as many bits
3794 to the right alone as the divisor has zeros to the right.
3795 E.g. if x is an unsigned 32 bit number:
3796 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3797 */
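/* Numeric illustration of the remainder-by-3 trick above (added commentary,
   not part of the original code): 0x55555556 is the 32-bit value of
   ceil(2**32 / 3), and one reading of "extract the upper two bits" is to
   take the top two bits of the low-order 32-bit word of the product.
   E.g. for x = 100 that low word is 0x55555598, whose top two bits are 01,
   giving 100 mod 3 == 1.  The stated bound 0x1fffffff is 2**29 - 1.  */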
3798
3799 rtx
3800 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3801 rtx op0, rtx op1, rtx target, int unsignedp)
3802 {
3803 enum machine_mode compute_mode;
3804 rtx tquotient;
3805 rtx quotient = 0, remainder = 0;
3806 rtx last;
3807 int size;
3808 rtx insn, set;
3809 optab optab1, optab2;
3810 int op1_is_constant, op1_is_pow2 = 0;
3811 int max_cost, extra_cost;
3812 static HOST_WIDE_INT last_div_const = 0;
3813 static HOST_WIDE_INT ext_op1;
3814 bool speed = optimize_insn_for_speed_p ();
3815
3816 op1_is_constant = CONST_INT_P (op1);
3817 if (op1_is_constant)
3818 {
3819 ext_op1 = INTVAL (op1);
3820 if (unsignedp)
3821 ext_op1 &= GET_MODE_MASK (mode);
3822 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3823 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3824 }
3825
3826 /*
3827 This is the structure of expand_divmod:
3828
3829 First comes code to fix up the operands so we can perform the operations
3830 correctly and efficiently.
3831
3832 Second comes a switch statement with code specific for each rounding mode.
3833 For some special operands this code emits all RTL for the desired
3834 operation, for other cases, it generates only a quotient and stores it in
3835 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3836 to indicate that it has not done anything.
3837
3838 Last comes code that finishes the operation. If QUOTIENT is set and
3839 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3840 QUOTIENT is not set, it is computed using trunc rounding.
3841
3842 We try to generate special code for division and remainder when OP1 is a
3843 constant. If |OP1| = 2**n we can use shifts and some other fast
3844 operations. For other values of OP1, we compute a carefully selected
3845 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3846 by m.
3847
3848 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3849 half of the product. Different strategies for generating the product are
3850 implemented in expand_mult_highpart.
3851
3852 If what we actually want is the remainder, we generate that by another
3853 by-constant multiplication and a subtraction. */
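/* Numeric illustration (added commentary, not part of the original code),
   for 32-bit unsigned division by 10: choose_multiplier yields the
   multiplier 0xCCCCCCCD with a post-shift of 3, so

     quotient  = mulhi (x, 0xCCCCCCCD) >> 3
     remainder = x - quotient * 10

   where "mulhi" stands for the high 32 bits of the 64-bit product.  */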
3854
3855 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3856 code below will malfunction if we are, so check here and handle
3857 the special case if so. */
3858 if (op1 == const1_rtx)
3859 return rem_flag ? const0_rtx : op0;
3860
3861 /* When dividing by -1, we could get an overflow.
3862 negv_optab can handle overflows. */
3863 if (! unsignedp && op1 == constm1_rtx)
3864 {
3865 if (rem_flag)
3866 return const0_rtx;
3867 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3868 ? negv_optab : neg_optab, op0, target, 0);
3869 }
3870
3871 if (target
3872 /* Don't use the function value register as a target
3873 since we have to read it as well as write it,
3874 and function-inlining gets confused by this. */
3875 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3876 /* Don't clobber an operand while doing a multi-step calculation. */
3877 || ((rem_flag || op1_is_constant)
3878 && (reg_mentioned_p (target, op0)
3879 || (MEM_P (op0) && MEM_P (target))))
3880 || reg_mentioned_p (target, op1)
3881 || (MEM_P (op1) && MEM_P (target))))
3882 target = 0;
3883
3884 /* Get the mode in which to perform this computation. Normally it will
3885 be MODE, but sometimes we can't do the desired operation in MODE.
3886 If so, pick a wider mode in which we can do the operation. Convert
3887 to that mode at the start to avoid repeated conversions.
3888
3889 First see what operations we need. These depend on the expression
3890 we are evaluating. (We assume that divxx3 insns exist under the
3891 same conditions as modxx3 insns and that these insns don't normally
3892 fail. If these assumptions are not correct, we may generate less
3893 efficient code in some cases.)
3894
3895 Then see if we find a mode in which we can open-code that operation
3896 (either a division, modulus, or shift). Finally, check for the smallest
3897 mode for which we can do the operation with a library call. */
3898
3899 /* We might want to refine this now that we have division-by-constant
3900 optimization. Since expand_mult_highpart tries so many variants, it is
3901 not straightforward to generalize this. Maybe we should make an array
3902 of possible modes in init_expmed? Save this for GCC 2.7. */
3903
3904 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3905 ? (unsignedp ? lshr_optab : ashr_optab)
3906 : (unsignedp ? udiv_optab : sdiv_optab));
3907 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3908 ? optab1
3909 : (unsignedp ? udivmod_optab : sdivmod_optab));
3910
3911 for (compute_mode = mode; compute_mode != VOIDmode;
3912 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3913 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3914 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3915 break;
3916
3917 if (compute_mode == VOIDmode)
3918 for (compute_mode = mode; compute_mode != VOIDmode;
3919 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3920 if (optab_libfunc (optab1, compute_mode)
3921 || optab_libfunc (optab2, compute_mode))
3922 break;
3923
3924 /* If we still couldn't find a mode, use MODE, but expand_binop will
3925 probably die. */
3926 if (compute_mode == VOIDmode)
3927 compute_mode = mode;
3928
3929 if (target && GET_MODE (target) == compute_mode)
3930 tquotient = target;
3931 else
3932 tquotient = gen_reg_rtx (compute_mode);
3933
3934 size = GET_MODE_BITSIZE (compute_mode);
3935 #if 0
3936 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3937 (mode), and thereby get better code when OP1 is a constant. Do that
3938 later. It will require going over all usages of SIZE below. */
3939 size = GET_MODE_BITSIZE (mode);
3940 #endif
3941
3942 /* Only deduct something for a REM if the last divide done was
3943 for a different constant. Then set the constant of the last
3944 divide. */
3945 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3946 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3947 && INTVAL (op1) == last_div_const))
3948 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3949
3950 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3951
3952 /* Now convert to the best mode to use. */
3953 if (compute_mode != mode)
3954 {
3955 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3956 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3957
3958 /* convert_modes may have placed op1 into a register, so we
3959 must recompute the following. */
3960 op1_is_constant = CONST_INT_P (op1);
3961 op1_is_pow2 = (op1_is_constant
3962 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3963 || (! unsignedp
3964 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3965 }
3966
3967 /* If one of the operands is a volatile MEM, copy it into a register. */
3968
3969 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3970 op0 = force_reg (compute_mode, op0);
3971 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3972 op1 = force_reg (compute_mode, op1);
3973
3974 /* If we need the remainder or if OP1 is constant, we need to
3975 put OP0 in a register in case it has any queued subexpressions. */
3976 if (rem_flag || op1_is_constant)
3977 op0 = force_reg (compute_mode, op0);
3978
3979 last = get_last_insn ();
3980
3981 /* Promote floor rounding to trunc rounding for unsigned operations. */
3982 if (unsignedp)
3983 {
3984 if (code == FLOOR_DIV_EXPR)
3985 code = TRUNC_DIV_EXPR;
3986 if (code == FLOOR_MOD_EXPR)
3987 code = TRUNC_MOD_EXPR;
3988 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3989 code = TRUNC_DIV_EXPR;
3990 }
3991
3992 if (op1 != const0_rtx)
3993 switch (code)
3994 {
3995 case TRUNC_MOD_EXPR:
3996 case TRUNC_DIV_EXPR:
3997 if (op1_is_constant)
3998 {
3999 if (unsignedp)
4000 {
4001 unsigned HOST_WIDE_INT mh;
4002 int pre_shift, post_shift;
4003 int dummy;
4004 rtx ml;
4005 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4006 & GET_MODE_MASK (compute_mode));
4007
4008 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4009 {
4010 pre_shift = floor_log2 (d);
4011 if (rem_flag)
4012 {
4013 remainder
4014 = expand_binop (compute_mode, and_optab, op0,
4015 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4016 remainder, 1,
4017 OPTAB_LIB_WIDEN);
4018 if (remainder)
4019 return gen_lowpart (mode, remainder);
4020 }
4021 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4022 pre_shift, tquotient, 1);
4023 }
4024 else if (size <= HOST_BITS_PER_WIDE_INT)
4025 {
4026 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4027 {
4028 /* Most significant bit of divisor is set; emit an scc
4029 insn. */
4030 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4031 compute_mode, 1, 1);
4032 }
4033 else
4034 {
4035 /* Find a suitable multiplier and right shift count
4036 instead of dividing by D. */
4037
4038 mh = choose_multiplier (d, size, size,
4039 &ml, &post_shift, &dummy);
4040
4041 /* If the suggested multiplier is more than SIZE bits,
4042 we can do better for even divisors, using an
4043 initial right shift. */
4044 if (mh != 0 && (d & 1) == 0)
4045 {
4046 pre_shift = floor_log2 (d & -d);
4047 mh = choose_multiplier (d >> pre_shift, size,
4048 size - pre_shift,
4049 &ml, &post_shift, &dummy);
4050 gcc_assert (!mh);
4051 }
4052 else
4053 pre_shift = 0;
4054
4055 if (mh != 0)
4056 {
4057 rtx t1, t2, t3, t4;
4058
4059 if (post_shift - 1 >= BITS_PER_WORD)
4060 goto fail1;
4061
4062 extra_cost
4063 = (shift_cost[speed][compute_mode][post_shift - 1]
4064 + shift_cost[speed][compute_mode][1]
4065 + 2 * add_cost[speed][compute_mode]);
4066 t1 = expand_mult_highpart (compute_mode, op0, ml,
4067 NULL_RTX, 1,
4068 max_cost - extra_cost);
4069 if (t1 == 0)
4070 goto fail1;
4071 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4072 op0, t1),
4073 NULL_RTX);
4074 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4075 t2, 1, NULL_RTX, 1);
4076 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4077 t1, t3),
4078 NULL_RTX);
4079 quotient = expand_shift
4080 (RSHIFT_EXPR, compute_mode, t4,
4081 post_shift - 1, tquotient, 1);
4082 }
4083 else
4084 {
4085 rtx t1, t2;
4086
4087 if (pre_shift >= BITS_PER_WORD
4088 || post_shift >= BITS_PER_WORD)
4089 goto fail1;
4090
4091 t1 = expand_shift
4092 (RSHIFT_EXPR, compute_mode, op0,
4093 pre_shift, NULL_RTX, 1);
4094 extra_cost
4095 = (shift_cost[speed][compute_mode][pre_shift]
4096 + shift_cost[speed][compute_mode][post_shift]);
4097 t2 = expand_mult_highpart (compute_mode, t1, ml,
4098 NULL_RTX, 1,
4099 max_cost - extra_cost);
4100 if (t2 == 0)
4101 goto fail1;
4102 quotient = expand_shift
4103 (RSHIFT_EXPR, compute_mode, t2,
4104 post_shift, tquotient, 1);
4105 }
4106 }
4107 }
4108 else /* Too wide mode to use tricky code */
4109 break;
4110
4111 insn = get_last_insn ();
4112 if (insn != last
4113 && (set = single_set (insn)) != 0
4114 && SET_DEST (set) == quotient)
4115 set_unique_reg_note (insn,
4116 REG_EQUAL,
4117 gen_rtx_UDIV (compute_mode, op0, op1));
4118 }
4119 else /* TRUNC_DIV, signed */
4120 {
4121 unsigned HOST_WIDE_INT ml;
4122 int lgup, post_shift;
4123 rtx mlr;
4124 HOST_WIDE_INT d = INTVAL (op1);
4125 unsigned HOST_WIDE_INT abs_d;
4126
4127 /* Since d might be INT_MIN, we have to cast to
4128 unsigned HOST_WIDE_INT before negating to avoid
4129 undefined signed overflow. */
4130 abs_d = (d >= 0
4131 ? (unsigned HOST_WIDE_INT) d
4132 : - (unsigned HOST_WIDE_INT) d);
4133
4134 /* n rem d = n rem -d */
4135 if (rem_flag && d < 0)
4136 {
4137 d = abs_d;
4138 op1 = gen_int_mode (abs_d, compute_mode);
4139 }
4140
4141 if (d == 1)
4142 quotient = op0;
4143 else if (d == -1)
4144 quotient = expand_unop (compute_mode, neg_optab, op0,
4145 tquotient, 0);
4146 else if (HOST_BITS_PER_WIDE_INT >= size
4147 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4148 {
4149 /* This case is not handled correctly below. */
4150 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4151 compute_mode, 1, 1);
4152 if (quotient == 0)
4153 goto fail1;
4154 }
4155 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4156 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4157 : sdiv_pow2_cheap[speed][compute_mode])
4158 /* We assume that the cheap metric is true if the
4159 optab has an expander for this mode. */
4160 && ((optab_handler ((rem_flag ? smod_optab
4161 : sdiv_optab),
4162 compute_mode)
4163 != CODE_FOR_nothing)
4164 || (optab_handler (sdivmod_optab,
4165 compute_mode)
4166 != CODE_FOR_nothing)))
4167 ;
4168 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4169 {
4170 if (rem_flag)
4171 {
4172 remainder = expand_smod_pow2 (compute_mode, op0, d);
4173 if (remainder)
4174 return gen_lowpart (mode, remainder);
4175 }
4176
4177 if (sdiv_pow2_cheap[speed][compute_mode]
4178 && ((optab_handler (sdiv_optab, compute_mode)
4179 != CODE_FOR_nothing)
4180 || (optab_handler (sdivmod_optab, compute_mode)
4181 != CODE_FOR_nothing)))
4182 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4183 compute_mode, op0,
4184 gen_int_mode (abs_d,
4185 compute_mode),
4186 NULL_RTX, 0);
4187 else
4188 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4189
4190 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4191 negate the quotient. */
4192 if (d < 0)
4193 {
4194 insn = get_last_insn ();
4195 if (insn != last
4196 && (set = single_set (insn)) != 0
4197 && SET_DEST (set) == quotient
4198 && abs_d < ((unsigned HOST_WIDE_INT) 1
4199 << (HOST_BITS_PER_WIDE_INT - 1)))
4200 set_unique_reg_note (insn,
4201 REG_EQUAL,
4202 gen_rtx_DIV (compute_mode,
4203 op0,
4204 GEN_INT
4205 (trunc_int_for_mode
4206 (abs_d,
4207 compute_mode))));
4208
4209 quotient = expand_unop (compute_mode, neg_optab,
4210 quotient, quotient, 0);
4211 }
4212 }
4213 else if (size <= HOST_BITS_PER_WIDE_INT)
4214 {
4215 choose_multiplier (abs_d, size, size - 1,
4216 &mlr, &post_shift, &lgup);
4217 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4218 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4219 {
4220 rtx t1, t2, t3;
4221
4222 if (post_shift >= BITS_PER_WORD
4223 || size - 1 >= BITS_PER_WORD)
4224 goto fail1;
4225
4226 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4227 + shift_cost[speed][compute_mode][size - 1]
4228 + add_cost[speed][compute_mode]);
4229 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4230 NULL_RTX, 0,
4231 max_cost - extra_cost);
4232 if (t1 == 0)
4233 goto fail1;
4234 t2 = expand_shift
4235 (RSHIFT_EXPR, compute_mode, t1,
4236 post_shift, NULL_RTX, 0);
4237 t3 = expand_shift
4238 (RSHIFT_EXPR, compute_mode, op0,
4239 size - 1, NULL_RTX, 0);
4240 if (d < 0)
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t3, t2),
4244 tquotient);
4245 else
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t2, t3),
4249 tquotient);
4250 }
4251 else
4252 {
4253 rtx t1, t2, t3, t4;
4254
4255 if (post_shift >= BITS_PER_WORD
4256 || size - 1 >= BITS_PER_WORD)
4257 goto fail1;
4258
4259 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4260 mlr = gen_int_mode (ml, compute_mode);
4261 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4262 + shift_cost[speed][compute_mode][size - 1]
4263 + 2 * add_cost[speed][compute_mode]);
4264 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4265 NULL_RTX, 0,
4266 max_cost - extra_cost);
4267 if (t1 == 0)
4268 goto fail1;
4269 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4270 t1, op0),
4271 NULL_RTX);
4272 t3 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, t2,
4274 post_shift, NULL_RTX, 0);
4275 t4 = expand_shift
4276 (RSHIFT_EXPR, compute_mode, op0,
4277 size - 1, NULL_RTX, 0);
4278 if (d < 0)
4279 quotient
4280 = force_operand (gen_rtx_MINUS (compute_mode,
4281 t4, t3),
4282 tquotient);
4283 else
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t3, t4),
4287 tquotient);
4288 }
4289 }
4290 else /* Too wide mode to use tricky code */
4291 break;
4292
4293 insn = get_last_insn ();
4294 if (insn != last
4295 && (set = single_set (insn)) != 0
4296 && SET_DEST (set) == quotient)
4297 set_unique_reg_note (insn,
4298 REG_EQUAL,
4299 gen_rtx_DIV (compute_mode, op0, op1));
4300 }
4301 break;
4302 }
4303 fail1:
4304 delete_insns_since (last);
4305 break;
4306
4307 case FLOOR_DIV_EXPR:
4308 case FLOOR_MOD_EXPR:
4309 /* We will come here only for signed operations. */
4310 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4311 {
4312 unsigned HOST_WIDE_INT mh;
4313 int pre_shift, lgup, post_shift;
4314 HOST_WIDE_INT d = INTVAL (op1);
4315 rtx ml;
4316
4317 if (d > 0)
4318 {
4319 /* We could just as easily deal with negative constants here,
4320 but it does not seem worth the trouble for GCC 2.6. */
4321 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4322 {
4323 pre_shift = floor_log2 (d);
4324 if (rem_flag)
4325 {
4326 remainder = expand_binop (compute_mode, and_optab, op0,
4327 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4328 remainder, 0, OPTAB_LIB_WIDEN);
4329 if (remainder)
4330 return gen_lowpart (mode, remainder);
4331 }
4332 quotient = expand_shift
4333 (RSHIFT_EXPR, compute_mode, op0,
4334 pre_shift, tquotient, 0);
4335 }
4336 else
4337 {
4338 rtx t1, t2, t3, t4;
4339
4340 mh = choose_multiplier (d, size, size - 1,
4341 &ml, &post_shift, &lgup);
4342 gcc_assert (!mh);
4343
4344 if (post_shift < BITS_PER_WORD
4345 && size - 1 < BITS_PER_WORD)
4346 {
4347 t1 = expand_shift
4348 (RSHIFT_EXPR, compute_mode, op0,
4349 size - 1, NULL_RTX, 0);
4350 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4351 NULL_RTX, 0, OPTAB_WIDEN);
4352 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4353 + shift_cost[speed][compute_mode][size - 1]
4354 + 2 * add_cost[speed][compute_mode]);
4355 t3 = expand_mult_highpart (compute_mode, t2, ml,
4356 NULL_RTX, 1,
4357 max_cost - extra_cost);
4358 if (t3 != 0)
4359 {
4360 t4 = expand_shift
4361 (RSHIFT_EXPR, compute_mode, t3,
4362 post_shift, NULL_RTX, 1);
4363 quotient = expand_binop (compute_mode, xor_optab,
4364 t4, t1, tquotient, 0,
4365 OPTAB_WIDEN);
4366 }
4367 }
4368 }
4369 }
4370 else
4371 {
4372 rtx nsign, t1, t2, t3, t4;
4373 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4374 op0, constm1_rtx), NULL_RTX);
4375 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4376 0, OPTAB_WIDEN);
4377 nsign = expand_shift
4378 (RSHIFT_EXPR, compute_mode, t2,
4379 size - 1, NULL_RTX, 0);
4380 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4381 NULL_RTX);
4382 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4383 NULL_RTX, 0);
4384 if (t4)
4385 {
4386 rtx t5;
4387 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4388 NULL_RTX, 0);
4389 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4390 t4, t5),
4391 tquotient);
4392 }
4393 }
4394 }
4395
4396 if (quotient != 0)
4397 break;
4398 delete_insns_since (last);
4399
4400 /* Try using an instruction that produces both the quotient and
4401 remainder, using truncation. We can easily compensate the quotient
4402 or remainder to get floor rounding, once we have the remainder.
4403 Notice that we also compute the final remainder value here,
4404 and return the result right away. */
4405 if (target == 0 || GET_MODE (target) != compute_mode)
4406 target = gen_reg_rtx (compute_mode);
4407
4408 if (rem_flag)
4409 {
4410 remainder
4411 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4412 quotient = gen_reg_rtx (compute_mode);
4413 }
4414 else
4415 {
4416 quotient
4417 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4418 remainder = gen_reg_rtx (compute_mode);
4419 }
4420
4421 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4422 quotient, remainder, 0))
4423 {
4424 /* This could be computed with a branch-less sequence.
4425 Save that for later. */
4426 rtx tem;
4427 rtx label = gen_label_rtx ();
4428 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4429 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4430 NULL_RTX, 0, OPTAB_WIDEN);
4431 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4432 expand_dec (quotient, const1_rtx);
4433 expand_inc (remainder, op1);
4434 emit_label (label);
4435 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4436 }
4437
4438 /* No luck with division elimination or divmod. Have to do it
4439 by conditionally adjusting op0 *and* the result. */
4440 {
4441 rtx label1, label2, label3, label4, label5;
4442 rtx adjusted_op0;
4443 rtx tem;
4444
4445 quotient = gen_reg_rtx (compute_mode);
4446 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4447 label1 = gen_label_rtx ();
4448 label2 = gen_label_rtx ();
4449 label3 = gen_label_rtx ();
4450 label4 = gen_label_rtx ();
4451 label5 = gen_label_rtx ();
4452 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4453 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4454 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4455 quotient, 0, OPTAB_LIB_WIDEN);
4456 if (tem != quotient)
4457 emit_move_insn (quotient, tem);
4458 emit_jump_insn (gen_jump (label5));
4459 emit_barrier ();
4460 emit_label (label1);
4461 expand_inc (adjusted_op0, const1_rtx);
4462 emit_jump_insn (gen_jump (label4));
4463 emit_barrier ();
4464 emit_label (label2);
4465 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4466 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4467 quotient, 0, OPTAB_LIB_WIDEN);
4468 if (tem != quotient)
4469 emit_move_insn (quotient, tem);
4470 emit_jump_insn (gen_jump (label5));
4471 emit_barrier ();
4472 emit_label (label3);
4473 expand_dec (adjusted_op0, const1_rtx);
4474 emit_label (label4);
4475 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4476 quotient, 0, OPTAB_LIB_WIDEN);
4477 if (tem != quotient)
4478 emit_move_insn (quotient, tem);
4479 expand_dec (quotient, const1_rtx);
4480 emit_label (label5);
4481 }
4482 break;
4483
4484 case CEIL_DIV_EXPR:
4485 case CEIL_MOD_EXPR:
4486 if (unsignedp)
4487 {
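/* Added commentary (not part of the original code): for a power-of-2
   divisor this block computes the unsigned ceiling quotient as
   (op0 >> floor_log2 (d)) + ((op0 & (d - 1)) != 0), using
   emit_store_flag for the "!= 0" term when possible and a
   compare-and-increment otherwise.  */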
4488 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4489 {
4490 rtx t1, t2, t3;
4491 unsigned HOST_WIDE_INT d = INTVAL (op1);
4492 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4493 floor_log2 (d), tquotient, 1);
4494 t2 = expand_binop (compute_mode, and_optab, op0,
4495 GEN_INT (d - 1),
4496 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4497 t3 = gen_reg_rtx (compute_mode);
4498 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4499 compute_mode, 1, 1);
4500 if (t3 == 0)
4501 {
4502 rtx lab;
4503 lab = gen_label_rtx ();
4504 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4505 expand_inc (t1, const1_rtx);
4506 emit_label (lab);
4507 quotient = t1;
4508 }
4509 else
4510 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4511 t1, t3),
4512 tquotient);
4513 break;
4514 }
4515
4516 /* Try using an instruction that produces both the quotient and
4517 remainder, using truncation. We can easily compensate the
4518 quotient or remainder to get ceiling rounding, once we have the
4519 remainder. Notice that we also compute the final remainder
4520 value here, and return the result right away. */
4521 if (target == 0 || GET_MODE (target) != compute_mode)
4522 target = gen_reg_rtx (compute_mode);
4523
4524 if (rem_flag)
4525 {
4526 remainder = (REG_P (target)
4527 ? target : gen_reg_rtx (compute_mode));
4528 quotient = gen_reg_rtx (compute_mode);
4529 }
4530 else
4531 {
4532 quotient = (REG_P (target)
4533 ? target : gen_reg_rtx (compute_mode));
4534 remainder = gen_reg_rtx (compute_mode);
4535 }
4536
4537 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4538 remainder, 1))
4539 {
4540 /* This could be computed with a branch-less sequence.
4541 Save that for later. */
4542 rtx label = gen_label_rtx ();
4543 do_cmp_and_jump (remainder, const0_rtx, EQ,
4544 compute_mode, label);
4545 expand_inc (quotient, const1_rtx);
4546 expand_dec (remainder, op1);
4547 emit_label (label);
4548 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4549 }
4550
4551 /* No luck with division elimination or divmod. Have to do it
4552 by conditionally adjusting op0 *and* the result. */
4553 {
4554 rtx label1, label2;
4555 rtx adjusted_op0, tem;
4556
4557 quotient = gen_reg_rtx (compute_mode);
4558 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4559 label1 = gen_label_rtx ();
4560 label2 = gen_label_rtx ();
4561 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4562 compute_mode, label1);
4563 emit_move_insn (quotient, const0_rtx);
4564 emit_jump_insn (gen_jump (label2));
4565 emit_barrier ();
4566 emit_label (label1);
4567 expand_dec (adjusted_op0, const1_rtx);
4568 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4569 quotient, 1, OPTAB_LIB_WIDEN);
4570 if (tem != quotient)
4571 emit_move_insn (quotient, tem);
4572 expand_inc (quotient, const1_rtx);
4573 emit_label (label2);
4574 }
4575 }
4576 else /* signed */
4577 {
4578 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4579 && INTVAL (op1) >= 0)
4580 {
4581 /* This is extremely similar to the code for the unsigned case
4582 above. For 2.7 we should merge these variants, but for
4583 2.6.1 I don't want to touch the code for unsigned since that
4584 gets used in C. The signed case will only be used by other
4585 languages (Ada). */
4586
4587 rtx t1, t2, t3;
4588 unsigned HOST_WIDE_INT d = INTVAL (op1);
4589 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4590 floor_log2 (d), tquotient, 0);
4591 t2 = expand_binop (compute_mode, and_optab, op0,
4592 GEN_INT (d - 1),
4593 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4594 t3 = gen_reg_rtx (compute_mode);
4595 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4596 compute_mode, 1, 1);
4597 if (t3 == 0)
4598 {
4599 rtx lab;
4600 lab = gen_label_rtx ();
4601 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4602 expand_inc (t1, const1_rtx);
4603 emit_label (lab);
4604 quotient = t1;
4605 }
4606 else
4607 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4608 t1, t3),
4609 tquotient);
4610 break;
4611 }
4612
4613 /* Try using an instruction that produces both the quotient and
4614 remainder, using truncation. We can easily compensate the
4615 quotient or remainder to get ceiling rounding, once we have the
4616 remainder. Notice that we also compute the final remainder
4617 value here, and return the result right away. */
4618 if (target == 0 || GET_MODE (target) != compute_mode)
4619 target = gen_reg_rtx (compute_mode);
4620 if (rem_flag)
4621 {
4622 remainder = (REG_P (target)
4623 ? target : gen_reg_rtx (compute_mode));
4624 quotient = gen_reg_rtx (compute_mode);
4625 }
4626 else
4627 {
4628 quotient = (REG_P (target)
4629 ? target : gen_reg_rtx (compute_mode));
4630 remainder = gen_reg_rtx (compute_mode);
4631 }
4632
4633 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4634 remainder, 0))
4635 {
4636 /* This could be computed with a branch-less sequence.
4637 Save that for later. */
4638 rtx tem;
4639 rtx label = gen_label_rtx ();
4640 do_cmp_and_jump (remainder, const0_rtx, EQ,
4641 compute_mode, label);
4642 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4643 NULL_RTX, 0, OPTAB_WIDEN);
4644 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4645 expand_inc (quotient, const1_rtx);
4646 expand_dec (remainder, op1);
4647 emit_label (label);
4648 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4649 }
4650
4651 /* No luck with division elimination or divmod. Have to do it
4652 by conditionally adjusting op0 *and* the result. */
4653 {
4654 rtx label1, label2, label3, label4, label5;
4655 rtx adjusted_op0;
4656 rtx tem;
4657
4658 quotient = gen_reg_rtx (compute_mode);
4659 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4660 label1 = gen_label_rtx ();
4661 label2 = gen_label_rtx ();
4662 label3 = gen_label_rtx ();
4663 label4 = gen_label_rtx ();
4664 label5 = gen_label_rtx ();
4665 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4666 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4667 compute_mode, label1);
4668 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4669 quotient, 0, OPTAB_LIB_WIDEN);
4670 if (tem != quotient)
4671 emit_move_insn (quotient, tem);
4672 emit_jump_insn (gen_jump (label5));
4673 emit_barrier ();
4674 emit_label (label1);
4675 expand_dec (adjusted_op0, const1_rtx);
4676 emit_jump_insn (gen_jump (label4));
4677 emit_barrier ();
4678 emit_label (label2);
4679 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4680 compute_mode, label3);
4681 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4682 quotient, 0, OPTAB_LIB_WIDEN);
4683 if (tem != quotient)
4684 emit_move_insn (quotient, tem);
4685 emit_jump_insn (gen_jump (label5));
4686 emit_barrier ();
4687 emit_label (label3);
4688 expand_inc (adjusted_op0, const1_rtx);
4689 emit_label (label4);
4690 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4691 quotient, 0, OPTAB_LIB_WIDEN);
4692 if (tem != quotient)
4693 emit_move_insn (quotient, tem);
4694 expand_inc (quotient, const1_rtx);
4695 emit_label (label5);
4696 }
4697 }
4698 break;
4699
4700 case EXACT_DIV_EXPR:
4701 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4702 {
4703 HOST_WIDE_INT d = INTVAL (op1);
4704 unsigned HOST_WIDE_INT ml;
4705 int pre_shift;
4706 rtx t1;
4707
4708 pre_shift = floor_log2 (d & -d);
4709 ml = invert_mod2n (d >> pre_shift, size);
4710 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4711 pre_shift, NULL_RTX, unsignedp);
4712 quotient = expand_mult (compute_mode, t1,
4713 gen_int_mode (ml, compute_mode),
4714 NULL_RTX, 1);
4715
4716 insn = get_last_insn ();
4717 set_unique_reg_note (insn,
4718 REG_EQUAL,
4719 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4720 compute_mode,
4721 op0, op1));
4722 }
4723 break;
4724
4725 case ROUND_DIV_EXPR:
4726 case ROUND_MOD_EXPR:
4727 if (unsignedp)
4728 {
4729 rtx tem;
4730 rtx label;
4731 label = gen_label_rtx ();
4732 quotient = gen_reg_rtx (compute_mode);
4733 remainder = gen_reg_rtx (compute_mode);
4734 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4735 {
4736 rtx tem;
4737 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4738 quotient, 1, OPTAB_LIB_WIDEN);
4739 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4740 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4741 remainder, 1, OPTAB_LIB_WIDEN);
4742 }
4743 tem = plus_constant (op1, -1);
4744 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4745 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4746 expand_inc (quotient, const1_rtx);
4747 expand_dec (remainder, op1);
4748 emit_label (label);
4749 }
4750 else
4751 {
4752 rtx abs_rem, abs_op1, tem, mask;
4753 rtx label;
4754 label = gen_label_rtx ();
4755 quotient = gen_reg_rtx (compute_mode);
4756 remainder = gen_reg_rtx (compute_mode);
4757 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4758 {
4759 rtx tem;
4760 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4761 quotient, 0, OPTAB_LIB_WIDEN);
4762 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4763 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4764 remainder, 0, OPTAB_LIB_WIDEN);
4765 }
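/* Added commentary (not part of the original code): the sequence below
   rounds the truncated quotient to nearest.  When 2 * |remainder| >=
   |op1|, it adds +1 or -1 to the quotient (the sign taken branchlessly
   from the sign bit of op0 ^ op1) and subtracts that same increment
   times op1 from the remainder.  */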
4766 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4767 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4768 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4769 1, NULL_RTX, 1);
4770 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4771 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4774 size - 1, NULL_RTX, 0);
4775 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4776 NULL_RTX, 0, OPTAB_WIDEN);
4777 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4778 NULL_RTX, 0, OPTAB_WIDEN);
4779 expand_inc (quotient, tem);
4780 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4781 NULL_RTX, 0, OPTAB_WIDEN);
4782 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4783 NULL_RTX, 0, OPTAB_WIDEN);
4784 expand_dec (remainder, tem);
4785 emit_label (label);
4786 }
4787 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4788
4789 default:
4790 gcc_unreachable ();
4791 }
4792
4793 if (quotient == 0)
4794 {
4795 if (target && GET_MODE (target) != compute_mode)
4796 target = 0;
4797
4798 if (rem_flag)
4799 {
4800 /* Try to produce the remainder without producing the quotient.
4801 If we seem to have a divmod pattern that does not require widening,
4802 don't try widening here. We should really have a WIDEN argument
4803 to expand_twoval_binop, since what we'd really like to do here is
4804 1) try a mod insn in compute_mode
4805 2) try a divmod insn in compute_mode
4806 3) try a div insn in compute_mode and multiply-subtract to get
4807 remainder
4808 4) try the same things with widening allowed. */
4809 remainder
4810 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4811 op0, op1, target,
4812 unsignedp,
4813 ((optab_handler (optab2, compute_mode)
4814 != CODE_FOR_nothing)
4815 ? OPTAB_DIRECT : OPTAB_WIDEN));
4816 if (remainder == 0)
4817 {
4818 /* No luck there. Can we do remainder and divide at once
4819 without a library call? */
4820 remainder = gen_reg_rtx (compute_mode);
4821 if (! expand_twoval_binop ((unsignedp
4822 ? udivmod_optab
4823 : sdivmod_optab),
4824 op0, op1,
4825 NULL_RTX, remainder, unsignedp))
4826 remainder = 0;
4827 }
4828
4829 if (remainder)
4830 return gen_lowpart (mode, remainder);
4831 }
4832
4833 /* Produce the quotient. Try a quotient insn, but not a library call.
4834 If we have a divmod in this mode, use it in preference to widening
4835 the div (for this test we assume it will not fail). Note that optab2
4836 is set to the one of the two optabs that the call below will use. */
4837 quotient
4838 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4839 op0, op1, rem_flag ? NULL_RTX : target,
4840 unsignedp,
4841 ((optab_handler (optab2, compute_mode)
4842 != CODE_FOR_nothing)
4843 ? OPTAB_DIRECT : OPTAB_WIDEN));
4844
4845 if (quotient == 0)
4846 {
4847 /* No luck there. Try a quotient-and-remainder insn,
4848 keeping the quotient alone. */
4849 quotient = gen_reg_rtx (compute_mode);
4850 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4851 op0, op1,
4852 quotient, NULL_RTX, unsignedp))
4853 {
4854 quotient = 0;
4855 if (! rem_flag)
4856 /* Still no luck. If we are not computing the remainder,
4857 use a library call for the quotient. */
4858 quotient = sign_expand_binop (compute_mode,
4859 udiv_optab, sdiv_optab,
4860 op0, op1, target,
4861 unsignedp, OPTAB_LIB_WIDEN);
4862 }
4863 }
4864 }
4865
4866 if (rem_flag)
4867 {
4868 if (target && GET_MODE (target) != compute_mode)
4869 target = 0;
4870
4871 if (quotient == 0)
4872 {
4873 /* No divide instruction either. Use library for remainder. */
4874 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4875 op0, op1, target,
4876 unsignedp, OPTAB_LIB_WIDEN);
4877 /* No remainder function. Try a quotient-and-remainder
4878 function, keeping the remainder. */
4879 if (!remainder)
4880 {
4881 remainder = gen_reg_rtx (compute_mode);
4882 if (!expand_twoval_binop_libfunc
4883 (unsignedp ? udivmod_optab : sdivmod_optab,
4884 op0, op1,
4885 NULL_RTX, remainder,
4886 unsignedp ? UMOD : MOD))
4887 remainder = NULL_RTX;
4888 }
4889 }
4890 else
4891 {
4892 /* We divided. Now finish doing X - Y * (X / Y). */
4893 remainder = expand_mult (compute_mode, quotient, op1,
4894 NULL_RTX, unsignedp);
4895 remainder = expand_binop (compute_mode, sub_optab, op0,
4896 remainder, target, unsignedp,
4897 OPTAB_LIB_WIDEN);
4898 }
4899 }
4900
4901 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4902 }
4903 \f
4904 /* Return a tree node with data type TYPE, describing the value of X.
4905 Usually this is a VAR_DECL, if there is no obvious better choice.
4906 X may be an expression; however, we only support those expressions
4907 generated by loop.c. */
4908
4909 tree
4910 make_tree (tree type, rtx x)
4911 {
4912 tree t;
4913
4914 switch (GET_CODE (x))
4915 {
4916 case CONST_INT:
4917 {
4918 HOST_WIDE_INT hi = 0;
4919
4920 if (INTVAL (x) < 0
4921 && !(TYPE_UNSIGNED (type)
4922 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4923 < HOST_BITS_PER_WIDE_INT)))
4924 hi = -1;
4925
4926 t = build_int_cst_wide (type, INTVAL (x), hi);
4927
4928 return t;
4929 }
4930
4931 case CONST_DOUBLE:
4932 if (GET_MODE (x) == VOIDmode)
4933 t = build_int_cst_wide (type,
4934 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4935 else
4936 {
4937 REAL_VALUE_TYPE d;
4938
4939 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4940 t = build_real (type, d);
4941 }
4942
4943 return t;
4944
4945 case CONST_VECTOR:
4946 {
4947 int units = CONST_VECTOR_NUNITS (x);
4948 tree itype = TREE_TYPE (type);
4949 tree t = NULL_TREE;
4950 int i;
4951
4952
4953 /* Build a tree with vector elements. */
4954 for (i = units - 1; i >= 0; --i)
4955 {
4956 rtx elt = CONST_VECTOR_ELT (x, i);
4957 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4958 }
4959
4960 return build_vector (type, t);
4961 }
4962
4963 case PLUS:
4964 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4965 make_tree (type, XEXP (x, 1)));
4966
4967 case MINUS:
4968 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4969 make_tree (type, XEXP (x, 1)));
4970
4971 case NEG:
4972 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4973
4974 case MULT:
4975 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4976 make_tree (type, XEXP (x, 1)));
4977
4978 case ASHIFT:
4979 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4980 make_tree (type, XEXP (x, 1)));
4981
4982 case LSHIFTRT:
4983 t = unsigned_type_for (type);
4984 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4985 make_tree (t, XEXP (x, 0)),
4986 make_tree (type, XEXP (x, 1))));
4987
4988 case ASHIFTRT:
4989 t = signed_type_for (type);
4990 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4991 make_tree (t, XEXP (x, 0)),
4992 make_tree (type, XEXP (x, 1))));
4993
4994 case DIV:
4995 if (TREE_CODE (type) != REAL_TYPE)
4996 t = signed_type_for (type);
4997 else
4998 t = type;
4999
5000 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5001 make_tree (t, XEXP (x, 0)),
5002 make_tree (t, XEXP (x, 1))));
5003 case UDIV:
5004 t = unsigned_type_for (type);
5005 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5006 make_tree (t, XEXP (x, 0)),
5007 make_tree (t, XEXP (x, 1))));
5008
5009 case SIGN_EXTEND:
5010 case ZERO_EXTEND:
5011 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5012 GET_CODE (x) == ZERO_EXTEND);
5013 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5014
5015 case CONST:
5016 return make_tree (type, XEXP (x, 0));
5017
5018 case SYMBOL_REF:
5019 t = SYMBOL_REF_DECL (x);
5020 if (t)
5021 return fold_convert (type, build_fold_addr_expr (t));
5022 /* else fall through. */
5023
5024 default:
5025 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5026
5027 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5028 address mode to pointer mode. */
5029 if (POINTER_TYPE_P (type))
5030 x = convert_memory_address_addr_space
5031 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5032
5033 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5034 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5035 t->decl_with_rtl.rtl = x;
5036
5037 return t;
5038 }
5039 }
5040 \f
5041 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5042 and returning TARGET.
5043
5044 If TARGET is 0, a pseudo-register or constant is returned. */
5045
5046 rtx
5047 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5048 {
5049 rtx tem = 0;
5050
5051 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5052 tem = simplify_binary_operation (AND, mode, op0, op1);
5053 if (tem == 0)
5054 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5055
5056 if (target == 0)
5057 target = tem;
5058 else if (tem != target)
5059 emit_move_insn (target, tem);
5060 return target;
5061 }
5062
5063 /* Helper function for emit_store_flag. */
5064 static rtx
5065 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5066 enum machine_mode mode, enum machine_mode compare_mode,
5067 int unsignedp, rtx x, rtx y, int normalizep,
5068 enum machine_mode target_mode)
5069 {
5070 struct expand_operand ops[4];
5071 rtx op0, last, comparison, subtarget;
5072 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5073
5074 last = get_last_insn ();
5075 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5076 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5077 if (!x || !y)
5078 {
5079 delete_insns_since (last);
5080 return NULL_RTX;
5081 }
5082
5083 if (target_mode == VOIDmode)
5084 target_mode = result_mode;
5085 if (!target)
5086 target = gen_reg_rtx (target_mode);
5087
5088 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5089
5090 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5091 create_fixed_operand (&ops[1], comparison);
5092 create_fixed_operand (&ops[2], x);
5093 create_fixed_operand (&ops[3], y);
5094 if (!maybe_expand_insn (icode, 4, ops))
5095 {
5096 delete_insns_since (last);
5097 return NULL_RTX;
5098 }
5099 subtarget = ops[0].value;
5100
5101 /* If we are converting to a wider mode, first convert to
5102 TARGET_MODE, then normalize. This produces better combining
5103 opportunities on machines that have a SIGN_EXTRACT when we are
5104 testing a single bit. This mostly benefits the 68k.
5105
5106 If STORE_FLAG_VALUE does not have the sign bit set when
5107 interpreted in MODE, we can do this conversion as unsigned, which
5108 is usually more efficient. */
5109 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5110 {
5111 convert_move (target, subtarget,
5112 val_signbit_known_clear_p (result_mode,
5113 STORE_FLAG_VALUE));
5114 op0 = target;
5115 result_mode = target_mode;
5116 }
5117 else
5118 op0 = subtarget;
5119
5120 /* If we want to keep subexpressions around, don't reuse our last
5121 target. */
5122 if (optimize)
5123 subtarget = 0;
5124
5125 /* Now normalize to the proper value in MODE. Sometimes we don't
5126 have to do anything. */
5127 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5128 ;
5129 /* STORE_FLAG_VALUE might be the most negative number, so write
5130 the comparison this way to avoid a compile-time warning. */
5131 else if (- normalizep == STORE_FLAG_VALUE)
5132 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5133
5134 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5135 it hard to use a value of just the sign bit due to ANSI integer
5136 constant typing rules. */
5137 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5138 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5139 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5140 normalizep == 1);
5141 else
5142 {
5143 gcc_assert (STORE_FLAG_VALUE & 1);
5144
5145 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5146 if (normalizep == -1)
5147 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5148 }
5149
5150 /* If we were converting to a smaller mode, do the conversion now. */
5151 if (target_mode != result_mode)
5152 {
5153 convert_move (target, op0, 0);
5154 return target;
5155 }
5156 else
5157 return op0;
5158 }
5159
5160
5161 /* A subroutine of emit_store_flag only including "tricks" that do not
5162 need a recursive call. These are kept separate to avoid infinite
5163 loops. */
5164
5165 static rtx
5166 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5167 enum machine_mode mode, int unsignedp, int normalizep,
5168 enum machine_mode target_mode)
5169 {
5170 rtx subtarget;
5171 enum insn_code icode;
5172 enum machine_mode compare_mode;
5173 enum mode_class mclass;
5174 enum rtx_code scode;
5175 rtx tem;
5176
5177 if (unsignedp)
5178 code = unsigned_condition (code);
5179 scode = swap_condition (code);
5180
5181 /* If one operand is constant, make it the second one. Only do this
5182 if the other operand is not constant as well. */
5183
5184 if (swap_commutative_operands_p (op0, op1))
5185 {
5186 tem = op0;
5187 op0 = op1;
5188 op1 = tem;
5189 code = swap_condition (code);
5190 }
5191
5192 if (mode == VOIDmode)
5193 mode = GET_MODE (op0);
5194
5195 /* For some comparisons with 1 and -1, we can convert this to
5196 comparisons with zero. This will often produce more opportunities for
5197 store-flag insns. */
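/* For example, with signed operands A < 1 is the same as A <= 0 and
   A > -1 the same as A >= 0, while for unsigned operands A >= 1 is
   A != 0 and A < 1 is A == 0; these are the rewrites performed by the
   switch below.  */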
5198
5199 switch (code)
5200 {
5201 case LT:
5202 if (op1 == const1_rtx)
5203 op1 = const0_rtx, code = LE;
5204 break;
5205 case LE:
5206 if (op1 == constm1_rtx)
5207 op1 = const0_rtx, code = LT;
5208 break;
5209 case GE:
5210 if (op1 == const1_rtx)
5211 op1 = const0_rtx, code = GT;
5212 break;
5213 case GT:
5214 if (op1 == constm1_rtx)
5215 op1 = const0_rtx, code = GE;
5216 break;
5217 case GEU:
5218 if (op1 == const1_rtx)
5219 op1 = const0_rtx, code = NE;
5220 break;
5221 case LTU:
5222 if (op1 == const1_rtx)
5223 op1 = const0_rtx, code = EQ;
5224 break;
5225 default:
5226 break;
5227 }
5228
5229 /* If we are comparing a double-word integer with zero or -1, we can
5230 convert the comparison into one involving a single word. */
5231 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5232 && GET_MODE_CLASS (mode) == MODE_INT
5233 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5234 {
5235 if ((code == EQ || code == NE)
5236 && (op1 == const0_rtx || op1 == constm1_rtx))
5237 {
5238 rtx op00, op01;
5239
5240 /* Do a logical OR or AND of the two words and compare the
5241 result. */
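/* For illustration, with 32-bit words a double-word value is zero
   exactly when (low | high) == 0 and is all ones (-1) exactly when
   (low & high) == -1, so the double-word test collapses to a single
   word_mode comparison against OP1.  */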
5242 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5243 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5244 tem = expand_binop (word_mode,
5245 op1 == const0_rtx ? ior_optab : and_optab,
5246 op00, op01, NULL_RTX, unsignedp,
5247 OPTAB_DIRECT);
5248
5249 if (tem != 0)
5250 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5251 unsignedp, normalizep);
5252 }
5253 else if ((code == LT || code == GE) && op1 == const0_rtx)
5254 {
5255 rtx op0h;
5256
5257 /* If testing the sign bit, can just test on high word. */
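/* E.g. with 32-bit words, a 64-bit X is negative exactly when its high
   word, taken as a signed SImode value, is negative; the low word is
   irrelevant for the sign test.  */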
5258 op0h = simplify_gen_subreg (word_mode, op0, mode,
5259 subreg_highpart_offset (word_mode,
5260 mode));
5261 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5262 unsignedp, normalizep);
5263 }
5264 else
5265 tem = NULL_RTX;
5266
5267 if (tem)
5268 {
5269 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5270 return tem;
5271 if (!target)
5272 target = gen_reg_rtx (target_mode);
5273
5274 convert_move (target, tem,
5275 !val_signbit_known_set_p (word_mode,
5276 (normalizep ? normalizep
5277 : STORE_FLAG_VALUE)));
5278 return target;
5279 }
5280 }
5281
5282 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5283 complement of A (for GE) and shifting the sign bit to the low bit. */
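/* For illustration, in a 32-bit mode with STORE_FLAG_VALUE == 1,
   A < 0 becomes the logical shift (unsigned) A >> 31 and A >= 0
   becomes (unsigned) ~A >> 31; for a -1/0 result (NORMALIZEP == -1)
   an arithmetic shift is used instead.  */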
5284 if (op1 == const0_rtx && (code == LT || code == GE)
5285 && GET_MODE_CLASS (mode) == MODE_INT
5286 && (normalizep || STORE_FLAG_VALUE == 1
5287 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5288 {
5289 subtarget = target;
5290
5291 if (!target)
5292 target_mode = mode;
5293
5294 /* If the result is to be wider than OP0, it is best to convert it
5295 first. If it is to be narrower, it is *incorrect* to convert it
5296 first. */
5297 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5298 {
5299 op0 = convert_modes (target_mode, mode, op0, 0);
5300 mode = target_mode;
5301 }
5302
5303 if (target_mode != mode)
5304 subtarget = 0;
5305
5306 if (code == GE)
5307 op0 = expand_unop (mode, one_cmpl_optab, op0,
5308 ((STORE_FLAG_VALUE == 1 || normalizep)
5309 ? 0 : subtarget), 0);
5310
5311 if (STORE_FLAG_VALUE == 1 || normalizep)
5312 /* If we are supposed to produce a 0/1 value, we want to do
5313 a logical shift from the sign bit to the low-order bit; for
5314 a -1/0 value, we do an arithmetic shift. */
5315 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5316 GET_MODE_BITSIZE (mode) - 1,
5317 subtarget, normalizep != -1);
5318
5319 if (mode != target_mode)
5320 op0 = convert_modes (target_mode, mode, op0, 0);
5321
5322 return op0;
5323 }
5324
5325 mclass = GET_MODE_CLASS (mode);
5326 for (compare_mode = mode; compare_mode != VOIDmode;
5327 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5328 {
5329 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5330 icode = optab_handler (cstore_optab, optab_mode);
5331 if (icode != CODE_FOR_nothing)
5332 {
5333 do_pending_stack_adjust ();
5334 tem = emit_cstore (target, icode, code, mode, compare_mode,
5335 unsignedp, op0, op1, normalizep, target_mode);
5336 if (tem)
5337 return tem;
5338
5339 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5340 {
5341 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5342 unsignedp, op1, op0, normalizep, target_mode);
5343 if (tem)
5344 return tem;
5345 }
5346 break;
5347 }
5348 }
5349
5350 return 0;
5351 }
5352
5353 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5354 and storing the result in TARGET. Normally return TARGET.
5355 Return 0 if that cannot be done.
5356
5357 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5358 it is VOIDmode, they cannot both be CONST_INT.
5359
5360 UNSIGNEDP is for the case where we have to widen the operands
5361 to perform the operation. It says to use zero-extension.
5362
5363 NORMALIZEP is 1 if we should convert the result to be either zero
5364 or one. NORMALIZEP is -1 if we should convert the result to be
5365 either zero or -1. If NORMALIZEP is zero, the result will be left
5366 "raw" out of the scc insn. */
5367
5368 rtx
5369 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5370 enum machine_mode mode, int unsignedp, int normalizep)
5371 {
5372 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5373 enum rtx_code rcode;
5374 rtx subtarget;
5375 rtx tem, last, trueval;
5376
5377 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5378 target_mode);
5379 if (tem)
5380 return tem;
5381
5382 /* If we reached here, we can't do this with a scc insn; however, there
5383 are some comparisons that can be done in other ways. Don't do any
5384 of these cases if branches are very cheap. */
5385 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5386 return 0;
5387
5388 /* See what we need to return. We can only return a 1, -1, or the
5389 sign bit. */
5390
5391 if (normalizep == 0)
5392 {
5393 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5394 normalizep = STORE_FLAG_VALUE;
5395
5396 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5397 ;
5398 else
5399 return 0;
5400 }
5401
5402 last = get_last_insn ();
5403
5404 /* If optimizing, use different pseudo registers for each insn, instead
5405 of reusing the same pseudo. This leads to better CSE, but slows
5406 down the compiler, since there are more pseudos. */
5407 subtarget = (!optimize
5408 && (target_mode == mode)) ? target : NULL_RTX;
5409 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5410
5411 /* For floating-point comparisons, try the reverse comparison or try
5412 changing the "orderedness" of the comparison. */
5413 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5414 {
5415 enum rtx_code first_code;
5416 bool and_them;
5417
5418 rcode = reverse_condition_maybe_unordered (code);
5419 if (can_compare_p (rcode, mode, ccp_store_flag)
5420 && (code == ORDERED || code == UNORDERED
5421 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5422 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5423 {
5424 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5425 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5426
5427 /* For the reverse comparison, use either an addition or a XOR. */
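/* For illustration, take STORE_FLAG_VALUE == 1 and NORMALIZEP == -1:
   the reverse comparison yields 1 when the original comparison is
   false and 0 when it is true, so adding NORMALIZEP (-1) produces the
   desired -1/0 value for the original comparison.  In the XOR case,
   the raw result is flipped between 0 and TRUEVAL.  */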
5428 if (want_add
5429 && rtx_cost (GEN_INT (normalizep), PLUS,
5430 optimize_insn_for_speed_p ()) == 0)
5431 {
5432 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5433 STORE_FLAG_VALUE, target_mode);
5434 if (tem)
5435 return expand_binop (target_mode, add_optab, tem,
5436 GEN_INT (normalizep),
5437 target, 0, OPTAB_WIDEN);
5438 }
5439 else if (!want_add
5440 && rtx_cost (trueval, XOR,
5441 optimize_insn_for_speed_p ()) == 0)
5442 {
5443 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5444 normalizep, target_mode);
5445 if (tem)
5446 return expand_binop (target_mode, xor_optab, tem, trueval,
5447 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5448 }
5449 }
5450
5451 delete_insns_since (last);
5452
5453 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5454 if (code == ORDERED || code == UNORDERED)
5455 return 0;
5456
5457 and_them = split_comparison (code, mode, &first_code, &code);
5458
5459 /* If there are no NaNs, the first comparison should always fall through.
5460 Effectively change the comparison to the other one. */
5461 if (!HONOR_NANS (mode))
5462 {
5463 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5464 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5465 target_mode);
5466 }
5467
5468 #ifdef HAVE_conditional_move
5469 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5470 conditional move. */
5471 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5472 normalizep, target_mode);
5473 if (tem == 0)
5474 return 0;
5475
5476 if (and_them)
5477 tem = emit_conditional_move (target, code, op0, op1, mode,
5478 tem, const0_rtx, GET_MODE (tem), 0);
5479 else
5480 tem = emit_conditional_move (target, code, op0, op1, mode,
5481 trueval, tem, GET_MODE (tem), 0);
5482
5483 if (tem == 0)
5484 delete_insns_since (last);
5485 return tem;
5486 #else
5487 return 0;
5488 #endif
5489 }
5490
5491 /* The remaining tricks only apply to integer comparisons. */
5492
5493 if (GET_MODE_CLASS (mode) != MODE_INT)
5494 return 0;
5495
5496 /* If this is an equality comparison of integers, we can try to exclusive-or
5497 (or subtract) the two operands and use a recursive call to try the
5498 comparison with zero. Don't do any of these cases if branches are
5499 very cheap. */
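/* E.g. A == B holds exactly when (A ^ B) == 0 and A != B exactly when
   (A ^ B) != 0 (A - B works as well when no xor pattern exists), and
   the comparison against zero may then match one of the cases handled
   above.  */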
5500
5501 if ((code == EQ || code == NE) && op1 != const0_rtx)
5502 {
5503 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5504 OPTAB_WIDEN);
5505
5506 if (tem == 0)
5507 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5508 OPTAB_WIDEN);
5509 if (tem != 0)
5510 tem = emit_store_flag (target, code, tem, const0_rtx,
5511 mode, unsignedp, normalizep);
5512 if (tem != 0)
5513 return tem;
5514
5515 delete_insns_since (last);
5516 }
5517
5518 /* For integer comparisons, try the reverse comparison. However, for
5519 small X, if we'd have to extend anyway, implementing "X != 0"
5520 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5521 rcode = reverse_condition (code);
5522 if (can_compare_p (rcode, mode, ccp_store_flag)
5523 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5524 && code == NE
5525 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5526 && op1 == const0_rtx))
5527 {
5528 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5529 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5530
5531 /* Again, for the reverse comparison, use either an addition or a XOR. */
5532 if (want_add
5533 && rtx_cost (GEN_INT (normalizep), PLUS,
5534 optimize_insn_for_speed_p ()) == 0)
5535 {
5536 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5537 STORE_FLAG_VALUE, target_mode);
5538 if (tem != 0)
5539 tem = expand_binop (target_mode, add_optab, tem,
5540 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5541 }
5542 else if (!want_add
5543 && rtx_cost (trueval, XOR,
5544 optimize_insn_for_speed_p ()) == 0)
5545 {
5546 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5547 normalizep, target_mode);
5548 if (tem != 0)
5549 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5550 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5551 }
5552
5553 if (tem != 0)
5554 return tem;
5555 delete_insns_since (last);
5556 }
5557
5558 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5559 the constant zero. Reject all other comparisons at this point. Only
5560 do LE and GT if branches are expensive since they are expensive on
5561 2-operand machines. */
5562
5563 if (op1 != const0_rtx
5564 || (code != EQ && code != NE
5565 && (BRANCH_COST (optimize_insn_for_speed_p (),
5566 false) <= 1 || (code != LE && code != GT))))
5567 return 0;
5568
5569 /* Try to put the result of the comparison in the sign bit. Assume we can't
5570 do the necessary operation below. */
5571
5572 tem = 0;
5573
5574 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5575 the sign bit set. */
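/* For example, in 32-bit arithmetic: A == 3 gives 3 | 2 == 3 (sign bit
   clear), A == 0 gives 0 | -1 == -1 (sign bit set), and A == -4 gives
   -4 | -5 == -1 (sign bit set), matching A <= 0 in each case.  */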
5576
5577 if (code == LE)
5578 {
5579 /* This is destructive, so SUBTARGET can't be OP0. */
5580 if (rtx_equal_p (subtarget, op0))
5581 subtarget = 0;
5582
5583 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5584 OPTAB_WIDEN);
5585 if (tem)
5586 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5587 OPTAB_WIDEN);
5588 }
5589
5590 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5591 number of bits in the mode of OP0, minus one. */
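/* For example, with BITS == 31: A == 5 gives (5 >> 31) - 5 == -5 (sign
   bit set), A == 0 gives 0 - 0 == 0 (sign bit clear), and A == -7
   gives (-7 >> 31) - (-7) == -1 + 7 == 6 (sign bit clear), matching
   A > 0 in each case.  */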
5592
5593 if (code == GT)
5594 {
5595 if (rtx_equal_p (subtarget, op0))
5596 subtarget = 0;
5597
5598 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5599 GET_MODE_BITSIZE (mode) - 1,
5600 subtarget, 0);
5601 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5602 OPTAB_WIDEN);
5603 }
5604
5605 if (code == EQ || code == NE)
5606 {
5607 /* For EQ or NE, one way to do the comparison is to apply an operation
5608 that converts the operand into a positive number if it is nonzero
5609 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5610 for NE we negate. This puts the result in the sign bit. Then we
5611 normalize with a shift, if needed.
5612
5613 Two operations that can do the above actions are ABS and FFS, so try
5614 them. If that doesn't work, and MODE is smaller than a full word,
5615 we can use zero-extension to the wider mode (an unsigned conversion)
5616 as the operation. */
5617
5618 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5619 that is compensated by the subsequent overflow when subtracting
5620 one / negating. */
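/* For example, with ABS in 32-bit arithmetic: for A == 7, ABS gives 7;
   subtracting 1 leaves 6 (sign clear, EQ false) and negating gives -7
   (sign set, NE true).  For A == 0, 0 - 1 == -1 has the sign bit set
   (EQ true) and -0 == 0 has it clear (NE false).  For A == INT_MIN,
   ABS wraps to INT_MIN, but INT_MIN - 1 wraps to INT_MAX (sign clear)
   and -INT_MIN wraps back to INT_MIN (sign set), so the sign bit is
   still correct.  */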
5621
5622 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5623 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5624 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5625 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5626 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5627 {
5628 tem = convert_modes (word_mode, mode, op0, 1);
5629 mode = word_mode;
5630 }
5631
5632 if (tem != 0)
5633 {
5634 if (code == EQ)
5635 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5636 0, OPTAB_WIDEN);
5637 else
5638 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5639 }
5640
5641 /* If we couldn't do it that way, for NE we can "or" the two's complement
5642 of the value with itself. For EQ, we take the one's complement of
5643 that "or", which is an extra insn, so we only handle EQ if branches
5644 are expensive. */
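/* For example, A | -A has the sign bit set exactly when A is nonzero:
   if A != 0 then either A or -A is negative, while 0 | -0 == 0.
   (INT_MIN is its own negation, but it is already negative.)  */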
5645
5646 if (tem == 0
5647 && (code == NE
5648 || BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) > 1))
5650 {
5651 if (rtx_equal_p (subtarget, op0))
5652 subtarget = 0;
5653
5654 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5655 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5656 OPTAB_WIDEN);
5657
5658 if (tem && code == EQ)
5659 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5660 }
5661 }
5662
5663 if (tem && normalizep)
5664 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5665 GET_MODE_BITSIZE (mode) - 1,
5666 subtarget, normalizep == 1);
5667
5668 if (tem)
5669 {
5670 if (!target)
5671 ;
5672 else if (GET_MODE (tem) != target_mode)
5673 {
5674 convert_move (target, tem, 0);
5675 tem = target;
5676 }
5677 else if (!subtarget)
5678 {
5679 emit_move_insn (target, tem);
5680 tem = target;
5681 }
5682 }
5683 else
5684 delete_insns_since (last);
5685
5686 return tem;
5687 }
5688
5689 /* Like emit_store_flag, but always succeeds. */
5690
5691 rtx
5692 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5693 enum machine_mode mode, int unsignedp, int normalizep)
5694 {
5695 rtx tem, label;
5696 rtx trueval, falseval;
5697
5698 /* First see if emit_store_flag can do the job. */
5699 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5700 if (tem != 0)
5701 return tem;
5702
5703 if (!target)
5704 target = gen_reg_rtx (word_mode);
5705
5706 /* If this failed, we have to do this with set/compare/jump/set code.
5707 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
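/* Roughly, the general fallback further below emits

     target = trueval;
     if (op0 CODE op1) goto label;
     target = falseval;
   label:

   while the NE-against-zero special case just below only needs the
   single conditional store "if (target != 0) target = trueval".  */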
5708 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5709 if (code == NE
5710 && GET_MODE_CLASS (mode) == MODE_INT
5711 && REG_P (target)
5712 && op0 == target
5713 && op1 == const0_rtx)
5714 {
5715 label = gen_label_rtx ();
5716 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5717 mode, NULL_RTX, NULL_RTX, label, -1);
5718 emit_move_insn (target, trueval);
5719 emit_label (label);
5720 return target;
5721 }
5722
5723 if (!REG_P (target)
5724 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5725 target = gen_reg_rtx (GET_MODE (target));
5726
5727 /* Jump in the right direction if the target cannot implement CODE
5728 but can jump on its reverse condition. */
5729 falseval = const0_rtx;
5730 if (! can_compare_p (code, mode, ccp_jump)
5731 && (! FLOAT_MODE_P (mode)
5732 || code == ORDERED || code == UNORDERED
5733 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5734 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5735 {
5736 enum rtx_code rcode;
5737 if (FLOAT_MODE_P (mode))
5738 rcode = reverse_condition_maybe_unordered (code);
5739 else
5740 rcode = reverse_condition (code);
5741
5742 /* Canonicalize to UNORDERED for the libcall. */
5743 if (can_compare_p (rcode, mode, ccp_jump)
5744 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5745 {
5746 falseval = trueval;
5747 trueval = const0_rtx;
5748 code = rcode;
5749 }
5750 }
5751
5752 emit_move_insn (target, trueval);
5753 label = gen_label_rtx ();
5754 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5755 NULL_RTX, label, -1);
5756
5757 emit_move_insn (target, falseval);
5758 emit_label (label);
5759
5760 return target;
5761 }
5762 \f
5763 /* Perform possibly multi-word comparison and conditional jump to LABEL
5764 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5765 now a thin wrapper around do_compare_rtx_and_jump. */
5766
5767 static void
5768 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5769 rtx label)
5770 {
5771 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5772 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5773 NULL_RTX, NULL_RTX, label, -1);
5774 }