1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56
57 /* Test whether a value is zero or a power of two.  */
58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
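/* This works because X & (X - 1) clears the lowest set bit of X, so the
   result is zero exactly when X has at most one bit set.  For example,
   8 & 7 == 0, while 12 & 11 == 8.  */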
59
60 /* Nonzero means divides or modulus operations are relatively cheap for
61 powers of two, so don't use branches; emit the operation instead.
62 Usually, this will mean that the MD file will emit non-branch
63 sequences. */
64
65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
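/* init_expmed below marks signed division by a power of two as cheap when
   its estimated rtx_cost (using division by 32 as the representative case)
   is at most twice the cost of an addition in the same mode; for modulus
   the threshold is four additions.  */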
67
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
71
72 /* For compilers that support multiple targets with different word sizes,
73 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
74 is the H8/300(H) compiler. */
75
76 #ifndef MAX_BITS_PER_WORD
77 #define MAX_BITS_PER_WORD BITS_PER_WORD
78 #endif
79
80 /* Reduce conditional compilation elsewhere. */
81 #ifndef HAVE_insv
82 #define HAVE_insv 0
83 #define CODE_FOR_insv CODE_FOR_nothing
84 #define gen_insv(a,b,c,d) NULL_RTX
85 #endif
86 #ifndef HAVE_extv
87 #define HAVE_extv 0
88 #define CODE_FOR_extv CODE_FOR_nothing
89 #define gen_extv(a,b,c,d) NULL_RTX
90 #endif
91 #ifndef HAVE_extzv
92 #define HAVE_extzv 0
93 #define CODE_FOR_extzv CODE_FOR_nothing
94 #define gen_extzv(a,b,c,d) NULL_RTX
95 #endif
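/* On targets that lack these named patterns, the dummy definitions make
   HAVE_insv etc. constant zero and turn the gen_* calls into NULL_RTX,
   so the corresponding code below compiles but is never executed.  */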
96
97 /* Cost of various pieces of RTL. Note that some of these are indexed by
98 shift count and some by mode. */
99 static int zero_cost;
100 static int add_cost[NUM_MACHINE_MODES];
101 static int neg_cost[NUM_MACHINE_MODES];
102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int mul_cost[NUM_MACHINE_MODES];
106 static int div_cost[NUM_MACHINE_MODES];
107 static int mul_widen_cost[NUM_MACHINE_MODES];
108 static int mul_highpart_cost[NUM_MACHINE_MODES];
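/* shift_cost[M][N] is the cost of shifting a mode-M value by N bits.
   shiftadd_cost[M][N] and shiftsub_cost[M][N] are the costs of
   (X << N) + Y and (X << N) - Y, expressed as a multiply by 2**N so that
   shift-and-add style patterns can match.  mul_widen_cost[M] is the cost
   of a widening multiply producing mode M, and mul_highpart_cost[M] is
   the cost of the high part of a widening multiply of two mode-M values.  */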
109
110 void
111 init_expmed (void)
112 {
113 struct
114 {
115 struct rtx_def reg; rtunion reg_fld[2];
116 struct rtx_def plus; rtunion plus_fld1;
117 struct rtx_def neg;
118 struct rtx_def udiv; rtunion udiv_fld1;
119 struct rtx_def mult; rtunion mult_fld1;
120 struct rtx_def div; rtunion div_fld1;
121 struct rtx_def mod; rtunion mod_fld1;
122 struct rtx_def zext;
123 struct rtx_def wide_mult; rtunion wide_mult_fld1;
124 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
125 struct rtx_def wide_trunc;
126 struct rtx_def shift; rtunion shift_fld1;
127 struct rtx_def shift_mult; rtunion shift_mult_fld1;
128 struct rtx_def shift_add; rtunion shift_add_fld1;
129 struct rtx_def shift_sub; rtunion shift_sub_fld1;
130 } all;
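/* The members of ALL are skeleton RTL expressions built on the stack;
   the loop below gives them each integer mode in turn and queries
   rtx_cost, so the cost tables can be filled in without allocating
   real RTL objects.  */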
131
132 rtx pow2[MAX_BITS_PER_WORD];
133 rtx cint[MAX_BITS_PER_WORD];
134 int m, n;
135 enum machine_mode mode, wider_mode;
136
137 zero_cost = rtx_cost (const0_rtx, 0);
138
139 for (m = 1; m < MAX_BITS_PER_WORD; m++)
140 {
141 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
142 cint[m] = GEN_INT (m);
143 }
144
145 memset (&all, 0, sizeof all);
146
147 PUT_CODE (&all.reg, REG);
148 /* Avoid using hard regs in ways which may be unsupported. */
149 REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
150
151 PUT_CODE (&all.plus, PLUS);
152 XEXP (&all.plus, 0) = &all.reg;
153 XEXP (&all.plus, 1) = &all.reg;
154
155 PUT_CODE (&all.neg, NEG);
156 XEXP (&all.neg, 0) = &all.reg;
157
158 PUT_CODE (&all.udiv, UDIV);
159 XEXP (&all.udiv, 0) = &all.reg;
160 XEXP (&all.udiv, 1) = &all.reg;
161
162 PUT_CODE (&all.mult, MULT);
163 XEXP (&all.mult, 0) = &all.reg;
164 XEXP (&all.mult, 1) = &all.reg;
165
166 PUT_CODE (&all.div, DIV);
167 XEXP (&all.div, 0) = &all.reg;
168 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
169
170 PUT_CODE (&all.mod, MOD);
171 XEXP (&all.mod, 0) = &all.reg;
172 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
173
174 PUT_CODE (&all.zext, ZERO_EXTEND);
175 XEXP (&all.zext, 0) = &all.reg;
176
177 PUT_CODE (&all.wide_mult, MULT);
178 XEXP (&all.wide_mult, 0) = &all.zext;
179 XEXP (&all.wide_mult, 1) = &all.zext;
180
181 PUT_CODE (&all.wide_lshr, LSHIFTRT);
182 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
183
184 PUT_CODE (&all.wide_trunc, TRUNCATE);
185 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
186
187 PUT_CODE (&all.shift, ASHIFT);
188 XEXP (&all.shift, 0) = &all.reg;
189
190 PUT_CODE (&all.shift_mult, MULT);
191 XEXP (&all.shift_mult, 0) = &all.reg;
192
193 PUT_CODE (&all.shift_add, PLUS);
194 XEXP (&all.shift_add, 0) = &all.shift_mult;
195 XEXP (&all.shift_add, 1) = &all.reg;
196
197 PUT_CODE (&all.shift_sub, MINUS);
198 XEXP (&all.shift_sub, 0) = &all.shift_mult;
199 XEXP (&all.shift_sub, 1) = &all.reg;
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.udiv, mode);
209 PUT_MODE (&all.mult, mode);
210 PUT_MODE (&all.div, mode);
211 PUT_MODE (&all.mod, mode);
212 PUT_MODE (&all.wide_trunc, mode);
213 PUT_MODE (&all.shift, mode);
214 PUT_MODE (&all.shift_mult, mode);
215 PUT_MODE (&all.shift_add, mode);
216 PUT_MODE (&all.shift_sub, mode);
217
218 add_cost[mode] = rtx_cost (&all.plus, SET);
219 neg_cost[mode] = rtx_cost (&all.neg, SET);
220 div_cost[mode] = rtx_cost (&all.udiv, SET);
221 mul_cost[mode] = rtx_cost (&all.mult, SET);
222
223 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
224 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
225
226 wider_mode = GET_MODE_WIDER_MODE (mode);
227 if (wider_mode != VOIDmode)
228 {
229 PUT_MODE (&all.zext, wider_mode);
230 PUT_MODE (&all.wide_mult, wider_mode);
231 PUT_MODE (&all.wide_lshr, wider_mode);
232 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
233
234 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
235 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
236 }
237
238 shift_cost[mode][0] = 0;
239 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
240
241 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
242 for (m = 1; m < n; m++)
243 {
244 XEXP (&all.shift, 1) = cint[m];
245 XEXP (&all.shift_mult, 1) = pow2[m];
246
247 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
248 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
249 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
250 }
251 }
252 }
253
254 /* Return an rtx representing minus the value of X.
255 MODE is the intended mode of the result,
256 useful if X is a CONST_INT. */
257
258 rtx
259 negate_rtx (enum machine_mode mode, rtx x)
260 {
261 rtx result = simplify_unary_operation (NEG, mode, x, mode);
262
263 if (result == 0)
264 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
265
266 return result;
267 }
268
269 /* Report on the availability of insv/extv/extzv and the desired mode
270 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
271 is false; else the mode of the specified operand. If OPNO is -1,
272 all the caller cares about is whether the insn is available. */
273 enum machine_mode
274 mode_for_extraction (enum extraction_pattern pattern, int opno)
275 {
276 const struct insn_data *data;
277
278 switch (pattern)
279 {
280 case EP_insv:
281 if (HAVE_insv)
282 {
283 data = &insn_data[CODE_FOR_insv];
284 break;
285 }
286 return MAX_MACHINE_MODE;
287
288 case EP_extv:
289 if (HAVE_extv)
290 {
291 data = &insn_data[CODE_FOR_extv];
292 break;
293 }
294 return MAX_MACHINE_MODE;
295
296 case EP_extzv:
297 if (HAVE_extzv)
298 {
299 data = &insn_data[CODE_FOR_extzv];
300 break;
301 }
302 return MAX_MACHINE_MODE;
303
304 default:
305 gcc_unreachable ();
306 }
307
308 if (opno == -1)
309 return VOIDmode;
310
311 /* Everyone who uses this function used to follow it with
312 if (result == VOIDmode) result = word_mode; */
313 if (data->operand[opno].mode == VOIDmode)
314 return word_mode;
315 return data->operand[opno].mode;
316 }
317
318 \f
319 /* Generate code to store value from rtx VALUE
320 into a bit-field within structure STR_RTX
321 containing BITSIZE bits starting at bit BITNUM.
322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
325
326 /* ??? Note that there are two different ideas here for how
327 to determine the size to count bits within, for a register.
328 One is BITS_PER_WORD, and the other is the size of operand 3
329 of the insv pattern.
330
331 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
332 otherwise, we use the mode of operand 3.  */
333
334 rtx
335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value)
338 {
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
345
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
347
348 while (GET_CODE (op0) == SUBREG)
349 {
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
356 op0 = SUBREG_REG (op0);
357 }
358
359 /* No action is needed if the target is a register and if the field
360 lies completely outside that register. This can occur if the source
361 code contains an out-of-bounds access to a small array. */
362 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
363 return value;
364
365 /* Use vec_set patterns for inserting parts of vectors whenever
366 available. */
367 if (VECTOR_MODE_P (GET_MODE (op0))
368 && !MEM_P (op0)
369 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
370 != CODE_FOR_nothing)
371 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
372 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
373 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
374 {
375 enum machine_mode outermode = GET_MODE (op0);
376 enum machine_mode innermode = GET_MODE_INNER (outermode);
377 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
378 int pos = bitnum / GET_MODE_BITSIZE (innermode);
379 rtx rtxpos = GEN_INT (pos);
380 rtx src = value;
381 rtx dest = op0;
382 rtx pat, seq;
383 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
384 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
385 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
386
387 start_sequence ();
388
389 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
390 src = copy_to_mode_reg (mode1, src);
391
392 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
393 rtxpos = copy_to_mode_reg (mode2, rtxpos);
394
395 /* We could handle this, but we should always be called with a pseudo
396 for our targets and all insns should take them as outputs. */
397 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
398 && (*insn_data[icode].operand[1].predicate) (src, mode1)
399 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
400 pat = GEN_FCN (icode) (dest, src, rtxpos);
401 seq = get_insns ();
402 end_sequence ();
403 if (pat)
404 {
405 emit_insn (seq);
406 emit_insn (pat);
407 return dest;
408 }
409 }
410
411 /* If the target is a register, overwriting the entire object, or storing
412 a full-word or multi-word field can be done with just a SUBREG.
413
414 If the target is memory, storing any naturally aligned field can be
415 done with a simple store. For targets that support fast unaligned
416 memory, any naturally sized, unit aligned field can be done directly. */
417
418 offset = bitnum / unit;
419 bitpos = bitnum % unit;
420 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
421 + (offset * UNITS_PER_WORD);
422
423 if (bitpos == 0
424 && bitsize == GET_MODE_BITSIZE (fieldmode)
425 && (!MEM_P (op0)
426 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
427 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
428 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
429 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
430 || (offset * BITS_PER_UNIT % bitsize == 0
431 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
432 {
433 if (MEM_P (op0))
434 op0 = adjust_address (op0, fieldmode, offset);
435 else if (GET_MODE (op0) != fieldmode)
436 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
437 byte_offset);
438 emit_move_insn (op0, value);
439 return value;
440 }
441
442 /* Make sure we are playing with integral modes. Pun with subregs
443 if we aren't. This must come after the entire register case above,
444 since that case is valid for any mode. The following cases are only
445 valid for integral modes. */
446 {
447 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
448 if (imode != GET_MODE (op0))
449 {
450 if (MEM_P (op0))
451 op0 = adjust_address (op0, imode, 0);
452 else
453 {
454 gcc_assert (imode != BLKmode);
455 op0 = gen_lowpart (imode, op0);
456 }
457 }
458 }
459
460 /* We may be accessing data outside the field, which means
461 we can alias adjacent data. */
462 if (MEM_P (op0))
463 {
464 op0 = shallow_copy_rtx (op0);
465 set_mem_alias_set (op0, 0);
466 set_mem_expr (op0, 0);
467 }
468
469 /* If OP0 is a register, BITPOS must count within a word.
470 But as we have it, it counts within whatever size OP0 now has.
471 On a bigendian machine, these are not the same, so convert. */
472 if (BYTES_BIG_ENDIAN
473 && !MEM_P (op0)
474 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
475 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
476
477 /* Storing an lsb-aligned field in a register
478 can be done with a movestrict instruction. */
479
480 if (!MEM_P (op0)
481 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
482 && bitsize == GET_MODE_BITSIZE (fieldmode)
483 && (movstrict_optab->handlers[fieldmode].insn_code
484 != CODE_FOR_nothing))
485 {
486 int icode = movstrict_optab->handlers[fieldmode].insn_code;
487
488 /* Get appropriate low part of the value being stored. */
489 if (GET_CODE (value) == CONST_INT || REG_P (value))
490 value = gen_lowpart (fieldmode, value);
491 else if (!(GET_CODE (value) == SYMBOL_REF
492 || GET_CODE (value) == LABEL_REF
493 || GET_CODE (value) == CONST))
494 value = convert_to_mode (fieldmode, value, 0);
495
496 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
497 value = copy_to_mode_reg (fieldmode, value);
498
499 if (GET_CODE (op0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 op0 = SUBREG_REG (op0);
508 }
509
510 emit_insn (GEN_FCN (icode)
511 (gen_rtx_SUBREG (fieldmode, op0,
512 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
513 + (offset * UNITS_PER_WORD)),
514 value));
515
516 return value;
517 }
518
519 /* Handle fields bigger than a word. */
520
521 if (bitsize > BITS_PER_WORD)
522 {
523 /* Here we transfer the words of the field
524 in the order least significant first.
525 This is because the most significant word is the one which may
526 be less than full.
527 However, only do that if the value is not BLKmode. */
528
529 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
530 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
531 unsigned int i;
532
533 /* This is the mode we must force value to, so that there will be enough
534 subwords to extract. Note that fieldmode will often (always?) be
535 VOIDmode, because that is what store_field uses to indicate that this
536 is a bit field, but passing VOIDmode to operand_subword_force
537 is not allowed. */
538 fieldmode = GET_MODE (value);
539 if (fieldmode == VOIDmode)
540 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
541
542 for (i = 0; i < nwords; i++)
543 {
544 /* If I is 0, use the low-order word in both field and target;
545 if I is 1, use the next to lowest word; and so on. */
546 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
547 unsigned int bit_offset = (backwards
548 ? MAX ((int) bitsize - ((int) i + 1)
549 * BITS_PER_WORD,
550 0)
551 : (int) i * BITS_PER_WORD);
552
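/* For example, on a 32-bit-word target with WORDS_BIG_ENDIAN, a 96-bit
   field is stored by visiting words 2, 1 and 0 of VALUE at bit offsets
   64, 32 and 0 respectively, so the (possibly partial) most significant
   word is written last.  */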
553 store_bit_field (op0, MIN (BITS_PER_WORD,
554 bitsize - i * BITS_PER_WORD),
555 bitnum + bit_offset, word_mode,
556 operand_subword_force (value, wordnum, fieldmode));
557 }
558 return value;
559 }
560
561 /* From here on we can assume that the field to be stored is no wider
562 than a word.  */
563
564 /* OFFSET is the number of words or bytes (UNIT says which)
565 from STR_RTX to the first word or byte containing part of the field. */
566
567 if (!MEM_P (op0))
568 {
569 if (offset != 0
570 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
571 {
572 if (!REG_P (op0))
573 {
574 /* Since this is a destination (lvalue), we can't copy
575 it to a pseudo. We can remove a SUBREG that does not
576 change the size of the operand. Such a SUBREG may
577 have been added above. */
578 gcc_assert (GET_CODE (op0) == SUBREG
579 && (GET_MODE_SIZE (GET_MODE (op0))
580 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
581 op0 = SUBREG_REG (op0);
582 }
583 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
584 op0, (offset * UNITS_PER_WORD));
585 }
586 offset = 0;
587 }
588
589 /* If VALUE has a floating-point or complex mode, access it as an
590 integer of the corresponding size. This can occur on a machine
591 with 64-bit registers that uses SFmode for float.  It can also
592 occur for unaligned float or complex fields. */
593 orig_value = value;
594 if (GET_MODE (value) != VOIDmode
595 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
596 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
597 {
598 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
599 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
600 }
601
602 /* Now OFFSET is nonzero only if OP0 is memory
603 and is therefore always measured in bytes. */
604
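/* Try the insv pattern first.  It is used when the field is no wider than
   the mode of insv operand 3 and, for register destinations, does not
   extend beyond that mode.  Single-bit constant stores are excluded here
   because store_fixed_bit_field handles them with a single AND or IOR.  */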
605 if (HAVE_insv
606 && GET_MODE (value) != BLKmode
607 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
608 && bitsize > 0
609 && GET_MODE_BITSIZE (op_mode) >= bitsize
610 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
611 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
612 {
613 int xbitpos = bitpos;
614 rtx value1;
615 rtx xop0 = op0;
616 rtx last = get_last_insn ();
617 rtx pat;
618 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
619 int save_volatile_ok = volatile_ok;
620
621 volatile_ok = 1;
622
623 /* If this machine's insv can only insert into a register, copy OP0
624 into a register and save it back later. */
625 if (MEM_P (op0)
626 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
627 (op0, VOIDmode)))
628 {
629 rtx tempreg;
630 enum machine_mode bestmode;
631
632 /* Get the mode to use for inserting into this field. If OP0 is
633 BLKmode, get the smallest mode consistent with the alignment. If
634 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
635 mode. Otherwise, use the smallest mode containing the field. */
636
637 if (GET_MODE (op0) == BLKmode
638 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
639 bestmode
640 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
641 MEM_VOLATILE_P (op0));
642 else
643 bestmode = GET_MODE (op0);
644
645 if (bestmode == VOIDmode
646 || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
647 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
648 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
649 goto insv_loses;
650
651 /* Adjust address to point to the containing unit of that mode.
652 Compute offset as multiple of this unit, counting in bytes. */
653 unit = GET_MODE_BITSIZE (bestmode);
654 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
655 bitpos = bitnum % unit;
656 op0 = adjust_address (op0, bestmode, offset);
657
658 /* Fetch that unit, store the bitfield in it, then store
659 the unit. */
660 tempreg = copy_to_reg (op0);
661 store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
662 emit_move_insn (op0, tempreg);
663 return value;
664 }
665 volatile_ok = save_volatile_ok;
666
667 /* Add OFFSET into OP0's address. */
668 if (MEM_P (xop0))
669 xop0 = adjust_address (xop0, byte_mode, offset);
670
671 /* If xop0 is a register, we need it in MAXMODE
672 to make it acceptable to the format of insv. */
673 if (GET_CODE (xop0) == SUBREG)
674 /* We can't just change the mode, because this might clobber op0,
675 and we will need the original value of op0 if insv fails. */
676 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
677 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
678 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
679
680 /* On big-endian machines, we count bits from the most significant.
681 If the bit field insn does not, we must invert. */
682
683 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
684 xbitpos = unit - bitsize - xbitpos;
685
686 /* We have been counting XBITPOS within UNIT.
687 Count instead within the size of the register. */
688 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
689 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
690
691 unit = GET_MODE_BITSIZE (maxmode);
692
693 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
694 value1 = value;
695 if (GET_MODE (value) != maxmode)
696 {
697 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
698 {
699 /* Optimization: Don't bother really extending VALUE
700 if it has all the bits we will actually use. However,
701 if we must narrow it, be sure we do it correctly. */
702
703 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
704 {
705 rtx tmp;
706
707 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
708 if (! tmp)
709 tmp = simplify_gen_subreg (maxmode,
710 force_reg (GET_MODE (value),
711 value1),
712 GET_MODE (value), 0);
713 value1 = tmp;
714 }
715 else
716 value1 = gen_lowpart (maxmode, value1);
717 }
718 else if (GET_CODE (value) == CONST_INT)
719 value1 = gen_int_mode (INTVAL (value), maxmode);
720 else
721 /* Parse phase is supposed to make VALUE's data type
722 match that of the component reference, which is a type
723 at least as wide as the field; so VALUE should have
724 a mode that corresponds to that type. */
725 gcc_assert (CONSTANT_P (value));
726 }
727
728 /* If this machine's insv insists on a register,
729 get VALUE1 into a register. */
730 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
731 (value1, maxmode)))
732 value1 = force_reg (maxmode, value1);
733
734 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
735 if (pat)
736 emit_insn (pat);
737 else
738 {
739 delete_insns_since (last);
740 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
741 }
742 }
743 else
744 insv_loses:
745 /* Insv is not available; store using shifts and boolean ops. */
746 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
747 return value;
748 }
749 \f
750 /* Use shifts and boolean operations to store VALUE
751 into a bit field of width BITSIZE
752 in the location specified by OP0, offset by OFFSET bytes.
753 (OFFSET must be 0 if OP0 is a register.)
754 The field starts at position BITPOS within the byte.
755 (If OP0 is a register, it may be a full word or a narrower mode,
756 but BITPOS still counts within a full word,
757 which is significant on bigendian machines.) */
758
759 static void
760 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
761 unsigned HOST_WIDE_INT bitsize,
762 unsigned HOST_WIDE_INT bitpos, rtx value)
763 {
764 enum machine_mode mode;
765 unsigned int total_bits = BITS_PER_WORD;
766 rtx subtarget, temp;
767 int all_zero = 0;
768 int all_one = 0;
769
770 /* There is a case not handled here:
771 a structure with a known alignment of just a halfword
772 and a field split across two aligned halfwords within the structure.
773 Or likewise a structure with a known alignment of just a byte
774 and a field split across two bytes.
775 Such cases are not supposed to be able to occur. */
776
777 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
778 {
779 gcc_assert (!offset);
780 /* Special treatment for a bit field split across two registers. */
781 if (bitsize + bitpos > BITS_PER_WORD)
782 {
783 store_split_bit_field (op0, bitsize, bitpos, value);
784 return;
785 }
786 }
787 else
788 {
789 /* Get the proper mode to use for this field. We want a mode that
790 includes the entire field. If such a mode would be larger than
791 a word, we won't be doing the extraction the normal way.
792 We don't want a mode bigger than the destination. */
793
794 mode = GET_MODE (op0);
795 if (GET_MODE_BITSIZE (mode) == 0
796 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
797 mode = word_mode;
798 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
799 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
800
801 if (mode == VOIDmode)
802 {
803 /* The only way this should occur is if the field spans word
804 boundaries. */
805 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
806 value);
807 return;
808 }
809
810 total_bits = GET_MODE_BITSIZE (mode);
811
812 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
813 be in the range 0 to total_bits-1, and put any excess bytes in
814 OFFSET. */
815 if (bitpos >= total_bits)
816 {
817 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
818 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
819 * BITS_PER_UNIT);
820 }
821
822 /* Get ref to an aligned byte, halfword, or word containing the field.
823 Adjust BITPOS to be position within a word,
824 and OFFSET to be the offset of that word.
825 Then alter OP0 to refer to that word. */
826 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
827 offset -= (offset % (total_bits / BITS_PER_UNIT));
828 op0 = adjust_address (op0, mode, offset);
829 }
830
831 mode = GET_MODE (op0);
832
833 /* Now MODE is either some integral mode for a MEM as OP0,
834 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
835 The bit field is contained entirely within OP0.
836 BITPOS is the starting bit number within OP0.
837 (OP0's mode may actually be narrower than MODE.) */
838
839 if (BYTES_BIG_ENDIAN)
840 /* BITPOS is the distance between our msb
841 and that of the containing datum.
842 Convert it to the distance from the lsb. */
843 bitpos = total_bits - bitsize - bitpos;
844
845 /* Now BITPOS is always the distance between our lsb
846 and that of OP0. */
847
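/* In effect the code below computes
   OP0 = (OP0 & ~(MASK << BITPOS)) | ((VALUE & MASK) << BITPOS)
   where MASK has the low BITSIZE bits set and BITPOS counts from the lsb;
   the AND is skipped when VALUE is all ones and the IOR when it is zero.  */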
848 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
849 we must first convert its mode to MODE. */
850
851 if (GET_CODE (value) == CONST_INT)
852 {
853 HOST_WIDE_INT v = INTVAL (value);
854
855 if (bitsize < HOST_BITS_PER_WIDE_INT)
856 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
857
858 if (v == 0)
859 all_zero = 1;
860 else if ((bitsize < HOST_BITS_PER_WIDE_INT
861 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
862 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
863 all_one = 1;
864
865 value = lshift_value (mode, value, bitpos, bitsize);
866 }
867 else
868 {
869 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
870 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
871
872 if (GET_MODE (value) != mode)
873 {
874 if ((REG_P (value) || GET_CODE (value) == SUBREG)
875 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
876 value = gen_lowpart (mode, value);
877 else
878 value = convert_to_mode (mode, value, 1);
879 }
880
881 if (must_and)
882 value = expand_binop (mode, and_optab, value,
883 mask_rtx (mode, 0, bitsize, 0),
884 NULL_RTX, 1, OPTAB_LIB_WIDEN);
885 if (bitpos > 0)
886 value = expand_shift (LSHIFT_EXPR, mode, value,
887 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
888 }
889
890 /* Now clear the chosen bits in OP0,
891 except that if VALUE is -1 we need not bother. */
892
893 subtarget = op0;
894
895 if (! all_one)
896 {
897 temp = expand_binop (mode, and_optab, op0,
898 mask_rtx (mode, bitpos, bitsize, 1),
899 subtarget, 1, OPTAB_LIB_WIDEN);
900 subtarget = temp;
901 }
902 else
903 temp = op0;
904
905 /* Now logical-or VALUE into OP0, unless it is zero. */
906
907 if (! all_zero)
908 temp = expand_binop (mode, ior_optab, temp, value,
909 subtarget, 1, OPTAB_LIB_WIDEN);
910 if (op0 != temp)
911 emit_move_insn (op0, temp);
912 }
913 \f
914 /* Store a bit field that is split across multiple accessible memory objects.
915
916 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
917 BITSIZE is the field width; BITPOS the position of its first bit
918 (within the word).
919 VALUE is the value to store.
920
921 This does not yet handle fields wider than BITS_PER_WORD. */
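/* For example, with 32-bit words and BYTES_BIG_ENDIAN false, storing a
   10-bit field starting at bit 28 of a REG puts the low 4 bits of VALUE
   into bits 28..31 of the first word and the next 6 bits into bits 0..5
   of the following word.  */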
922
923 static void
924 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
925 unsigned HOST_WIDE_INT bitpos, rtx value)
926 {
927 unsigned int unit;
928 unsigned int bitsdone = 0;
929
930 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
931 much at a time. */
932 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
933 unit = BITS_PER_WORD;
934 else
935 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
936
937 /* If VALUE is a constant other than a CONST_INT, get it into a register in
938 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
939 that VALUE might be a floating-point constant. */
940 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
941 {
942 rtx word = gen_lowpart_common (word_mode, value);
943
944 if (word && (value != word))
945 value = word;
946 else
947 value = gen_lowpart_common (word_mode,
948 force_reg (GET_MODE (value) != VOIDmode
949 ? GET_MODE (value)
950 : word_mode, value));
951 }
952
953 while (bitsdone < bitsize)
954 {
955 unsigned HOST_WIDE_INT thissize;
956 rtx part, word;
957 unsigned HOST_WIDE_INT thispos;
958 unsigned HOST_WIDE_INT offset;
959
960 offset = (bitpos + bitsdone) / unit;
961 thispos = (bitpos + bitsdone) % unit;
962
963 /* THISSIZE must not overrun a word boundary. Otherwise,
964 store_fixed_bit_field will call us again, and we will mutually
965 recurse forever. */
966 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
967 thissize = MIN (thissize, unit - thispos);
968
969 if (BYTES_BIG_ENDIAN)
970 {
971 int total_bits;
972
973 /* We must do an endian conversion exactly the same way as it is
974 done in extract_bit_field, so that the two calls to
975 extract_fixed_bit_field will have comparable arguments. */
976 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
977 total_bits = BITS_PER_WORD;
978 else
979 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
980
981 /* Fetch successively less significant portions. */
982 if (GET_CODE (value) == CONST_INT)
983 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
984 >> (bitsize - bitsdone - thissize))
985 & (((HOST_WIDE_INT) 1 << thissize) - 1));
986 else
987 /* The args are chosen so that the last part includes the
988 lsb. Give extract_bit_field the value it needs (with
989 endianness compensation) to fetch the piece we want. */
990 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
991 total_bits - bitsize + bitsdone,
992 NULL_RTX, 1);
993 }
994 else
995 {
996 /* Fetch successively more significant portions. */
997 if (GET_CODE (value) == CONST_INT)
998 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
999 >> bitsdone)
1000 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001 else
1002 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1003 bitsdone, NULL_RTX, 1);
1004 }
1005
1006 /* If OP0 is a register, then handle OFFSET here.
1007
1008 When handling multiword bitfields, extract_bit_field may pass
1009 down a word_mode SUBREG of a larger REG for a bitfield that actually
1010 crosses a word boundary. Thus, for a SUBREG, we must find
1011 the current word starting from the base register. */
1012 if (GET_CODE (op0) == SUBREG)
1013 {
1014 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1015 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1016 GET_MODE (SUBREG_REG (op0)));
1017 offset = 0;
1018 }
1019 else if (REG_P (op0))
1020 {
1021 word = operand_subword_force (op0, offset, GET_MODE (op0));
1022 offset = 0;
1023 }
1024 else
1025 word = op0;
1026
1027 /* OFFSET is in UNITs, and UNIT is in bits.
1028 store_fixed_bit_field wants offset in bytes. */
1029 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1030 thispos, part);
1031 bitsdone += thissize;
1032 }
1033 }
1034 \f
1035 /* Generate code to extract a bit-field from STR_RTX
1036 containing BITSIZE bits, starting at BITNUM,
1037 and put it in TARGET if possible (if TARGET is nonzero).
1038 Regardless of TARGET, we return the rtx for where the value is placed.
1039
1040 STR_RTX is the structure containing the field (a REG or MEM).
1041 UNSIGNEDP is nonzero if this is an unsigned bit field.
1042 MODE is the natural mode of the field value once extracted.
1043 TMODE is the mode the caller would like the value to have;
1044 but the value may be returned with type MODE instead.
1045
1049 If a TARGET is specified and we can store in it at no extra cost,
1050 we do so, and return TARGET.
1051 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1052 if they are equally easy. */
1053
1054 rtx
1055 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1056 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1057 enum machine_mode mode, enum machine_mode tmode)
1058 {
1059 unsigned int unit
1060 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1061 unsigned HOST_WIDE_INT offset, bitpos;
1062 rtx op0 = str_rtx;
1063 rtx spec_target = target;
1064 rtx spec_target_subreg = 0;
1065 enum machine_mode int_mode;
1066 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1067 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1068 enum machine_mode mode1;
1069 int byte_offset;
1070
1071 if (tmode == VOIDmode)
1072 tmode = mode;
1073
1074 while (GET_CODE (op0) == SUBREG)
1075 {
1076 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1077 op0 = SUBREG_REG (op0);
1078 }
1079
1080 /* If we have an out-of-bounds access to a register, just return an
1081 uninitialized register of the required mode. This can occur if the
1082 source code contains an out-of-bounds access to a small array. */
1083 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1084 return gen_reg_rtx (tmode);
1085
1086 if (REG_P (op0)
1087 && mode == GET_MODE (op0)
1088 && bitnum == 0
1089 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1090 {
1091 /* We're trying to extract a full register from itself. */
1092 return op0;
1093 }
1094
1095 /* Use vec_extract patterns for extracting parts of vectors whenever
1096 available. */
1097 if (VECTOR_MODE_P (GET_MODE (op0))
1098 && !MEM_P (op0)
1099 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1100 != CODE_FOR_nothing)
1101 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1102 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1103 {
1104 enum machine_mode outermode = GET_MODE (op0);
1105 enum machine_mode innermode = GET_MODE_INNER (outermode);
1106 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1107 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1108 rtx rtxpos = GEN_INT (pos);
1109 rtx src = op0;
1110 rtx dest = NULL, pat, seq;
1111 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1112 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1113 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1114
1115 if (innermode == tmode || innermode == mode)
1116 dest = target;
1117
1118 if (!dest)
1119 dest = gen_reg_rtx (innermode);
1120
1121 start_sequence ();
1122
1123 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1124 dest = copy_to_mode_reg (mode0, dest);
1125
1126 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1127 src = copy_to_mode_reg (mode1, src);
1128
1129 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1130 rtxpos = copy_to_mode_reg (mode2, rtxpos);
1131
1132 /* We could handle this, but we should always be called with a pseudo
1133 for our targets and all insns should take them as outputs. */
1134 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1135 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1136 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1137
1138 pat = GEN_FCN (icode) (dest, src, rtxpos);
1139 seq = get_insns ();
1140 end_sequence ();
1141 if (pat)
1142 {
1143 emit_insn (seq);
1144 emit_insn (pat);
1145 return dest;
1146 }
1147 }
1148
1149 /* Make sure we are playing with integral modes. Pun with subregs
1150 if we aren't. */
1151 {
1152 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1153 if (imode != GET_MODE (op0))
1154 {
1155 if (MEM_P (op0))
1156 op0 = adjust_address (op0, imode, 0);
1157 else
1158 {
1159 gcc_assert (imode != BLKmode);
1160 op0 = gen_lowpart (imode, op0);
1161
1162 /* If we got a SUBREG, force it into a register since we
1163 aren't going to be able to do another SUBREG on it. */
1164 if (GET_CODE (op0) == SUBREG)
1165 op0 = force_reg (imode, op0);
1166 }
1167 }
1168 }
1169
1170 /* We may be accessing data outside the field, which means
1171 we can alias adjacent data. */
1172 if (MEM_P (op0))
1173 {
1174 op0 = shallow_copy_rtx (op0);
1175 set_mem_alias_set (op0, 0);
1176 set_mem_expr (op0, 0);
1177 }
1178
1179 /* Extraction of a full-word or multi-word value from a structure
1180 in a register or aligned memory can be done with just a SUBREG.
1181 A subword value in the least significant part of a register
1182 can also be extracted with a SUBREG. For this, we need the
1183 byte offset of the value in op0. */
1184
1185 bitpos = bitnum % unit;
1186 offset = bitnum / unit;
1187 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1188
1189 /* If OP0 is a register, BITPOS must count within a word.
1190 But as we have it, it counts within whatever size OP0 now has.
1191 On a bigendian machine, these are not the same, so convert. */
1192 if (BYTES_BIG_ENDIAN
1193 && !MEM_P (op0)
1194 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1195 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1196
1197 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1198 If that's wrong, the solution is to test for it and set TARGET to 0
1199 if needed. */
1200
1201 /* Only scalar integer modes can be converted via subregs. There is an
1202 additional problem for FP modes here in that they can have a precision
1203 which is different from the size. mode_for_size uses precision, but
1204 we want a mode based on the size, so we must avoid calling it for FP
1205 modes. */
1206 mode1 = (SCALAR_INT_MODE_P (tmode)
1207 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1208 : mode);
1209
1210 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1211 && bitpos % BITS_PER_WORD == 0)
1212 || (mode1 != BLKmode
1213 /* ??? The big endian test here is wrong. This is correct
1214 if the value is in a register, and if mode_for_size is not
1215 the same mode as op0. This causes us to get unnecessarily
1216 inefficient code from the Thumb port when -mbig-endian. */
1217 && (BYTES_BIG_ENDIAN
1218 ? bitpos + bitsize == BITS_PER_WORD
1219 : bitpos == 0)))
1220 && ((!MEM_P (op0)
1221 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1222 GET_MODE_BITSIZE (GET_MODE (op0)))
1223 && GET_MODE_SIZE (mode1) != 0
1224 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1225 || (MEM_P (op0)
1226 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1227 || (offset * BITS_PER_UNIT % bitsize == 0
1228 && MEM_ALIGN (op0) % bitsize == 0)))))
1229 {
1230 if (mode1 != GET_MODE (op0))
1231 {
1232 if (MEM_P (op0))
1233 op0 = adjust_address (op0, mode1, offset);
1234 else
1235 {
1236 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1237 byte_offset);
1238 if (sub == NULL)
1239 goto no_subreg_mode_swap;
1240 op0 = sub;
1241 }
1242 }
1243 if (mode1 != mode)
1244 return convert_to_mode (tmode, op0, unsignedp);
1245 return op0;
1246 }
1247 no_subreg_mode_swap:
1248
1249 /* Handle fields bigger than a word. */
1250
1251 if (bitsize > BITS_PER_WORD)
1252 {
1253 /* Here we transfer the words of the field
1254 in the order least significant first.
1255 This is because the most significant word is the one which may
1256 be less than full. */
1257
1258 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1259 unsigned int i;
1260
1261 if (target == 0 || !REG_P (target))
1262 target = gen_reg_rtx (mode);
1263
1264 /* Indicate for flow that the entire target reg is being set. */
1265 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1266
1267 for (i = 0; i < nwords; i++)
1268 {
1269 /* If I is 0, use the low-order word in both field and target;
1270 if I is 1, use the next to lowest word; and so on. */
1271 /* Word number in TARGET to use. */
1272 unsigned int wordnum
1273 = (WORDS_BIG_ENDIAN
1274 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1275 : i);
1276 /* Offset from start of field in OP0. */
1277 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1278 ? MAX (0, ((int) bitsize - ((int) i + 1)
1279 * (int) BITS_PER_WORD))
1280 : (int) i * BITS_PER_WORD);
1281 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1282 rtx result_part
1283 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1284 bitsize - i * BITS_PER_WORD),
1285 bitnum + bit_offset, 1, target_part, mode,
1286 word_mode);
1287
1288 gcc_assert (target_part);
1289
1290 if (result_part != target_part)
1291 emit_move_insn (target_part, result_part);
1292 }
1293
1294 if (unsignedp)
1295 {
1296 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1297 need to be zero'd out. */
1298 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1299 {
1300 unsigned int i, total_words;
1301
1302 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1303 for (i = nwords; i < total_words; i++)
1304 emit_move_insn
1305 (operand_subword (target,
1306 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1307 1, VOIDmode),
1308 const0_rtx);
1309 }
1310 return target;
1311 }
1312
1313 /* Signed bit field: sign-extend with two arithmetic shifts. */
1314 target = expand_shift (LSHIFT_EXPR, mode, target,
1315 build_int_cst (NULL_TREE,
1316 GET_MODE_BITSIZE (mode) - bitsize),
1317 NULL_RTX, 0);
1318 return expand_shift (RSHIFT_EXPR, mode, target,
1319 build_int_cst (NULL_TREE,
1320 GET_MODE_BITSIZE (mode) - bitsize),
1321 NULL_RTX, 0);
1322 }
1323
1324 /* From here on we know the desired field is smaller than a word. */
1325
1326 /* Check if there is a correspondingly-sized integer field, so we can
1327 safely extract it as one size of integer, if necessary; then
1328 truncate or extend to the size that is wanted; then use SUBREGs or
1329 convert_to_mode to get one of the modes we really wanted. */
1330
1331 int_mode = int_mode_for_mode (tmode);
1332 if (int_mode == BLKmode)
1333 int_mode = int_mode_for_mode (mode);
1334 /* Should probably push op0 out to memory and then do a load. */
1335 gcc_assert (int_mode != BLKmode);
1336
1337 /* OFFSET is the number of words or bytes (UNIT says which)
1338 from STR_RTX to the first word or byte containing part of the field. */
1339 if (!MEM_P (op0))
1340 {
1341 if (offset != 0
1342 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1343 {
1344 if (!REG_P (op0))
1345 op0 = copy_to_reg (op0);
1346 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1347 op0, (offset * UNITS_PER_WORD));
1348 }
1349 offset = 0;
1350 }
1351
1352 /* Now OFFSET is nonzero only for memory operands. */
1353
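/* For an unsigned field, try the extzv (zero-extending extract) pattern;
   for a signed field, try extv (sign-extending extract).  If the pattern
   is unavailable or does not apply, fall back to extract_fixed_bit_field,
   which uses shifts and masking.  */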
1354 if (unsignedp)
1355 {
1356 if (HAVE_extzv
1357 && bitsize > 0
1358 && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1359 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1360 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1361 {
1362 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1363 rtx bitsize_rtx, bitpos_rtx;
1364 rtx last = get_last_insn ();
1365 rtx xop0 = op0;
1366 rtx xtarget = target;
1367 rtx xspec_target = spec_target;
1368 rtx xspec_target_subreg = spec_target_subreg;
1369 rtx pat;
1370 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1371
1372 if (MEM_P (xop0))
1373 {
1374 int save_volatile_ok = volatile_ok;
1375 volatile_ok = 1;
1376
1377 /* Is the memory operand acceptable? */
1378 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1379 (xop0, GET_MODE (xop0))))
1380 {
1381 /* No, load into a reg and extract from there. */
1382 enum machine_mode bestmode;
1383
1384 /* Get the mode to use for inserting into this field. If
1385 OP0 is BLKmode, get the smallest mode consistent with the
1386 alignment. If OP0 is a non-BLKmode object that is no
1387 wider than MAXMODE, use its mode. Otherwise, use the
1388 smallest mode containing the field. */
1389
1390 if (GET_MODE (xop0) == BLKmode
1391 || (GET_MODE_SIZE (GET_MODE (op0))
1392 > GET_MODE_SIZE (maxmode)))
1393 bestmode = get_best_mode (bitsize, bitnum,
1394 MEM_ALIGN (xop0), maxmode,
1395 MEM_VOLATILE_P (xop0));
1396 else
1397 bestmode = GET_MODE (xop0);
1398
1399 if (bestmode == VOIDmode
1400 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1401 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1402 goto extzv_loses;
1403
1404 /* Compute offset as multiple of this unit,
1405 counting in bytes. */
1406 unit = GET_MODE_BITSIZE (bestmode);
1407 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1408 xbitpos = bitnum % unit;
1409 xop0 = adjust_address (xop0, bestmode, xoffset);
1410
1411 /* Make sure register is big enough for the whole field. */
1412 if (xoffset * BITS_PER_UNIT + unit
1413 < offset * BITS_PER_UNIT + bitsize)
1414 goto extzv_loses;
1415
1416 /* Fetch it to a register in that size. */
1417 xop0 = force_reg (bestmode, xop0);
1418
1419 /* XBITPOS counts within UNIT, which is what is expected. */
1420 }
1421 else
1422 /* Get ref to first byte containing part of the field. */
1423 xop0 = adjust_address (xop0, byte_mode, xoffset);
1424
1425 volatile_ok = save_volatile_ok;
1426 }
1427
1428 /* If op0 is a register, we need it in MAXMODE (which is usually
1429 SImode) to make it acceptable to the format of extzv.  */
1430 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1431 goto extzv_loses;
1432 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1433 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1434
1435 /* On big-endian machines, we count bits from the most significant.
1436 If the bit field insn does not, we must invert. */
1437 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1438 xbitpos = unit - bitsize - xbitpos;
1439
1440 /* Now convert from counting within UNIT to counting in MAXMODE. */
1441 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1442 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1443
1444 unit = GET_MODE_BITSIZE (maxmode);
1445
1446 if (xtarget == 0)
1447 xtarget = xspec_target = gen_reg_rtx (tmode);
1448
1449 if (GET_MODE (xtarget) != maxmode)
1450 {
1451 if (REG_P (xtarget))
1452 {
1453 int wider = (GET_MODE_SIZE (maxmode)
1454 > GET_MODE_SIZE (GET_MODE (xtarget)));
1455 xtarget = gen_lowpart (maxmode, xtarget);
1456 if (wider)
1457 xspec_target_subreg = xtarget;
1458 }
1459 else
1460 xtarget = gen_reg_rtx (maxmode);
1461 }
1462
1463 /* If this machine's extzv insists on a register target,
1464 make sure we have one. */
1465 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1466 (xtarget, maxmode)))
1467 xtarget = gen_reg_rtx (maxmode);
1468
1469 bitsize_rtx = GEN_INT (bitsize);
1470 bitpos_rtx = GEN_INT (xbitpos);
1471
1472 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1473 if (pat)
1474 {
1475 emit_insn (pat);
1476 target = xtarget;
1477 spec_target = xspec_target;
1478 spec_target_subreg = xspec_target_subreg;
1479 }
1480 else
1481 {
1482 delete_insns_since (last);
1483 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1484 bitpos, target, 1);
1485 }
1486 }
1487 else
1488 extzv_loses:
1489 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1490 bitpos, target, 1);
1491 }
1492 else
1493 {
1494 if (HAVE_extv
1495 && bitsize > 0
1496 && GET_MODE_BITSIZE (extv_mode) >= bitsize
1497 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1498 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1499 {
1500 int xbitpos = bitpos, xoffset = offset;
1501 rtx bitsize_rtx, bitpos_rtx;
1502 rtx last = get_last_insn ();
1503 rtx xop0 = op0, xtarget = target;
1504 rtx xspec_target = spec_target;
1505 rtx xspec_target_subreg = spec_target_subreg;
1506 rtx pat;
1507 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1508
1509 if (MEM_P (xop0))
1510 {
1511 /* Is the memory operand acceptable? */
1512 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1513 (xop0, GET_MODE (xop0))))
1514 {
1515 /* No, load into a reg and extract from there. */
1516 enum machine_mode bestmode;
1517
1518 /* Get the mode to use for inserting into this field. If
1519 OP0 is BLKmode, get the smallest mode consistent with the
1520 alignment. If OP0 is a non-BLKmode object that is no
1521 wider than MAXMODE, use its mode. Otherwise, use the
1522 smallest mode containing the field. */
1523
1524 if (GET_MODE (xop0) == BLKmode
1525 || (GET_MODE_SIZE (GET_MODE (op0))
1526 > GET_MODE_SIZE (maxmode)))
1527 bestmode = get_best_mode (bitsize, bitnum,
1528 MEM_ALIGN (xop0), maxmode,
1529 MEM_VOLATILE_P (xop0));
1530 else
1531 bestmode = GET_MODE (xop0);
1532
1533 if (bestmode == VOIDmode
1534 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1535 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1536 goto extv_loses;
1537
1538 /* Compute offset as multiple of this unit,
1539 counting in bytes. */
1540 unit = GET_MODE_BITSIZE (bestmode);
1541 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1542 xbitpos = bitnum % unit;
1543 xop0 = adjust_address (xop0, bestmode, xoffset);
1544
1545 /* Make sure register is big enough for the whole field. */
1546 if (xoffset * BITS_PER_UNIT + unit
1547 < offset * BITS_PER_UNIT + bitsize)
1548 goto extv_loses;
1549
1550 /* Fetch it to a register in that size. */
1551 xop0 = force_reg (bestmode, xop0);
1552
1553 /* XBITPOS counts within UNIT, which is what is expected. */
1554 }
1555 else
1556 /* Get ref to first byte containing part of the field. */
1557 xop0 = adjust_address (xop0, byte_mode, xoffset);
1558 }
1559
1560 /* If op0 is a register, we need it in MAXMODE (which is usually
1561 SImode) to make it acceptable to the format of extv. */
1562 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1563 goto extv_loses;
1564 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1565 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1566
1567 /* On big-endian machines, we count bits from the most significant.
1568 If the bit field insn does not, we must invert. */
1569 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1570 xbitpos = unit - bitsize - xbitpos;
1571
1572 /* XBITPOS counts within a size of UNIT.
1573 Adjust to count within a size of MAXMODE. */
1574 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1575 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1576
1577 unit = GET_MODE_BITSIZE (maxmode);
1578
1579 if (xtarget == 0)
1580 xtarget = xspec_target = gen_reg_rtx (tmode);
1581
1582 if (GET_MODE (xtarget) != maxmode)
1583 {
1584 if (REG_P (xtarget))
1585 {
1586 int wider = (GET_MODE_SIZE (maxmode)
1587 > GET_MODE_SIZE (GET_MODE (xtarget)));
1588 xtarget = gen_lowpart (maxmode, xtarget);
1589 if (wider)
1590 xspec_target_subreg = xtarget;
1591 }
1592 else
1593 xtarget = gen_reg_rtx (maxmode);
1594 }
1595
1596 /* If this machine's extv insists on a register target,
1597 make sure we have one. */
1598 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1599 (xtarget, maxmode)))
1600 xtarget = gen_reg_rtx (maxmode);
1601
1602 bitsize_rtx = GEN_INT (bitsize);
1603 bitpos_rtx = GEN_INT (xbitpos);
1604
1605 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1606 if (pat)
1607 {
1608 emit_insn (pat);
1609 target = xtarget;
1610 spec_target = xspec_target;
1611 spec_target_subreg = xspec_target_subreg;
1612 }
1613 else
1614 {
1615 delete_insns_since (last);
1616 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1617 bitpos, target, 0);
1618 }
1619 }
1620 else
1621 extv_loses:
1622 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1623 bitpos, target, 0);
1624 }
1625 if (target == spec_target)
1626 return target;
1627 if (target == spec_target_subreg)
1628 return spec_target;
1629 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1630 {
1631 /* If the target mode is not a scalar integral, first convert to the
1632 integer mode of that size and then access it as a floating-point
1633 value via a SUBREG. */
1634 if (!SCALAR_INT_MODE_P (tmode))
1635 {
1636 enum machine_mode smode
1637 = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1638 target = convert_to_mode (smode, target, unsignedp);
1639 target = force_reg (smode, target);
1640 return gen_lowpart (tmode, target);
1641 }
1642
1643 return convert_to_mode (tmode, target, unsignedp);
1644 }
1645 return target;
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations.
1649 Returns an rtx to represent the value.
1650 OP0 addresses a register (word) or memory (byte).
1651 BITPOS says which bit within the word or byte the bit field starts in.
1652 OFFSET says how many bytes farther the bit field starts;
1653 it is 0 if OP0 is a register.
1654 BITSIZE says how many bits long the bit field is.
1655 (If OP0 is a register, it may be narrower than a full word,
1656 but BITPOS still counts within a full word,
1657 which is significant on bigendian machines.)
1658
1659 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660 If TARGET is nonzero, attempts to store the value there
1661 and return TARGET, but this is not guaranteed.
1662 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666 unsigned HOST_WIDE_INT offset,
1667 unsigned HOST_WIDE_INT bitsize,
1668 unsigned HOST_WIDE_INT bitpos, rtx target,
1669 int unsignedp)
1670 {
1671 unsigned int total_bits = BITS_PER_WORD;
1672 enum machine_mode mode;
1673
1674 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675 {
1676 /* Special treatment for a bit field split across two registers. */
1677 if (bitsize + bitpos > BITS_PER_WORD)
1678 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679 }
1680 else
1681 {
1682 /* Get the proper mode to use for this field. We want a mode that
1683 includes the entire field. If such a mode would be larger than
1684 a word, we won't be doing the extraction the normal way. */
1685
1686 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689 if (mode == VOIDmode)
1690 /* The only way this should occur is if the field spans word
1691 boundaries. */
1692 return extract_split_bit_field (op0, bitsize,
1693 bitpos + offset * BITS_PER_UNIT,
1694 unsignedp);
1695
1696 total_bits = GET_MODE_BITSIZE (mode);
1697
1698 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1699 be in the range 0 to total_bits-1, and put any excess bytes in
1700 OFFSET. */
1701 if (bitpos >= total_bits)
1702 {
1703 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705 * BITS_PER_UNIT);
1706 }
1707
1708 /* Get ref to an aligned byte, halfword, or word containing the field.
1709 Adjust BITPOS to be position within a word,
1710 and OFFSET to be the offset of that word.
1711 Then alter OP0 to refer to that word. */
1712 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713 offset -= (offset % (total_bits / BITS_PER_UNIT));
1714 op0 = adjust_address (op0, mode, offset);
1715 }
1716
1717 mode = GET_MODE (op0);
1718
1719 if (BYTES_BIG_ENDIAN)
1720 /* BITPOS is the distance between our msb and that of OP0.
1721 Convert it to the distance from the lsb. */
1722 bitpos = total_bits - bitsize - bitpos;
1723
1724 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725 We have reduced the big-endian case to the little-endian case. */
1726
1727 if (unsignedp)
1728 {
1729 if (bitpos)
1730 {
1731 /* If the field does not already start at the lsb,
1732 shift it so it does. */
1733 tree amount = build_int_cst (NULL_TREE, bitpos);
1734 /* Maybe propagate the target for the shift. */
1735 /* But not if we will return it--could confuse integrate.c. */
1736 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737 if (tmode != mode) subtarget = 0;
1738 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739 }
1740 /* Convert the value to the desired mode. */
1741 if (mode != tmode)
1742 op0 = convert_to_mode (tmode, op0, 1);
1743
1744 /* Unless the msb of the field used to be the msb when we shifted,
1745 mask out the upper bits. */
1746
1747 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748 return expand_binop (GET_MODE (op0), and_optab, op0,
1749 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750 target, 1, OPTAB_LIB_WIDEN);
1751 return op0;
1752 }
1753
1754 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755 then arithmetic-shift its lsb to the lsb of the word. */
1756 op0 = force_reg (mode, op0);
1757 if (mode != tmode)
1758 target = 0;
1759
1760 /* Find the narrowest integer mode that contains the field. */
1761
1762 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763 mode = GET_MODE_WIDER_MODE (mode))
1764 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765 {
1766 op0 = convert_to_mode (mode, op0, 0);
1767 break;
1768 }
1769
1770 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771 {
1772 tree amount
1773 = build_int_cst (NULL_TREE,
1774 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775 /* Maybe propagate the target for the shift. */
1776 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778 }
1779
1780 return expand_shift (RSHIFT_EXPR, mode, op0,
1781 build_int_cst (NULL_TREE,
1782 GET_MODE_BITSIZE (mode) - bitsize),
1783 target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788 complement of that if COMPLEMENT. The mask is truncated if
1789 necessary to the width of mode MODE. The mask is zero-extended if
1790 BITSIZE+BITPOS is too small for MODE. */
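/* Illustration (not part of the interface): for MODE == SImode, BITPOS == 3,
   BITSIZE == 5 the low word of the mask is built as

     masklow  = (HOST_WIDE_INT) -1 << 3;
     masklow &= (unsigned HOST_WIDE_INT) -1 >> (HOST_BITS_PER_WIDE_INT - 8);

   giving 0xF8, i.e. five ones followed by three zeros; with COMPLEMENT
   nonzero the result is 0xFFFFFF07 instead.  */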
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795 HOST_WIDE_INT masklow, maskhigh;
1796
1797 if (bitsize == 0)
1798 masklow = 0;
1799 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800 masklow = (HOST_WIDE_INT) -1 << bitpos;
1801 else
1802 masklow = 0;
1803
1804 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805 masklow &= ((unsigned HOST_WIDE_INT) -1
1806 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809 maskhigh = -1;
1810 else
1811 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813 if (bitsize == 0)
1814 maskhigh = 0;
1815 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818 else
1819 maskhigh = 0;
1820
1821 if (complement)
1822 {
1823 maskhigh = ~maskhigh;
1824 masklow = ~masklow;
1825 }
1826
1827 return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836 unsigned HOST_WIDE_INT v = INTVAL (value);
1837 HOST_WIDE_INT low, high;
1838
1839 if (bitsize < HOST_BITS_PER_WIDE_INT)
1840 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842 if (bitpos < HOST_BITS_PER_WIDE_INT)
1843 {
1844 low = v << bitpos;
1845 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846 }
1847 else
1848 {
1849 low = 0;
1850 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851 }
1852
1853 return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field from a memory by forcing the alignment of the
1857 memory. This is efficient only if the field spans at least 4 boundaries.
1858
1859 OP0 is the MEM.
1860 BITSIZE is the field width; BITPOS is the position of the first bit.
1861 UNSIGNEDP is true if the result should be zero-extended. */
1862
1863 static rtx
1864 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865 unsigned HOST_WIDE_INT bitpos,
1866 int unsignedp)
1867 {
1868 enum machine_mode mode, dmode;
1869 unsigned int m_bitsize, m_size;
1870 unsigned int sign_shift_up, sign_shift_dn;
1871 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1872
1873 /* Choose a mode that will fit BITSIZE. */
1874 mode = smallest_mode_for_size (bitsize, MODE_INT);
1875 m_size = GET_MODE_SIZE (mode);
1876 m_bitsize = GET_MODE_BITSIZE (mode);
1877
1878 /* Choose a mode twice as wide. Fail if no such mode exists. */
1879 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1880 if (dmode == BLKmode)
1881 return NULL;
1882
1883 do_pending_stack_adjust ();
1884 start = get_last_insn ();
1885
1886 /* At the end, we'll need an additional shift to deal with sign/zero
1887 extension. By default this will be a left+right shift of the
1888 appropriate size. But we may be able to eliminate one of them. */
1889 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1890
1891 if (STRICT_ALIGNMENT)
1892 {
1893 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1894 bitpos %= BITS_PER_UNIT;
1895
1896 /* We load two values to be concatenated. There's an edge condition
1897 that bears notice -- an aligned value at the end of a page can
1898 only load one value lest we segfault. So the two values we load
1899 are at "base & -size" and "(base + size - 1) & -size". If base
1900 is unaligned, the addresses will be aligned and sequential; if
1901 base is aligned, the addresses will both be equal to base. */
1902
1903 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1904 GEN_INT (-(HOST_WIDE_INT)m_size),
1905 NULL, true, OPTAB_LIB_WIDEN);
1906 mark_reg_pointer (a1, m_bitsize);
1907 v1 = gen_rtx_MEM (mode, a1);
1908 set_mem_align (v1, m_bitsize);
1909 v1 = force_reg (mode, validize_mem (v1));
1910
1911 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1912 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1913 GEN_INT (-(HOST_WIDE_INT)m_size),
1914 NULL, true, OPTAB_LIB_WIDEN);
1915 v2 = gen_rtx_MEM (mode, a2);
1916 set_mem_align (v2, m_bitsize);
1917 v2 = force_reg (mode, validize_mem (v2));
1918
1919 /* Combine these two values into a double-word value. */
1920 if (m_bitsize == BITS_PER_WORD)
1921 {
1922 comb = gen_reg_rtx (dmode);
1923 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1924 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1925 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1926 }
1927 else
1928 {
1929 if (BYTES_BIG_ENDIAN)
1930 comb = v1, v1 = v2, v2 = comb;
1931 v1 = convert_modes (dmode, mode, v1, true);
1932 if (v1 == NULL)
1933 goto fail;
1934 v2 = convert_modes (dmode, mode, v2, true);
1935 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1936 NULL, true, OPTAB_LIB_WIDEN);
1937 if (v2 == NULL)
1938 goto fail;
1939 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1940 true, OPTAB_LIB_WIDEN);
1941 if (comb == NULL)
1942 goto fail;
1943 }
1944
1945 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1946 NULL, true, OPTAB_LIB_WIDEN);
1947 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1948
1949 if (bitpos != 0)
1950 {
1951 if (sign_shift_up <= bitpos)
1952 bitpos -= sign_shift_up, sign_shift_up = 0;
1953 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1954 NULL, true, OPTAB_LIB_WIDEN);
1955 }
1956 }
1957 else
1958 {
1959 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1960 bitpos %= BITS_PER_UNIT;
1961
1962 /* When strict alignment is not required, we can just load directly
1963 from memory without masking. If the remaining BITPOS offset is
1964 small enough, we may be able to do all operations in MODE as
1965 opposed to DMODE. */
1966 if (bitpos + bitsize <= m_bitsize)
1967 dmode = mode;
1968 comb = adjust_address (op0, dmode, offset);
1969
1970 if (sign_shift_up <= bitpos)
1971 bitpos -= sign_shift_up, sign_shift_up = 0;
1972 shift = GEN_INT (bitpos);
1973 }
1974
1975 /* Shift down the double-word such that the requested value is at bit 0. */
1976 if (shift != const0_rtx)
1977 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1978 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1979 if (comb == NULL)
1980 goto fail;
1981
1982 /* If the field exactly matches MODE, then all we need to do is return the
1983 lowpart. Otherwise, shift to get the sign bits set properly. */
1984 result = force_reg (mode, gen_lowpart (mode, comb));
1985
1986 if (sign_shift_up)
1987 result = expand_simple_binop (mode, ASHIFT, result,
1988 GEN_INT (sign_shift_up),
1989 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1990 if (sign_shift_dn)
1991 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1992 result, GEN_INT (sign_shift_dn),
1993 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1994
1995 return result;
1996
1997 fail:
1998 delete_insns_since (start);
1999 return NULL;
2000 }
2001
2002 /* Extract a bit field that is split across two words
2003 and return an RTX for the result.
2004
2005 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2006 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2007 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
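/* For example (illustrative numbers only), with BITS_PER_WORD == 32,
   BITSIZE == 20 and BITPOS == 24 on a little-endian target the loop below
   runs twice:

     round 1: 8 bits taken from bits 24..31 of word 0, left at bits 0..7;
     round 2: 12 bits taken from bits 0..11 of word 1, shifted left by 8
              and IORed in, filling bits 8..19 of the result.

   A signed field is then fixed up by the final pair of shifts.  */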
2008
2009 static rtx
2010 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2011 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2012 {
2013 unsigned int unit;
2014 unsigned int bitsdone = 0;
2015 rtx result = NULL_RTX;
2016 int first = 1;
2017
2018 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2019 much at a time. */
2020 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2021 unit = BITS_PER_WORD;
2022 else
2023 {
2024 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2025 if (0 && bitsize / unit > 2)
2026 {
2027 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2028 unsignedp);
2029 if (tmp)
2030 return tmp;
2031 }
2032 }
2033
2034 while (bitsdone < bitsize)
2035 {
2036 unsigned HOST_WIDE_INT thissize;
2037 rtx part, word;
2038 unsigned HOST_WIDE_INT thispos;
2039 unsigned HOST_WIDE_INT offset;
2040
2041 offset = (bitpos + bitsdone) / unit;
2042 thispos = (bitpos + bitsdone) % unit;
2043
2044 /* THISSIZE must not overrun a word boundary. Otherwise,
2045 extract_fixed_bit_field will call us again, and we will mutually
2046 recurse forever. */
2047 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048 thissize = MIN (thissize, unit - thispos);
2049
2050 /* If OP0 is a register, then handle OFFSET here.
2051
2052 When handling multiword bitfields, extract_bit_field may pass
2053 down a word_mode SUBREG of a larger REG for a bitfield that actually
2054 crosses a word boundary. Thus, for a SUBREG, we must find
2055 the current word starting from the base register. */
2056 if (GET_CODE (op0) == SUBREG)
2057 {
2058 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060 GET_MODE (SUBREG_REG (op0)));
2061 offset = 0;
2062 }
2063 else if (REG_P (op0))
2064 {
2065 word = operand_subword_force (op0, offset, GET_MODE (op0));
2066 offset = 0;
2067 }
2068 else
2069 word = op0;
2070
2071 /* Extract the parts in bit-counting order,
2072 whose meaning is determined by BYTES_PER_UNIT.
2073 OFFSET is in UNITs, and UNIT is in bits.
2074 extract_fixed_bit_field wants offset in bytes. */
2075 part = extract_fixed_bit_field (word_mode, word,
2076 offset * unit / BITS_PER_UNIT,
2077 thissize, thispos, 0, 1);
2078 bitsdone += thissize;
2079
2080 /* Shift this part into place for the result. */
2081 if (BYTES_BIG_ENDIAN)
2082 {
2083 if (bitsize != bitsdone)
2084 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085 build_int_cst (NULL_TREE, bitsize - bitsdone),
2086 0, 1);
2087 }
2088 else
2089 {
2090 if (bitsdone != thissize)
2091 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2092 build_int_cst (NULL_TREE,
2093 bitsdone - thissize), 0, 1);
2094 }
2095
2096 if (first)
2097 result = part;
2098 else
2099 /* Combine the parts with bitwise or. This works
2100 because we extracted each part as an unsigned bit field. */
2101 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2102 OPTAB_LIB_WIDEN);
2103
2104 first = 0;
2105 }
2106
2107 /* Unsigned bit field: we are done. */
2108 if (unsignedp)
2109 return result;
2110 /* Signed bit field: sign-extend with two arithmetic shifts. */
2111 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2112 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2113 NULL_RTX, 0);
2114 return expand_shift (RSHIFT_EXPR, word_mode, result,
2115 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2116 NULL_RTX, 0);
2117 }
2118 \f
2119 /* Add INC into TARGET. */
2120
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124 rtx value = expand_binop (GET_MODE (target), add_optab,
2125 target, inc,
2126 target, 0, OPTAB_LIB_WIDEN);
2127 if (value != target)
2128 emit_move_insn (target, value);
2129 }
2130
2131 /* Subtract DEC from TARGET. */
2132
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136 rtx value = expand_binop (GET_MODE (target), sub_optab,
2137 target, dec,
2138 target, 0, OPTAB_LIB_WIDEN);
2139 if (value != target)
2140 emit_move_insn (target, value);
2141 }
2142 \f
2143 /* Output a shift instruction for expression code CODE,
2144 with SHIFTED being the rtx for the value to shift,
2145 and AMOUNT the tree for the amount to shift by.
2146 Store the result in the rtx TARGET, if that is convenient.
2147 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148 Return the rtx for where the value is. */
2149
2150 rtx
2151 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2152 tree amount, rtx target, int unsignedp)
2153 {
2154 rtx op1, temp = 0;
2155 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157 int try;
2158
2159 /* We previously detected shift counts computed by NEGATE_EXPR and
2160 shifted in the other direction, but that does not work on all
2161 machines. */
2162
2163 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2164
2165 if (SHIFT_COUNT_TRUNCATED)
2166 {
2167 if (GET_CODE (op1) == CONST_INT
2168 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2169 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2170 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2171 % GET_MODE_BITSIZE (mode));
2172 else if (GET_CODE (op1) == SUBREG
2173 && subreg_lowpart_p (op1))
2174 op1 = SUBREG_REG (op1);
2175 }
2176
2177 if (op1 == const0_rtx)
2178 return shifted;
2179
2180 /* Check whether it's cheaper to implement a left shift by a constant
2181 bit count as a sequence of additions. */
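/* E.g. a left shift by 3 becomes three doublings,
     t = x + x;  t = t + t;  t = t + t;
   which leaves t == x << 3 and wins whenever 3 * add_cost[mode] is cheaper
   than shift_cost[mode][3].  */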
2182 if (code == LSHIFT_EXPR
2183 && GET_CODE (op1) == CONST_INT
2184 && INTVAL (op1) > 0
2185 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2186 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2187 {
2188 int i;
2189 for (i = 0; i < INTVAL (op1); i++)
2190 {
2191 temp = force_reg (mode, shifted);
2192 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2193 unsignedp, OPTAB_LIB_WIDEN);
2194 }
2195 return shifted;
2196 }
2197
2198 for (try = 0; temp == 0 && try < 3; try++)
2199 {
2200 enum optab_methods methods;
2201
2202 if (try == 0)
2203 methods = OPTAB_DIRECT;
2204 else if (try == 1)
2205 methods = OPTAB_WIDEN;
2206 else
2207 methods = OPTAB_LIB_WIDEN;
2208
2209 if (rotate)
2210 {
2211 /* Widening does not work for rotation. */
2212 if (methods == OPTAB_WIDEN)
2213 continue;
2214 else if (methods == OPTAB_LIB_WIDEN)
2215 {
2216 /* If we have been unable to open-code this by a rotation,
2217 do it as the IOR of two shifts. I.e., to rotate A
2218 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2219 where C is the bitsize of A.
2220
2221 It is theoretically possible that the target machine might
2222 not be able to perform either shift and hence we would
2223 be making two libcalls rather than just the one for the
2224 shift (similarly if IOR could not be done). We will allow
2225 this extremely unlikely lossage to avoid complicating the
2226 code below. */
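/* Concretely, rotating a 32-bit value A left by N is
     (A << N) | ((unsigned) A >> (32 - N));
   e.g. A == 0x80000001, N == 1 gives 0x00000002 | 0x00000001 == 0x3.  */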
2227
2228 rtx subtarget = target == shifted ? 0 : target;
2229 rtx temp1;
2230 tree type = TREE_TYPE (amount);
2231 tree new_amount = make_tree (type, op1);
2232 tree other_amount
2233 = fold_build2 (MINUS_EXPR, type,
2234 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2235 amount);
2236
2237 shifted = force_reg (mode, shifted);
2238
2239 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2240 mode, shifted, new_amount, 0, 1);
2241 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2242 mode, shifted, other_amount, subtarget, 1);
2243 return expand_binop (mode, ior_optab, temp, temp1, target,
2244 unsignedp, methods);
2245 }
2246
2247 temp = expand_binop (mode,
2248 left ? rotl_optab : rotr_optab,
2249 shifted, op1, target, unsignedp, methods);
2250 }
2251 else if (unsignedp)
2252 temp = expand_binop (mode,
2253 left ? ashl_optab : lshr_optab,
2254 shifted, op1, target, unsignedp, methods);
2255
2256 /* Do arithmetic shifts.
2257 Also, if we are going to widen the operand, we can just as well
2258 use an arithmetic right-shift instead of a logical one. */
2259 if (temp == 0 && ! rotate
2260 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2261 {
2262 enum optab_methods methods1 = methods;
2263
2264 /* If trying to widen a log shift to an arithmetic shift,
2265 don't accept an arithmetic shift of the same size. */
2266 if (unsignedp)
2267 methods1 = OPTAB_MUST_WIDEN;
2268
2269 /* Arithmetic shift */
2270
2271 temp = expand_binop (mode,
2272 left ? ashl_optab : ashr_optab,
2273 shifted, op1, target, unsignedp, methods1);
2274 }
2275
2276 /* We used to try extzv here for logical right shifts, but that was
2277 only useful for one machine, the VAX, and caused poor code
2278 generation there for lshrdi3, so the code was deleted and a
2279 define_expand for lshrsi3 was added to vax.md. */
2280 }
2281
2282 gcc_assert (temp);
2283 return temp;
2284 }
2285 \f
2286 enum alg_code {
2287 alg_unknown,
2288 alg_zero,
2289 alg_m, alg_shift,
2290 alg_add_t_m2,
2291 alg_sub_t_m2,
2292 alg_add_factor,
2293 alg_sub_factor,
2294 alg_add_t2_m,
2295 alg_sub_t2_m,
2296 alg_impossible
2297 };
2298
2299 /* This structure holds the "cost" of a multiply sequence. The
2300 "cost" field holds the total rtx_cost of every operator in the
2301 synthetic multiplication sequence, hence cost(a op b) is defined
2302 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2303 The "latency" field holds the minimum possible latency of the
2304 synthetic multiply, on a hypothetical infinitely parallel CPU.
2305 This is the critical path, or the maximum height, of the expression
2306 tree which is the sum of rtx_costs on the most expensive path from
2307 any leaf to the root. Hence latency(a op b) is defined as zero for
2308 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
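/* For instance, for (a << 1) + (a << 2) the cost is
   2 * rtx_cost (shift) + rtx_cost (add), but the latency is only
   rtx_cost (shift) + rtx_cost (add): the two shifts are independent and can
   issue in parallel, so only one of them lies on the critical path.  */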
2309
2310 struct mult_cost {
2311 short cost; /* Total rtx_cost of the multiplication sequence. */
2312 short latency; /* The latency of the multiplication sequence. */
2313 };
2314
2315 /* This macro is used to compare a pointer to a mult_cost against a
2316 single integer "rtx_cost" value. This is equivalent to the macro
2317 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2318 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2319 || ((X)->cost == (Y) && (X)->latency < (Y)))
2320
2321 /* This macro is used to compare two pointers to mult_costs against
2322 each other. The macro returns true if X is cheaper than Y.
2323 Currently, the cheaper of two mult_costs is the one with the
2324 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2325 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2326 || ((X)->cost == (Y)->cost \
2327 && (X)->latency < (Y)->latency))
2328
2329 /* This structure records a sequence of operations.
2330 `ops' is the number of operations recorded.
2331 `cost' is their total cost.
2332 The operations are stored in `op' and the corresponding
2333 logarithms of the integer coefficients in `log'.
2334
2335 These are the operations:
2336 alg_zero total := 0;
2337 alg_m total := multiplicand;
2338 alg_shift total := total * coeff
2339 alg_add_t_m2 total := total + multiplicand * coeff;
2340 alg_sub_t_m2 total := total - multiplicand * coeff;
2341 alg_add_factor total := total * coeff + total;
2342 alg_sub_factor total := total * coeff - total;
2343 alg_add_t2_m total := total * coeff + multiplicand;
2344 alg_sub_t2_m total := total * coeff - multiplicand;
2345
2346 The first operand must be either alg_zero or alg_m. */
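/* As an illustration, multiplying by 45 (binary 101101) can be recorded as

     alg_m                     total := multiplicand              (1 * x)
     alg_add_t2_m, log == 2    total := total * 4 + multiplicand  (5 * x)
     alg_add_factor, log == 3  total := total * 8 + total         (45 * x)

   i.e. two shift-and-add steps after the initial load.  Whether this exact
   sequence is chosen depends on the target's add and shift costs.  */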
2347
2348 struct algorithm
2349 {
2350 struct mult_cost cost;
2351 short ops;
2352 /* The size of the OP and LOG fields are not directly related to the
2353 word size, but the worst-case algorithms will be if we have few
2354 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2355 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2356 in total wordsize operations. */
2357 enum alg_code op[MAX_BITS_PER_WORD];
2358 char log[MAX_BITS_PER_WORD];
2359 };
2360
2361 /* The entry for our multiplication cache/hash table. */
2362 struct alg_hash_entry {
2363 /* The number we are multiplying by. */
2364 unsigned int t;
2365
2366 /* The mode in which we are multiplying something by T. */
2367 enum machine_mode mode;
2368
2369 /* The best multiplication algorithm for t. */
2370 enum alg_code alg;
2371
2372 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2373 Otherwise, the cost within which multiplication by T is
2374 impossible. */
2375 struct mult_cost cost;
2376 };
2377
2378 /* The number of cache/hash entries. */
2379 #define NUM_ALG_HASH_ENTRIES 307
2380
2381 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2382 actually a hash table. If we have a collision, the older
2383 entry is kicked out. */
2384 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2385
2386 /* Indicates the type of fixup needed after a constant multiplication.
2387 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2388 the result should be negated, and ADD_VARIANT means that the
2389 multiplicand should be added to the result. */
2390 enum mult_variant {basic_variant, negate_variant, add_variant};
2391
2392 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2393 const struct mult_cost *, enum machine_mode mode);
2394 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2395 struct algorithm *, enum mult_variant *, int);
2396 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2397 const struct algorithm *, enum mult_variant);
2398 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2399 int, rtx *, int *, int *);
2400 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2401 static rtx extract_high_half (enum machine_mode, rtx);
2402 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2403 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2404 int, int);
2405 /* Compute and return the best algorithm for multiplying by T.
2406 The algorithm must cost less than COST_LIMIT.
2407 If retval.cost >= COST_LIMIT, no algorithm was found and all
2408 other fields of the returned struct are undefined.
2409 MODE is the machine mode of the multiplication. */
2410
2411 static void
2412 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2413 const struct mult_cost *cost_limit, enum machine_mode mode)
2414 {
2415 int m;
2416 struct algorithm *alg_in, *best_alg;
2417 struct mult_cost best_cost;
2418 struct mult_cost new_limit;
2419 int op_cost, op_latency;
2420 unsigned HOST_WIDE_INT q;
2421 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422 int hash_index;
2423 bool cache_hit = false;
2424 enum alg_code cache_alg = alg_zero;
2425
2426 /* Indicate that no algorithm is yet found. If no algorithm
2427 is found, this value will be returned and indicate failure. */
2428 alg_out->cost.cost = cost_limit->cost + 1;
2429 alg_out->cost.latency = cost_limit->latency + 1;
2430
2431 if (cost_limit->cost < 0
2432 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2433 return;
2434
2435 /* Restrict the bits of "t" to the multiplication's mode. */
2436 t &= GET_MODE_MASK (mode);
2437
2438 /* t == 1 can be done in zero cost. */
2439 if (t == 1)
2440 {
2441 alg_out->ops = 1;
2442 alg_out->cost.cost = 0;
2443 alg_out->cost.latency = 0;
2444 alg_out->op[0] = alg_m;
2445 return;
2446 }
2447
2448 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2449 fail now. */
2450 if (t == 0)
2451 {
2452 if (MULT_COST_LESS (cost_limit, zero_cost))
2453 return;
2454 else
2455 {
2456 alg_out->ops = 1;
2457 alg_out->cost.cost = zero_cost;
2458 alg_out->cost.latency = zero_cost;
2459 alg_out->op[0] = alg_zero;
2460 return;
2461 }
2462 }
2463
2464 /* We'll be needing a couple extra algorithm structures now. */
2465
2466 alg_in = alloca (sizeof (struct algorithm));
2467 best_alg = alloca (sizeof (struct algorithm));
2468 best_cost = *cost_limit;
2469
2470 /* Compute the hash index. */
2471 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2472
2473 /* See if we already know what to do for T. */
2474 if (alg_hash[hash_index].t == t
2475 && alg_hash[hash_index].mode == mode
2476 && alg_hash[hash_index].alg != alg_unknown)
2477 {
2478 cache_alg = alg_hash[hash_index].alg;
2479
2480 if (cache_alg == alg_impossible)
2481 {
2482 /* The cache tells us that it's impossible to synthesize
2483 multiplication by T within alg_hash[hash_index].cost. */
2484 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2485 /* COST_LIMIT is at least as restrictive as the one
2486 recorded in the hash table, in which case we have no
2487 hope of synthesizing a multiplication. Just
2488 return. */
2489 return;
2490
2491 /* If we get here, COST_LIMIT is less restrictive than the
2492 one recorded in the hash table, so we may be able to
2493 synthesize a multiplication. Proceed as if we didn't
2494 have the cache entry. */
2495 }
2496 else
2497 {
2498 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2499 /* The cached algorithm shows that this multiplication
2500 requires more cost than COST_LIMIT. Just return. This
2501 way, we don't clobber this cache entry with
2502 alg_impossible but retain useful information. */
2503 return;
2504
2505 cache_hit = true;
2506
2507 switch (cache_alg)
2508 {
2509 case alg_shift:
2510 goto do_alg_shift;
2511
2512 case alg_add_t_m2:
2513 case alg_sub_t_m2:
2514 goto do_alg_addsub_t_m2;
2515
2516 case alg_add_factor:
2517 case alg_sub_factor:
2518 goto do_alg_addsub_factor;
2519
2520 case alg_add_t2_m:
2521 goto do_alg_add_t2_m;
2522
2523 case alg_sub_t2_m:
2524 goto do_alg_sub_t2_m;
2525
2526 default:
2527 gcc_unreachable ();
2528 }
2529 }
2530 }
2531
2532 /* If we have a group of zero bits at the low-order part of T, try
2533 multiplying by the remaining bits and then doing a shift. */
2534
2535 if ((t & 1) == 0)
2536 {
2537 do_alg_shift:
2538 m = floor_log2 (t & -t); /* m = number of low zero bits */
2539 if (m < maxm)
2540 {
2541 q = t >> m;
2542 /* The function expand_shift will choose between a shift and
2543 a sequence of additions, so the observed cost is given as
2544 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2545 op_cost = m * add_cost[mode];
2546 if (shift_cost[mode][m] < op_cost)
2547 op_cost = shift_cost[mode][m];
2548 new_limit.cost = best_cost.cost - op_cost;
2549 new_limit.latency = best_cost.latency - op_cost;
2550 synth_mult (alg_in, q, &new_limit, mode);
2551
2552 alg_in->cost.cost += op_cost;
2553 alg_in->cost.latency += op_cost;
2554 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555 {
2556 struct algorithm *x;
2557 best_cost = alg_in->cost;
2558 x = alg_in, alg_in = best_alg, best_alg = x;
2559 best_alg->log[best_alg->ops] = m;
2560 best_alg->op[best_alg->ops] = alg_shift;
2561 }
2562 }
2563 if (cache_hit)
2564 goto done;
2565 }
2566
2567 /* If we have an odd number, add or subtract one. */
2568 if ((t & 1) != 0)
2569 {
2570 unsigned HOST_WIDE_INT w;
2571
2572 do_alg_addsub_t_m2:
2573 for (w = 1; (w & t) != 0; w <<= 1)
2574 ;
2575 /* If T was -1, then W will be zero after the loop. This is another
2576 case where T ends with ...111. Handling this with (T + 1) and
2577 subtracting 1 produces slightly better code and makes algorithm
2578 selection much faster than treating it like the ...0111 case
2579 below. */
2580 if (w == 0
2581 || (w > 2
2582 /* Reject the case where t is 3.
2583 Thus we prefer addition in that case. */
2584 && t != 3))
2585 {
2586 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2587
2588 op_cost = add_cost[mode];
2589 new_limit.cost = best_cost.cost - op_cost;
2590 new_limit.latency = best_cost.latency - op_cost;
2591 synth_mult (alg_in, t + 1, &new_limit, mode);
2592
2593 alg_in->cost.cost += op_cost;
2594 alg_in->cost.latency += op_cost;
2595 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2596 {
2597 struct algorithm *x;
2598 best_cost = alg_in->cost;
2599 x = alg_in, alg_in = best_alg, best_alg = x;
2600 best_alg->log[best_alg->ops] = 0;
2601 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2602 }
2603 }
2604 else
2605 {
2606 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2607
2608 op_cost = add_cost[mode];
2609 new_limit.cost = best_cost.cost - op_cost;
2610 new_limit.latency = best_cost.latency - op_cost;
2611 synth_mult (alg_in, t - 1, &new_limit, mode);
2612
2613 alg_in->cost.cost += op_cost;
2614 alg_in->cost.latency += op_cost;
2615 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616 {
2617 struct algorithm *x;
2618 best_cost = alg_in->cost;
2619 x = alg_in, alg_in = best_alg, best_alg = x;
2620 best_alg->log[best_alg->ops] = 0;
2621 best_alg->op[best_alg->ops] = alg_add_t_m2;
2622 }
2623 }
2624 if (cache_hit)
2625 goto done;
2626 }
2627
2628 /* Look for factors of t of the form
2629 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2630 If we find such a factor, we can multiply by t using an algorithm that
2631 multiplies by q, shift the result by m and add/subtract it to itself.
2632
2633 We search for large factors first and loop down, even if large factors
2634 are less probable than small; if we find a large factor we will find a
2635 good sequence quickly, and therefore be able to prune (by decreasing
2636 COST_LIMIT) the search. */
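/* For example, with t == 45 the loop finds d == 2**3 + 1 == 9 at m == 3:
   since 45 % 9 == 0 we recurse on 45 / 9 == 5 and append an alg_add_factor
   step (total := total * 8 + total).  For a divisor of the form 2**m - 1
   the appended step is alg_sub_factor instead.  */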
2637
2638 do_alg_addsub_factor:
2639 for (m = floor_log2 (t - 1); m >= 2; m--)
2640 {
2641 unsigned HOST_WIDE_INT d;
2642
2643 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2644 if (t % d == 0 && t > d && m < maxm
2645 && (!cache_hit || cache_alg == alg_add_factor))
2646 {
2647 /* If the target has a cheap shift-and-add instruction use
2648 that in preference to a shift insn followed by an add insn.
2649 Assume that the shift-and-add is "atomic" with a latency
2650 equal to its cost, otherwise assume that on superscalar
2651 hardware the shift may be executed concurrently with the
2652 earlier steps in the algorithm. */
2653 op_cost = add_cost[mode] + shift_cost[mode][m];
2654 if (shiftadd_cost[mode][m] < op_cost)
2655 {
2656 op_cost = shiftadd_cost[mode][m];
2657 op_latency = op_cost;
2658 }
2659 else
2660 op_latency = add_cost[mode];
2661
2662 new_limit.cost = best_cost.cost - op_cost;
2663 new_limit.latency = best_cost.latency - op_latency;
2664 synth_mult (alg_in, t / d, &new_limit, mode);
2665
2666 alg_in->cost.cost += op_cost;
2667 alg_in->cost.latency += op_latency;
2668 if (alg_in->cost.latency < op_cost)
2669 alg_in->cost.latency = op_cost;
2670 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2671 {
2672 struct algorithm *x;
2673 best_cost = alg_in->cost;
2674 x = alg_in, alg_in = best_alg, best_alg = x;
2675 best_alg->log[best_alg->ops] = m;
2676 best_alg->op[best_alg->ops] = alg_add_factor;
2677 }
2678 /* Other factors will have been taken care of in the recursion. */
2679 break;
2680 }
2681
2682 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2683 if (t % d == 0 && t > d && m < maxm
2684 && (!cache_hit || cache_alg == alg_sub_factor))
2685 {
2686 /* If the target has a cheap shift-and-subtract insn use
2687 that in preference to a shift insn followed by a sub insn.
2688 Assume that the shift-and-sub is "atomic" with a latency
2689 equal to its cost, otherwise assume that on superscalar
2690 hardware the shift may be executed concurrently with the
2691 earlier steps in the algorithm. */
2692 op_cost = add_cost[mode] + shift_cost[mode][m];
2693 if (shiftsub_cost[mode][m] < op_cost)
2694 {
2695 op_cost = shiftsub_cost[mode][m];
2696 op_latency = op_cost;
2697 }
2698 else
2699 op_latency = add_cost[mode];
2700
2701 new_limit.cost = best_cost.cost - op_cost;
2702 new_limit.latency = best_cost.latency - op_latency;
2703 synth_mult (alg_in, t / d, &new_limit, mode);
2704
2705 alg_in->cost.cost += op_cost;
2706 alg_in->cost.latency += op_latency;
2707 if (alg_in->cost.latency < op_cost)
2708 alg_in->cost.latency = op_cost;
2709 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2710 {
2711 struct algorithm *x;
2712 best_cost = alg_in->cost;
2713 x = alg_in, alg_in = best_alg, best_alg = x;
2714 best_alg->log[best_alg->ops] = m;
2715 best_alg->op[best_alg->ops] = alg_sub_factor;
2716 }
2717 break;
2718 }
2719 }
2720 if (cache_hit)
2721 goto done;
2722
2723 /* Try shift-and-add (load effective address) instructions,
2724 i.e. do a*3, a*5, a*9. */
2725 if ((t & 1) != 0)
2726 {
2727 do_alg_add_t2_m:
2728 q = t - 1;
2729 q = q & -q;
2730 m = exact_log2 (q);
2731 if (m >= 0 && m < maxm)
2732 {
2733 op_cost = shiftadd_cost[mode][m];
2734 new_limit.cost = best_cost.cost - op_cost;
2735 new_limit.latency = best_cost.latency - op_cost;
2736 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2737
2738 alg_in->cost.cost += op_cost;
2739 alg_in->cost.latency += op_cost;
2740 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2741 {
2742 struct algorithm *x;
2743 best_cost = alg_in->cost;
2744 x = alg_in, alg_in = best_alg, best_alg = x;
2745 best_alg->log[best_alg->ops] = m;
2746 best_alg->op[best_alg->ops] = alg_add_t2_m;
2747 }
2748 }
2749 if (cache_hit)
2750 goto done;
2751
2752 do_alg_sub_t2_m:
2753 q = t + 1;
2754 q = q & -q;
2755 m = exact_log2 (q);
2756 if (m >= 0 && m < maxm)
2757 {
2758 op_cost = shiftsub_cost[mode][m];
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_cost;
2761 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2762
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_cost;
2765 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766 {
2767 struct algorithm *x;
2768 best_cost = alg_in->cost;
2769 x = alg_in, alg_in = best_alg, best_alg = x;
2770 best_alg->log[best_alg->ops] = m;
2771 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2772 }
2773 }
2774 if (cache_hit)
2775 goto done;
2776 }
2777
2778 done:
2779 /* If best_cost has not decreased, we have not found any algorithm. */
2780 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2781 {
2782 /* We failed to find an algorithm. Record alg_impossible for
2783 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2784 we are asked to find an algorithm for T within the same or
2785 lower COST_LIMIT, we can immediately return to the
2786 caller. */
2787 alg_hash[hash_index].t = t;
2788 alg_hash[hash_index].mode = mode;
2789 alg_hash[hash_index].alg = alg_impossible;
2790 alg_hash[hash_index].cost = *cost_limit;
2791 return;
2792 }
2793
2794 /* Cache the result. */
2795 if (!cache_hit)
2796 {
2797 alg_hash[hash_index].t = t;
2798 alg_hash[hash_index].mode = mode;
2799 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2800 alg_hash[hash_index].cost.cost = best_cost.cost;
2801 alg_hash[hash_index].cost.latency = best_cost.latency;
2802 }
2803
2804 /* If we are getting a sequence too long for `struct algorithm'
2805 to record, make this search fail. */
2806 if (best_alg->ops == MAX_BITS_PER_WORD)
2807 return;
2808
2809 /* Copy the algorithm from temporary space to the space at alg_out.
2810 We avoid using structure assignment because the majority of
2811 best_alg is normally undefined, and this is a critical function. */
2812 alg_out->ops = best_alg->ops + 1;
2813 alg_out->cost = best_cost;
2814 memcpy (alg_out->op, best_alg->op,
2815 alg_out->ops * sizeof *alg_out->op);
2816 memcpy (alg_out->log, best_alg->log,
2817 alg_out->ops * sizeof *alg_out->log);
2818 }
2819 \f
2820 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2821 Try three variations:
2822
2823 - a shift/add sequence based on VAL itself
2824 - a shift/add sequence based on -VAL, followed by a negation
2825 - a shift/add sequence based on VAL - 1, followed by an addition.
2826
2827 Return true if the cheapest of these cost less than MULT_COST,
2828 describing the algorithm in *ALG and final fixup in *VARIANT. */
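/* For instance, for VAL == -3 it is usually cheaper to synthesize 3 * x with
   one shift-and-add and negate the result (negate_variant) than to build the
   constant ...11101 directly; for VAL == 0x10001 the VAL - 1 sequence is a
   single shift by 16 followed by adding the multiplicand (add_variant).
   The final choice depends on neg_cost and add_cost for MODE.  */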
2829
2830 static bool
2831 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2832 struct algorithm *alg, enum mult_variant *variant,
2833 int mult_cost)
2834 {
2835 struct algorithm alg2;
2836 struct mult_cost limit;
2837 int op_cost;
2838
2839 *variant = basic_variant;
2840 limit.cost = mult_cost;
2841 limit.latency = mult_cost;
2842 synth_mult (alg, val, &limit, mode);
2843
2844 /* This works only if the inverted value actually fits in an
2845 `unsigned int'. */
2846 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2847 {
2848 op_cost = neg_cost[mode];
2849 if (MULT_COST_LESS (&alg->cost, mult_cost))
2850 {
2851 limit.cost = alg->cost.cost - op_cost;
2852 limit.latency = alg->cost.latency - op_cost;
2853 }
2854 else
2855 {
2856 limit.cost = mult_cost - op_cost;
2857 limit.latency = mult_cost - op_cost;
2858 }
2859
2860 synth_mult (&alg2, -val, &limit, mode);
2861 alg2.cost.cost += op_cost;
2862 alg2.cost.latency += op_cost;
2863 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2864 *alg = alg2, *variant = negate_variant;
2865 }
2866
2867 /* This proves very useful for division-by-constant. */
2868 op_cost = add_cost[mode];
2869 if (MULT_COST_LESS (&alg->cost, mult_cost))
2870 {
2871 limit.cost = alg->cost.cost - op_cost;
2872 limit.latency = alg->cost.latency - op_cost;
2873 }
2874 else
2875 {
2876 limit.cost = mult_cost - op_cost;
2877 limit.latency = mult_cost - op_cost;
2878 }
2879
2880 synth_mult (&alg2, val - 1, &limit, mode);
2881 alg2.cost.cost += op_cost;
2882 alg2.cost.latency += op_cost;
2883 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2884 *alg = alg2, *variant = add_variant;
2885
2886 return MULT_COST_LESS (&alg->cost, mult_cost);
2887 }
2888
2889 /* A subroutine of expand_mult, used for constant multiplications.
2890 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2891 convenient. Use the shift/add sequence described by ALG and apply
2892 the final fixup specified by VARIANT. */
2893
2894 static rtx
2895 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2896 rtx target, const struct algorithm *alg,
2897 enum mult_variant variant)
2898 {
2899 HOST_WIDE_INT val_so_far;
2900 rtx insn, accum, tem;
2901 int opno;
2902 enum machine_mode nmode;
2903
2904 /* Avoid referencing memory over and over.
2905 For speed, but also for correctness when mem is volatile. */
2906 if (MEM_P (op0))
2907 op0 = force_reg (mode, op0);
2908
2909 /* ACCUM starts out either as OP0 or as a zero, depending on
2910 the first operation. */
2911
2912 if (alg->op[0] == alg_zero)
2913 {
2914 accum = copy_to_mode_reg (mode, const0_rtx);
2915 val_so_far = 0;
2916 }
2917 else if (alg->op[0] == alg_m)
2918 {
2919 accum = copy_to_mode_reg (mode, op0);
2920 val_so_far = 1;
2921 }
2922 else
2923 gcc_unreachable ();
2924
2925 for (opno = 1; opno < alg->ops; opno++)
2926 {
2927 int log = alg->log[opno];
2928 rtx shift_subtarget = optimize ? 0 : accum;
2929 rtx add_target
2930 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2931 && !optimize)
2932 ? target : 0;
2933 rtx accum_target = optimize ? 0 : accum;
2934
2935 switch (alg->op[opno])
2936 {
2937 case alg_shift:
2938 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2939 build_int_cst (NULL_TREE, log),
2940 NULL_RTX, 0);
2941 val_so_far <<= log;
2942 break;
2943
2944 case alg_add_t_m2:
2945 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2946 build_int_cst (NULL_TREE, log),
2947 NULL_RTX, 0);
2948 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2949 add_target ? add_target : accum_target);
2950 val_so_far += (HOST_WIDE_INT) 1 << log;
2951 break;
2952
2953 case alg_sub_t_m2:
2954 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2955 build_int_cst (NULL_TREE, log),
2956 NULL_RTX, 0);
2957 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2958 add_target ? add_target : accum_target);
2959 val_so_far -= (HOST_WIDE_INT) 1 << log;
2960 break;
2961
2962 case alg_add_t2_m:
2963 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2964 build_int_cst (NULL_TREE, log),
2965 shift_subtarget,
2966 0);
2967 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2968 add_target ? add_target : accum_target);
2969 val_so_far = (val_so_far << log) + 1;
2970 break;
2971
2972 case alg_sub_t2_m:
2973 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2974 build_int_cst (NULL_TREE, log),
2975 shift_subtarget, 0);
2976 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2977 add_target ? add_target : accum_target);
2978 val_so_far = (val_so_far << log) - 1;
2979 break;
2980
2981 case alg_add_factor:
2982 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2983 build_int_cst (NULL_TREE, log),
2984 NULL_RTX, 0);
2985 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2986 add_target ? add_target : accum_target);
2987 val_so_far += val_so_far << log;
2988 break;
2989
2990 case alg_sub_factor:
2991 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2992 build_int_cst (NULL_TREE, log),
2993 NULL_RTX, 0);
2994 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2995 (add_target
2996 ? add_target : (optimize ? 0 : tem)));
2997 val_so_far = (val_so_far << log) - val_so_far;
2998 break;
2999
3000 default:
3001 gcc_unreachable ();
3002 }
3003
3004 /* Write a REG_EQUAL note on the last insn so that we can cse
3005 multiplication sequences. Note that if ACCUM is a SUBREG,
3006 we've set the inner register and must properly indicate
3007 that. */
3008
3009 tem = op0, nmode = mode;
3010 if (GET_CODE (accum) == SUBREG)
3011 {
3012 nmode = GET_MODE (SUBREG_REG (accum));
3013 tem = gen_lowpart (nmode, op0);
3014 }
3015
3016 insn = get_last_insn ();
3017 set_unique_reg_note (insn, REG_EQUAL,
3018 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3019 }
3020
3021 if (variant == negate_variant)
3022 {
3023 val_so_far = -val_so_far;
3024 accum = expand_unop (mode, neg_optab, accum, target, 0);
3025 }
3026 else if (variant == add_variant)
3027 {
3028 val_so_far = val_so_far + 1;
3029 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3030 }
3031
3032 /* Compare only the bits of val and val_so_far that are significant
3033 in the result mode, to avoid sign-/zero-extension confusion. */
3034 val &= GET_MODE_MASK (mode);
3035 val_so_far &= GET_MODE_MASK (mode);
3036 gcc_assert (val == val_so_far);
3037
3038 return accum;
3039 }
3040
3041 /* Perform a multiplication and return an rtx for the result.
3042 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3043 TARGET is a suggestion for where to store the result (an rtx).
3044
3045 We check specially for a constant integer as OP1.
3046 If you want this check for OP0 as well, then before calling
3047 you should swap the two operands if OP0 would be constant. */
3048
3049 rtx
3050 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3051 int unsignedp)
3052 {
3053 enum mult_variant variant;
3054 struct algorithm algorithm;
3055 int max_cost;
3056
3057 /* Handling const0_rtx here allows us to use zero as a rogue value for
3058 coeff below. */
3059 if (op1 == const0_rtx)
3060 return const0_rtx;
3061 if (op1 == const1_rtx)
3062 return op0;
3063 if (op1 == constm1_rtx)
3064 return expand_unop (mode,
3065 GET_MODE_CLASS (mode) == MODE_INT
3066 && !unsignedp && flag_trapv
3067 ? negv_optab : neg_optab,
3068 op0, target, 0);
3069
3070 /* These are the operations that are potentially turned into a sequence
3071 of shifts and additions. */
3072 if (SCALAR_INT_MODE_P (mode)
3073 && (unsignedp || !flag_trapv))
3074 {
3075 HOST_WIDE_INT coeff = 0;
3076 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3077
3078 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3079 less than or equal in size to `unsigned int' this doesn't matter.
3080 If the mode is larger than `unsigned int', then synth_mult works
3081 only if the constant value exactly fits in an `unsigned int' without
3082 any truncation. This means that multiplying by negative values does
3083 not work; results are off by 2^32 on a 32 bit machine. */
3084
3085 if (GET_CODE (op1) == CONST_INT)
3086 {
3087 /* Attempt to handle multiplication of DImode values by negative
3088 coefficients, by performing the multiplication by a positive
3089 multiplier and then inverting the result. */
3090 if (INTVAL (op1) < 0
3091 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3092 {
3093 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3094 result is interpreted as an unsigned coefficient.
3095 Exclude cost of op0 from max_cost to match the cost
3096 calculation of the synth_mult. */
3097 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3098 - neg_cost[mode];
3099 if (max_cost > 0
3100 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3101 &variant, max_cost))
3102 {
3103 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3104 NULL_RTX, &algorithm,
3105 variant);
3106 return expand_unop (mode, neg_optab, temp, target, 0);
3107 }
3108 }
3109 else coeff = INTVAL (op1);
3110 }
3111 else if (GET_CODE (op1) == CONST_DOUBLE)
3112 {
3113 /* If we are multiplying in DImode, it may still be a win
3114 to try to work with shifts and adds. */
3115 if (CONST_DOUBLE_HIGH (op1) == 0)
3116 coeff = CONST_DOUBLE_LOW (op1);
3117 else if (CONST_DOUBLE_LOW (op1) == 0
3118 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3119 {
3120 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3121 + HOST_BITS_PER_WIDE_INT;
3122 return expand_shift (LSHIFT_EXPR, mode, op0,
3123 build_int_cst (NULL_TREE, shift),
3124 target, unsignedp);
3125 }
3126 }
3127
3128 /* We used to test optimize here, on the grounds that it's better to
3129 produce a smaller program when -O is not used. But this causes
3130 such a terrible slowdown sometimes that it seems better to always
3131 use synth_mult. */
3132 if (coeff != 0)
3133 {
3134 /* Special case powers of two. */
3135 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3136 return expand_shift (LSHIFT_EXPR, mode, op0,
3137 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3138 target, unsignedp);
3139
3140 /* Exclude cost of op0 from max_cost to match the cost
3141 calculation of the synth_mult. */
3142 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3143 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3144 max_cost))
3145 return expand_mult_const (mode, op0, coeff, target,
3146 &algorithm, variant);
3147 }
3148 }
3149
3150 if (GET_CODE (op0) == CONST_DOUBLE)
3151 {
3152 rtx temp = op0;
3153 op0 = op1;
3154 op1 = temp;
3155 }
3156
3157 /* Expand x*2.0 as x+x. */
3158 if (GET_CODE (op1) == CONST_DOUBLE
3159 && SCALAR_FLOAT_MODE_P (mode))
3160 {
3161 REAL_VALUE_TYPE d;
3162 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3163
3164 if (REAL_VALUES_EQUAL (d, dconst2))
3165 {
3166 op0 = force_reg (GET_MODE (op0), op0);
3167 return expand_binop (mode, add_optab, op0, op0,
3168 target, unsignedp, OPTAB_LIB_WIDEN);
3169 }
3170 }
3171
3172 /* This used to use umul_optab if unsigned, but for non-widening multiply
3173 there is no difference between signed and unsigned. */
3174 op0 = expand_binop (mode,
3175 ! unsignedp
3176 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3177 ? smulv_optab : smul_optab,
3178 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3179 gcc_assert (op0);
3180 return op0;
3181 }
3182 \f
3183 /* Return the smallest n such that 2**n >= X. */
3184
3185 int
3186 ceil_log2 (unsigned HOST_WIDE_INT x)
3187 {
3188 return floor_log2 (x - 1) + 1;
3189 }
3190
3191 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3192 replace division by D, and put the least significant N bits of the result
3193 in *MULTIPLIER_PTR and return the most significant bit.
3194
3195 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3196 needed precision is in PRECISION (should be <= N).
3197
3198 PRECISION should be as small as possible so this function can choose
3199 multiplier more freely.
3200
3201 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3202 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3203
3204 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3205 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
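/* For example, for D == 3 with N == PRECISION == 32 this yields the familiar
   reciprocal 0xAAAAAAAB with *POST_SHIFT_PTR == 1 and *LGUP_PTR == 2, so the
   quotient is the high half of the 32x32->64 multiply by 0xAAAAAAAB shifted
   right by 1, i.e. x / 3 == (x * 0xAAAAAAAB) >> 33 for any 32-bit unsigned x.
   For divisors such as 7 the minimal multiplier needs N + 1 bits; the
   function then returns 1 and the caller must emit the extra add/shift
   fixup sequence.  */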
3206
3207 static
3208 unsigned HOST_WIDE_INT
3209 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3210 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3211 {
3212 HOST_WIDE_INT mhigh_hi, mlow_hi;
3213 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3214 int lgup, post_shift;
3215 int pow, pow2;
3216 unsigned HOST_WIDE_INT nl, dummy1;
3217 HOST_WIDE_INT nh, dummy2;
3218
3219 /* lgup = ceil(log2(divisor)); */
3220 lgup = ceil_log2 (d);
3221
3222 gcc_assert (lgup <= n);
3223
3224 pow = n + lgup;
3225 pow2 = n + lgup - precision;
3226
3227 /* We could handle this with some effort, but this case is much
3228 better handled directly with a scc insn, so rely on caller using
3229 that. */
3230 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3231
3232 /* mlow = 2^(N + lgup)/d */
3233 if (pow >= HOST_BITS_PER_WIDE_INT)
3234 {
3235 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3236 nl = 0;
3237 }
3238 else
3239 {
3240 nh = 0;
3241 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3242 }
3243 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3244 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3245
3246 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3247 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3248 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3249 else
3250 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3251 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3252 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3253
3254 gcc_assert (!mhigh_hi || nh - d < d);
3255 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3256 /* Assert that mlow < mhigh. */
3257 gcc_assert (mlow_hi < mhigh_hi
3258 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3259
3260 /* If precision == N, then mlow, mhigh exceed 2^N
3261 (but they do not exceed 2^(N+1)). */
3262
3263 /* Reduce to lowest terms. */
3264 for (post_shift = lgup; post_shift > 0; post_shift--)
3265 {
3266 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3267 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3268 if (ml_lo >= mh_lo)
3269 break;
3270
3271 mlow_hi = 0;
3272 mlow_lo = ml_lo;
3273 mhigh_hi = 0;
3274 mhigh_lo = mh_lo;
3275 }
3276
3277 *post_shift_ptr = post_shift;
3278 *lgup_ptr = lgup;
3279 if (n < HOST_BITS_PER_WIDE_INT)
3280 {
3281 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3282 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3283 return mhigh_lo >= mask;
3284 }
3285 else
3286 {
3287 *multiplier_ptr = GEN_INT (mhigh_lo);
3288 return mhigh_hi;
3289 }
3290 }
3291
3292 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3293 congruent to 1 (mod 2**N). */
3294
3295 static unsigned HOST_WIDE_INT
3296 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3297 {
3298 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3299
3300 /* The algorithm notes that the choice y = x satisfies
3301 x*y == 1 mod 2^3, since x is assumed odd.
3302 Each iteration doubles the number of bits of significance in y. */
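/* Worked example for x == 3, n == 8: y starts as 3 (3 * 3 == 9 == 1 mod 8).
   The first iteration gives y = 3 * (2 - 9) & 0xff == 235, correct mod 2**6;
   the second gives y = 235 * (2 - 705) & 0xff == 171, and indeed
   3 * 171 == 513 == 2 * 256 + 1.  */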
3303
3304 unsigned HOST_WIDE_INT mask;
3305 unsigned HOST_WIDE_INT y = x;
3306 int nbit = 3;
3307
3308 mask = (n == HOST_BITS_PER_WIDE_INT
3309 ? ~(unsigned HOST_WIDE_INT) 0
3310 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3311
3312 while (nbit < n)
3313 {
3314 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3315 nbit *= 2;
3316 }
3317 return y;
3318 }
3319
3320 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3321 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3322 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3323 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3324 become signed.
3325
3326 The result is put in TARGET if that is convenient.
3327
3328 MODE is the mode of operation. */
3329
3330 rtx
3331 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3332 rtx op1, rtx target, int unsignedp)
3333 {
3334 rtx tem;
3335 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3336
3337 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3338 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3339 NULL_RTX, 0);
3340 tem = expand_and (mode, tem, op1, NULL_RTX);
3341 adj_operand
3342 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3343 adj_operand);
3344
3345 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3346 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3347 NULL_RTX, 0);
3348 tem = expand_and (mode, tem, op0, NULL_RTX);
3349 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3350 target);
3351
3352 return target;
3353 }
3354
3355 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3356
3357 static rtx
3358 extract_high_half (enum machine_mode mode, rtx op)
3359 {
3360 enum machine_mode wider_mode;
3361
3362 if (mode == word_mode)
3363 return gen_highpart (mode, op);
3364
3365 wider_mode = GET_MODE_WIDER_MODE (mode);
3366 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3367 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3368 return convert_modes (mode, wider_mode, op, 0);
3369 }
3370
3371 /* Like expand_mult_highpart, but only consider using a multiplication
3372 optab. OP1 is an rtx for the constant operand. */
3373
3374 static rtx
3375 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3376 rtx target, int unsignedp, int max_cost)
3377 {
3378 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3379 enum machine_mode wider_mode;
3380 optab moptab;
3381 rtx tem;
3382 int size;
3383
3384 wider_mode = GET_MODE_WIDER_MODE (mode);
3385 size = GET_MODE_BITSIZE (mode);
3386
3387 /* Firstly, try using a multiplication insn that only generates the needed
3388 high part of the product, and in the sign flavor of unsignedp. */
3389 if (mul_highpart_cost[mode] < max_cost)
3390 {
3391 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3392 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3393 unsignedp, OPTAB_DIRECT);
3394 if (tem)
3395 return tem;
3396 }
3397
3398 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3399 Need to adjust the result after the multiplication. */
3400 if (size - 1 < BITS_PER_WORD
3401 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3402 + 4 * add_cost[mode] < max_cost))
3403 {
3404 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3405 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3406 unsignedp, OPTAB_DIRECT);
3407 if (tem)
3408 /* We used the wrong signedness. Adjust the result. */
3409 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3410 tem, unsignedp);
3411 }
3412
3413 /* Try widening multiplication. */
3414 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3415 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3416 && mul_widen_cost[wider_mode] < max_cost)
3417 {
3418 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3419 unsignedp, OPTAB_WIDEN);
3420 if (tem)
3421 return extract_high_half (mode, tem);
3422 }
3423
3424 /* Try widening the mode and performing a non-widening multiplication. */
3425 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3426 && size - 1 < BITS_PER_WORD
3427 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3428 {
3429 rtx insns, wop0, wop1;
3430
3431 /* We need to widen the operands, for example to ensure the
3432 constant multiplier is correctly sign or zero extended.
3433 Use a sequence to clean up any instructions emitted by
3434 the conversions if things don't work out. */
3435 start_sequence ();
3436 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3437 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3438 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3439 unsignedp, OPTAB_WIDEN);
3440 insns = get_insns ();
3441 end_sequence ();
3442
3443 if (tem)
3444 {
3445 emit_insn (insns);
3446 return extract_high_half (mode, tem);
3447 }
3448 }
3449
3450 /* Try widening multiplication of opposite signedness, and adjust. */
3451 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3452 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3453 && size - 1 < BITS_PER_WORD
3454 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3455 + 4 * add_cost[mode] < max_cost))
3456 {
3457 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3458 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3459 if (tem != 0)
3460 {
3461 tem = extract_high_half (mode, tem);
3462 /* We used the wrong signedness. Adjust the result. */
3463 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3464 target, unsignedp);
3465 }
3466 }
3467
3468 return 0;
3469 }
3470
3471 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3472 putting the high half of the result in TARGET if that is convenient,
3473 and returning where the result is. If the operation cannot be performed,
3474 0 is returned.
3475
3476 MODE is the mode of operation and result.
3477
3478 UNSIGNEDP nonzero means unsigned multiply.
3479
3480 MAX_COST is the total allowed cost for the expanded RTL. */
3481
3482 static rtx
3483 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3484 rtx target, int unsignedp, int max_cost)
3485 {
3486 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3487 unsigned HOST_WIDE_INT cnst1;
3488 int extra_cost;
3489 bool sign_adjust = false;
3490 enum mult_variant variant;
3491 struct algorithm alg;
3492 rtx tem;
3493
3494 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3495 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3496
3497 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3498
3499 /* We can't optimize modes wider than BITS_PER_WORD.
3500 ??? We might be able to perform double-word arithmetic if
3501 mode == word_mode, however all the cost calculations in
3502 synth_mult etc. assume single-word operations. */
3503 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3504 return expand_mult_highpart_optab (mode, op0, op1, target,
3505 unsignedp, max_cost);
3506
3507 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3508
3509 /* Check whether we try to multiply by a negative constant. */
3510 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3511 {
3512 sign_adjust = true;
3513 extra_cost += add_cost[mode];
3514 }
3515
3516 /* See whether shift/add multiplication is cheap enough. */
3517 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3518 max_cost - extra_cost))
3519 {
3520 /* See whether the specialized multiplication optabs are
3521 cheaper than the shift/add version. */
3522 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3523 alg.cost.cost + extra_cost);
3524 if (tem)
3525 return tem;
3526
3527 tem = convert_to_mode (wider_mode, op0, unsignedp);
3528 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3529 tem = extract_high_half (mode, tem);
3530
3531 /* Adjust result for signedness. */
3532 if (sign_adjust)
3533 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3534
3535 return tem;
3536 }
3537 return expand_mult_highpart_optab (mode, op0, op1, target,
3538 unsignedp, max_cost);
3539 }
3540
3541
3542 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3543
3544 static rtx
3545 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3546 {
3547 unsigned HOST_WIDE_INT masklow, maskhigh;
3548 rtx result, temp, shift, label;
3549 int logd;
3550
3551 logd = floor_log2 (d);
3552 result = gen_reg_rtx (mode);
3553
3554 /* Avoid conditional branches when they're expensive. */
3555 if (BRANCH_COST >= 2
3556 && !optimize_size)
3557 {
3558 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3559 mode, 0, -1);
3560 if (signmask)
3561 {
3562 signmask = force_reg (mode, signmask);
3563 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3564 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3565
3566 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3567 which instruction sequence to use. If logical right shifts
3568 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3569 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3570
3571 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3572 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3573 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3574 {
3575 temp = expand_binop (mode, xor_optab, op0, signmask,
3576 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3577 temp = expand_binop (mode, sub_optab, temp, signmask,
3578 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3579 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3581 temp = expand_binop (mode, xor_optab, temp, signmask,
3582 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3583 temp = expand_binop (mode, sub_optab, temp, signmask,
3584 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3585 }
3586 else
3587 {
3588 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3589 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3590 signmask = force_reg (mode, signmask);
3591
3592 temp = expand_binop (mode, add_optab, op0, signmask,
3593 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3594 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3595 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3596 temp = expand_binop (mode, sub_optab, temp, signmask,
3597 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3598 }
3599 return temp;
3600 }
3601 }
3602
3603 /* Mask contains the mode's signbit and the significant bits of the
3604 modulus. By including the signbit in the operation, many targets
3605 can avoid an explicit compare operation in the following comparison
3606 against zero. */
3607
3608 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3609 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3610 {
3611 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3612 maskhigh = -1;
3613 }
3614 else
3615 maskhigh = (HOST_WIDE_INT) -1
3616 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3617
3618 temp = expand_binop (mode, and_optab, op0,
3619 immed_double_const (masklow, maskhigh, mode),
3620 result, 1, OPTAB_LIB_WIDEN);
3621 if (temp != result)
3622 emit_move_insn (result, temp);
3623
3624 label = gen_label_rtx ();
3625 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3626
3627 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3628 0, OPTAB_LIB_WIDEN);
3629 masklow = (HOST_WIDE_INT) -1 << logd;
3630 maskhigh = -1;
3631 temp = expand_binop (mode, ior_optab, temp,
3632 immed_double_const (masklow, maskhigh, mode),
3633 result, 1, OPTAB_LIB_WIDEN);
3634 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3635 0, OPTAB_LIB_WIDEN);
3636 if (temp != result)
3637 emit_move_insn (result, temp);
3638 emit_label (label);
3639 return result;
3640 }
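/* Illustrative sketch only (hypothetical helper, not referenced by this
   file): the first branch-free sequence emitted above, written out for a
   32-bit int with an arithmetic right shift and D = 1 << LOGD.  It yields
   the truncating remainder X % D, ignoring the INT_MIN corner case.  */

static int
smod_pow2_example (int x, int logd)
{
  int d = 1 << logd;
  int s = x >> 31;                       /* all ones when x is negative */
  return ((((x ^ s) - s) & (d - 1)) ^ s) - s;
}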
3641
3642 /* Expand signed division of OP0 by a power of two D in mode MODE.
3643 This routine is only called for positive values of D. */
3644
3645 static rtx
3646 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3647 {
3648 rtx temp, label;
3649 tree shift;
3650 int logd;
3651
3652 logd = floor_log2 (d);
3653 shift = build_int_cst (NULL_TREE, logd);
3654
3655 if (d == 2 && BRANCH_COST >= 1)
3656 {
3657 temp = gen_reg_rtx (mode);
3658 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3659 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3660 0, OPTAB_LIB_WIDEN);
3661 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3662 }
3663
3664 #ifdef HAVE_conditional_move
3665 if (BRANCH_COST >= 2)
3666 {
3667 rtx temp2;
3668
3669 /* ??? emit_conditional_move forces a stack adjustment via
3670 compare_from_rtx so, if the sequence is discarded, it will
3671 be lost. Do it now instead. */
3672 do_pending_stack_adjust ();
3673
3674 start_sequence ();
3675 temp2 = copy_to_mode_reg (mode, op0);
3676 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3677 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3678 temp = force_reg (mode, temp);
3679
3680 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3681 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3682 mode, temp, temp2, mode, 0);
3683 if (temp2)
3684 {
3685 rtx seq = get_insns ();
3686 end_sequence ();
3687 emit_insn (seq);
3688 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3689 }
3690 end_sequence ();
3691 }
3692 #endif
3693
3694 if (BRANCH_COST >= 2)
3695 {
3696 int ushift = GET_MODE_BITSIZE (mode) - logd;
3697
3698 temp = gen_reg_rtx (mode);
3699 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3700 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3701 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3702 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3703 else
3704 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3705 build_int_cst (NULL_TREE, ushift),
3706 NULL_RTX, 1);
3707 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3708 0, OPTAB_LIB_WIDEN);
3709 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3710 }
3711
3712 label = gen_label_rtx ();
3713 temp = copy_to_mode_reg (mode, op0);
3714 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3715 expand_inc (temp, GEN_INT (d - 1));
3716 emit_label (label);
3717 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3718 }
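/* Illustrative sketch only (hypothetical helper, not referenced by this
   file): the bias-and-shift identity that the branch-free variants above
   implement, for a 32-bit int with an arithmetic right shift.  */

static int
sdiv_pow2_example (int x, int logd)
{
  int bias = (x >> 31) & ((1 << logd) - 1);   /* d - 1 when x is negative */
  return (x + bias) >> logd;                  /* truncating x / (1 << logd) */
}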
3719 \f
3720 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3721 if that is convenient, and returning where the result is.
3722 You may request either the quotient or the remainder as the result;
3723 specify REM_FLAG nonzero to get the remainder.
3724
3725 CODE is the expression code for which kind of division this is;
3726 it controls how rounding is done. MODE is the machine mode to use.
3727 UNSIGNEDP nonzero means do unsigned division. */
3728
3729 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3730 and then correct it by or'ing in missing high bits
3731 if result of ANDI is nonzero.
3732 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3733 This could optimize to a bfexts instruction.
3734 But C doesn't use these operations, so their optimizations are
3735 left for later. */
3736 /* ??? For modulo, we don't actually need the highpart of the first product,
3737 the low part will do nicely. And for small divisors, the second multiply
3738 can also be a low-part only multiply or even be completely left out.
3739 E.g. to calculate the remainder of a division by 3 with a 32 bit
3740 multiply, multiply with 0x55555556 and extract the upper two bits;
3741 the result is exact for inputs up to 0x1fffffff.
3742 The input range can be reduced by using cross-sum rules.
3743 For odd divisors >= 3, the following table gives right shift counts
3744 so that if a number is shifted by an integer multiple of the given
3745 amount, the remainder stays the same:
3746 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3747 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3748 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3749 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3750 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3751
3752 Cross-sum rules for even numbers can be derived by leaving as many bits
3753 to the right alone as the divisor has zeros to the right.
3754 E.g. if x is an unsigned 32 bit number:
3755 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3756 */
3757
3758 rtx
3759 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3760 rtx op0, rtx op1, rtx target, int unsignedp)
3761 {
3762 enum machine_mode compute_mode;
3763 rtx tquotient;
3764 rtx quotient = 0, remainder = 0;
3765 rtx last;
3766 int size;
3767 rtx insn, set;
3768 optab optab1, optab2;
3769 int op1_is_constant, op1_is_pow2 = 0;
3770 int max_cost, extra_cost;
3771 static HOST_WIDE_INT last_div_const = 0;
3772 static HOST_WIDE_INT ext_op1;
3773
3774 op1_is_constant = GET_CODE (op1) == CONST_INT;
3775 if (op1_is_constant)
3776 {
3777 ext_op1 = INTVAL (op1);
3778 if (unsignedp)
3779 ext_op1 &= GET_MODE_MASK (mode);
3780 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3781 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3782 }
3783
3784 /*
3785 This is the structure of expand_divmod:
3786
3787 First comes code to fix up the operands so we can perform the operations
3788 correctly and efficiently.
3789
3790 Second comes a switch statement with code specific for each rounding mode.
3791 For some special operands this code emits all RTL for the desired
3792 operation, for other cases, it generates only a quotient and stores it in
3793 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3794 to indicate that it has not done anything.
3795
3796 Last comes code that finishes the operation. If QUOTIENT is set and
3797 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3798 QUOTIENT is not set, it is computed using trunc rounding.
3799
3800 We try to generate special code for division and remainder when OP1 is a
3801 constant. If |OP1| = 2**n we can use shifts and some other fast
3802 operations. For other values of OP1, we compute a carefully selected
3803 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3804 by m.
3805
3806 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3807 half of the product. Different strategies for generating the product are
3808 implemented in expand_mult_highpart.
3809
3810 If what we actually want is the remainder, we generate that by another
3811 by-constant multiplication and a subtraction. */
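/* As a concrete instance of the multiply-by-reciprocal strategy, assume a
   32-bit mode and an unsigned division by 7: choose_multiplier yields the
   multiplier 0x24924925 with a nonzero return value and POST_SHIFT = 3,
   so the code emitted below computes
     t1 = highpart (op0 * 0x24924925)
     q  = (((op0 - t1) >> 1) + t1) >> 2
   e.g. op0 = 100: t1 = 14 and q = ((86 >> 1) + 14) >> 2 = 14 = 100/7.  */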
3812
3813 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3814 code below will malfunction if we are, so check here and handle
3815 the special case if so. */
3816 if (op1 == const1_rtx)
3817 return rem_flag ? const0_rtx : op0;
3818
3819 /* When dividing by -1, we could get an overflow.
3820 negv_optab can handle overflows. */
3821 if (! unsignedp && op1 == constm1_rtx)
3822 {
3823 if (rem_flag)
3824 return const0_rtx;
3825 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3826 ? negv_optab : neg_optab, op0, target, 0);
3827 }
3828
3829 if (target
3830 /* Don't use the function value register as a target
3831 since we have to read it as well as write it,
3832 and function-inlining gets confused by this. */
3833 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3834 /* Don't clobber an operand while doing a multi-step calculation. */
3835 || ((rem_flag || op1_is_constant)
3836 && (reg_mentioned_p (target, op0)
3837 || (MEM_P (op0) && MEM_P (target))))
3838 || reg_mentioned_p (target, op1)
3839 || (MEM_P (op1) && MEM_P (target))))
3840 target = 0;
3841
3842 /* Get the mode in which to perform this computation. Normally it will
3843 be MODE, but sometimes we can't do the desired operation in MODE.
3844 If so, pick a wider mode in which we can do the operation. Convert
3845 to that mode at the start to avoid repeated conversions.
3846
3847 First see what operations we need. These depend on the expression
3848 we are evaluating. (We assume that divxx3 insns exist under the
3849 same conditions that modxx3 insns do, and that these insns don't normally
3850 fail. If these assumptions are not correct, we may generate less
3851 efficient code in some cases.)
3852
3853 Then see if we find a mode in which we can open-code that operation
3854 (either a division, modulus, or shift). Finally, check for the smallest
3855 mode for which we can do the operation with a library call. */
3856
3857 /* We might want to refine this now that we have division-by-constant
3858 optimization. Since expand_mult_highpart tries so many variants, it is
3859 not straightforward to generalize this. Maybe we should make an array
3860 of possible modes in init_expmed? Save this for GCC 2.7. */
3861
3862 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3863 ? (unsignedp ? lshr_optab : ashr_optab)
3864 : (unsignedp ? udiv_optab : sdiv_optab));
3865 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3866 ? optab1
3867 : (unsignedp ? udivmod_optab : sdivmod_optab));
3868
3869 for (compute_mode = mode; compute_mode != VOIDmode;
3870 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3871 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3872 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3873 break;
3874
3875 if (compute_mode == VOIDmode)
3876 for (compute_mode = mode; compute_mode != VOIDmode;
3877 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3878 if (optab1->handlers[compute_mode].libfunc
3879 || optab2->handlers[compute_mode].libfunc)
3880 break;
3881
3882 /* If we still couldn't find a mode, use MODE, but expand_binop will
3883 probably die. */
3884 if (compute_mode == VOIDmode)
3885 compute_mode = mode;
3886
3887 if (target && GET_MODE (target) == compute_mode)
3888 tquotient = target;
3889 else
3890 tquotient = gen_reg_rtx (compute_mode);
3891
3892 size = GET_MODE_BITSIZE (compute_mode);
3893 #if 0
3894 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3895 (mode), and thereby get better code when OP1 is a constant. Do that
3896 later. It will require going over all usages of SIZE below. */
3897 size = GET_MODE_BITSIZE (mode);
3898 #endif
3899
3900 /* Only deduct something for a REM if the last divide done was
3901 for a different constant. Then set the constant of the last
3902 divide. */
3903 max_cost = div_cost[compute_mode]
3904 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3905 && INTVAL (op1) == last_div_const)
3906 ? mul_cost[compute_mode] + add_cost[compute_mode]
3907 : 0);
3908
3909 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3910
3911 /* Now convert to the best mode to use. */
3912 if (compute_mode != mode)
3913 {
3914 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3915 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3916
3917 /* convert_modes may have placed op1 into a register, so we
3918 must recompute the following. */
3919 op1_is_constant = GET_CODE (op1) == CONST_INT;
3920 op1_is_pow2 = (op1_is_constant
3921 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3922 || (! unsignedp
3923 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3924 }
3925
3926 /* If one of the operands is a volatile MEM, copy it into a register. */
3927
3928 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3929 op0 = force_reg (compute_mode, op0);
3930 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3931 op1 = force_reg (compute_mode, op1);
3932
3933 /* If we need the remainder or if OP1 is constant, we need to
3934 put OP0 in a register in case it has any queued subexpressions. */
3935 if (rem_flag || op1_is_constant)
3936 op0 = force_reg (compute_mode, op0);
3937
3938 last = get_last_insn ();
3939
3940 /* Promote floor rounding to trunc rounding for unsigned operations. */
3941 if (unsignedp)
3942 {
3943 if (code == FLOOR_DIV_EXPR)
3944 code = TRUNC_DIV_EXPR;
3945 if (code == FLOOR_MOD_EXPR)
3946 code = TRUNC_MOD_EXPR;
3947 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3948 code = TRUNC_DIV_EXPR;
3949 }
3950
3951 if (op1 != const0_rtx)
3952 switch (code)
3953 {
3954 case TRUNC_MOD_EXPR:
3955 case TRUNC_DIV_EXPR:
3956 if (op1_is_constant)
3957 {
3958 if (unsignedp)
3959 {
3960 unsigned HOST_WIDE_INT mh;
3961 int pre_shift, post_shift;
3962 int dummy;
3963 rtx ml;
3964 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3965 & GET_MODE_MASK (compute_mode));
3966
3967 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3968 {
3969 pre_shift = floor_log2 (d);
3970 if (rem_flag)
3971 {
3972 remainder
3973 = expand_binop (compute_mode, and_optab, op0,
3974 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3975 remainder, 1,
3976 OPTAB_LIB_WIDEN);
3977 if (remainder)
3978 return gen_lowpart (mode, remainder);
3979 }
3980 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3981 build_int_cst (NULL_TREE,
3982 pre_shift),
3983 tquotient, 1);
3984 }
3985 else if (size <= HOST_BITS_PER_WIDE_INT)
3986 {
3987 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3988 {
3989 /* Most significant bit of divisor is set; emit an scc
3990 insn. */
3991 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3992 compute_mode, 1, 1);
3993 if (quotient == 0)
3994 goto fail1;
3995 }
3996 else
3997 {
3998 /* Find a suitable multiplier and right shift count
3999 instead of multiplying with D. */
4000
4001 mh = choose_multiplier (d, size, size,
4002 &ml, &post_shift, &dummy);
4003
4004 /* If the suggested multiplier is more than SIZE bits,
4005 we can do better for even divisors, using an
4006 initial right shift. */
4007 if (mh != 0 && (d & 1) == 0)
4008 {
4009 pre_shift = floor_log2 (d & -d);
4010 mh = choose_multiplier (d >> pre_shift, size,
4011 size - pre_shift,
4012 &ml, &post_shift, &dummy);
4013 gcc_assert (!mh);
4014 }
4015 else
4016 pre_shift = 0;
4017
4018 if (mh != 0)
4019 {
4020 rtx t1, t2, t3, t4;
4021
4022 if (post_shift - 1 >= BITS_PER_WORD)
4023 goto fail1;
4024
4025 extra_cost
4026 = (shift_cost[compute_mode][post_shift - 1]
4027 + shift_cost[compute_mode][1]
4028 + 2 * add_cost[compute_mode]);
4029 t1 = expand_mult_highpart (compute_mode, op0, ml,
4030 NULL_RTX, 1,
4031 max_cost - extra_cost);
4032 if (t1 == 0)
4033 goto fail1;
4034 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4035 op0, t1),
4036 NULL_RTX);
4037 t3 = expand_shift
4038 (RSHIFT_EXPR, compute_mode, t2,
4039 build_int_cst (NULL_TREE, 1),
4040 NULL_RTX,1);
4041 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4042 t1, t3),
4043 NULL_RTX);
4044 quotient = expand_shift
4045 (RSHIFT_EXPR, compute_mode, t4,
4046 build_int_cst (NULL_TREE, post_shift - 1),
4047 tquotient, 1);
4048 }
4049 else
4050 {
4051 rtx t1, t2;
4052
4053 if (pre_shift >= BITS_PER_WORD
4054 || post_shift >= BITS_PER_WORD)
4055 goto fail1;
4056
4057 t1 = expand_shift
4058 (RSHIFT_EXPR, compute_mode, op0,
4059 build_int_cst (NULL_TREE, pre_shift),
4060 NULL_RTX, 1);
4061 extra_cost
4062 = (shift_cost[compute_mode][pre_shift]
4063 + shift_cost[compute_mode][post_shift]);
4064 t2 = expand_mult_highpart (compute_mode, t1, ml,
4065 NULL_RTX, 1,
4066 max_cost - extra_cost);
4067 if (t2 == 0)
4068 goto fail1;
4069 quotient = expand_shift
4070 (RSHIFT_EXPR, compute_mode, t2,
4071 build_int_cst (NULL_TREE, post_shift),
4072 tquotient, 1);
4073 }
4074 }
4075 }
4076 else /* Mode too wide to use tricky code */
4077 break;
4078
4079 insn = get_last_insn ();
4080 if (insn != last
4081 && (set = single_set (insn)) != 0
4082 && SET_DEST (set) == quotient)
4083 set_unique_reg_note (insn,
4084 REG_EQUAL,
4085 gen_rtx_UDIV (compute_mode, op0, op1));
4086 }
4087 else /* TRUNC_DIV, signed */
4088 {
4089 unsigned HOST_WIDE_INT ml;
4090 int lgup, post_shift;
4091 rtx mlr;
4092 HOST_WIDE_INT d = INTVAL (op1);
4093 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4094
4095 /* n rem d = n rem -d */
4096 if (rem_flag && d < 0)
4097 {
4098 d = abs_d;
4099 op1 = gen_int_mode (abs_d, compute_mode);
4100 }
4101
4102 if (d == 1)
4103 quotient = op0;
4104 else if (d == -1)
4105 quotient = expand_unop (compute_mode, neg_optab, op0,
4106 tquotient, 0);
4107 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4108 {
4109 /* This case is not handled correctly below. */
4110 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4111 compute_mode, 1, 1);
4112 if (quotient == 0)
4113 goto fail1;
4114 }
4115 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4116 && (rem_flag ? smod_pow2_cheap[compute_mode]
4117 : sdiv_pow2_cheap[compute_mode])
4118 /* We assume that the cheap metric is true if the
4119 optab has an expander for this mode. */
4120 && (((rem_flag ? smod_optab : sdiv_optab)
4121 ->handlers[compute_mode].insn_code
4122 != CODE_FOR_nothing)
4123 || (sdivmod_optab->handlers[compute_mode]
4124 .insn_code != CODE_FOR_nothing)))
4125 ;
4126 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4127 {
4128 if (rem_flag)
4129 {
4130 remainder = expand_smod_pow2 (compute_mode, op0, d);
4131 if (remainder)
4132 return gen_lowpart (mode, remainder);
4133 }
4134
4135 if (sdiv_pow2_cheap[compute_mode]
4136 && ((sdiv_optab->handlers[compute_mode].insn_code
4137 != CODE_FOR_nothing)
4138 || (sdivmod_optab->handlers[compute_mode].insn_code
4139 != CODE_FOR_nothing)))
4140 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4141 compute_mode, op0,
4142 gen_int_mode (abs_d,
4143 compute_mode),
4144 NULL_RTX, 0);
4145 else
4146 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4147
4148 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4149 negate the quotient. */
4150 if (d < 0)
4151 {
4152 insn = get_last_insn ();
4153 if (insn != last
4154 && (set = single_set (insn)) != 0
4155 && SET_DEST (set) == quotient
4156 && abs_d < ((unsigned HOST_WIDE_INT) 1
4157 << (HOST_BITS_PER_WIDE_INT - 1)))
4158 set_unique_reg_note (insn,
4159 REG_EQUAL,
4160 gen_rtx_DIV (compute_mode,
4161 op0,
4162 GEN_INT
4163 (trunc_int_for_mode
4164 (abs_d,
4165 compute_mode))));
4166
4167 quotient = expand_unop (compute_mode, neg_optab,
4168 quotient, quotient, 0);
4169 }
4170 }
4171 else if (size <= HOST_BITS_PER_WIDE_INT)
4172 {
4173 choose_multiplier (abs_d, size, size - 1,
4174 &mlr, &post_shift, &lgup);
4175 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4176 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4177 {
4178 rtx t1, t2, t3;
4179
4180 if (post_shift >= BITS_PER_WORD
4181 || size - 1 >= BITS_PER_WORD)
4182 goto fail1;
4183
4184 extra_cost = (shift_cost[compute_mode][post_shift]
4185 + shift_cost[compute_mode][size - 1]
4186 + add_cost[compute_mode]);
4187 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4188 NULL_RTX, 0,
4189 max_cost - extra_cost);
4190 if (t1 == 0)
4191 goto fail1;
4192 t2 = expand_shift
4193 (RSHIFT_EXPR, compute_mode, t1,
4194 build_int_cst (NULL_TREE, post_shift),
4195 NULL_RTX, 0);
4196 t3 = expand_shift
4197 (RSHIFT_EXPR, compute_mode, op0,
4198 build_int_cst (NULL_TREE, size - 1),
4199 NULL_RTX, 0);
4200 if (d < 0)
4201 quotient
4202 = force_operand (gen_rtx_MINUS (compute_mode,
4203 t3, t2),
4204 tquotient);
4205 else
4206 quotient
4207 = force_operand (gen_rtx_MINUS (compute_mode,
4208 t2, t3),
4209 tquotient);
4210 }
4211 else
4212 {
4213 rtx t1, t2, t3, t4;
4214
4215 if (post_shift >= BITS_PER_WORD
4216 || size - 1 >= BITS_PER_WORD)
4217 goto fail1;
4218
4219 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4220 mlr = gen_int_mode (ml, compute_mode);
4221 extra_cost = (shift_cost[compute_mode][post_shift]
4222 + shift_cost[compute_mode][size - 1]
4223 + 2 * add_cost[compute_mode]);
4224 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4225 NULL_RTX, 0,
4226 max_cost - extra_cost);
4227 if (t1 == 0)
4228 goto fail1;
4229 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4230 t1, op0),
4231 NULL_RTX);
4232 t3 = expand_shift
4233 (RSHIFT_EXPR, compute_mode, t2,
4234 build_int_cst (NULL_TREE, post_shift),
4235 NULL_RTX, 0);
4236 t4 = expand_shift
4237 (RSHIFT_EXPR, compute_mode, op0,
4238 build_int_cst (NULL_TREE, size - 1),
4239 NULL_RTX, 0);
4240 if (d < 0)
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t4, t3),
4244 tquotient);
4245 else
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t3, t4),
4249 tquotient);
4250 }
4251 }
4252 else /* Mode too wide to use tricky code */
4253 break;
4254
4255 insn = get_last_insn ();
4256 if (insn != last
4257 && (set = single_set (insn)) != 0
4258 && SET_DEST (set) == quotient)
4259 set_unique_reg_note (insn,
4260 REG_EQUAL,
4261 gen_rtx_DIV (compute_mode, op0, op1));
4262 }
4263 break;
4264 }
4265 fail1:
4266 delete_insns_since (last);
4267 break;
4268
4269 case FLOOR_DIV_EXPR:
4270 case FLOOR_MOD_EXPR:
4271 /* We will come here only for signed operations. */
4272 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4273 {
4274 unsigned HOST_WIDE_INT mh;
4275 int pre_shift, lgup, post_shift;
4276 HOST_WIDE_INT d = INTVAL (op1);
4277 rtx ml;
4278
4279 if (d > 0)
4280 {
4281 /* We could just as easily deal with negative constants here,
4282 but it does not seem worth the trouble for GCC 2.6. */
4283 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4284 {
4285 pre_shift = floor_log2 (d);
4286 if (rem_flag)
4287 {
4288 remainder = expand_binop (compute_mode, and_optab, op0,
4289 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4290 remainder, 0, OPTAB_LIB_WIDEN);
4291 if (remainder)
4292 return gen_lowpart (mode, remainder);
4293 }
4294 quotient = expand_shift
4295 (RSHIFT_EXPR, compute_mode, op0,
4296 build_int_cst (NULL_TREE, pre_shift),
4297 tquotient, 0);
4298 }
4299 else
4300 {
4301 rtx t1, t2, t3, t4;
4302
4303 mh = choose_multiplier (d, size, size - 1,
4304 &ml, &post_shift, &lgup);
4305 gcc_assert (!mh);
4306
4307 if (post_shift < BITS_PER_WORD
4308 && size - 1 < BITS_PER_WORD)
4309 {
4310 t1 = expand_shift
4311 (RSHIFT_EXPR, compute_mode, op0,
4312 build_int_cst (NULL_TREE, size - 1),
4313 NULL_RTX, 0);
4314 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4315 NULL_RTX, 0, OPTAB_WIDEN);
4316 extra_cost = (shift_cost[compute_mode][post_shift]
4317 + shift_cost[compute_mode][size - 1]
4318 + 2 * add_cost[compute_mode]);
4319 t3 = expand_mult_highpart (compute_mode, t2, ml,
4320 NULL_RTX, 1,
4321 max_cost - extra_cost);
4322 if (t3 != 0)
4323 {
4324 t4 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, t3,
4326 build_int_cst (NULL_TREE, post_shift),
4327 NULL_RTX, 1);
4328 quotient = expand_binop (compute_mode, xor_optab,
4329 t4, t1, tquotient, 0,
4330 OPTAB_WIDEN);
4331 }
4332 }
4333 }
4334 }
4335 else
4336 {
4337 rtx nsign, t1, t2, t3, t4;
4338 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4339 op0, constm1_rtx), NULL_RTX);
4340 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4341 0, OPTAB_WIDEN);
4342 nsign = expand_shift
4343 (RSHIFT_EXPR, compute_mode, t2,
4344 build_int_cst (NULL_TREE, size - 1),
4345 NULL_RTX, 0);
4346 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4347 NULL_RTX);
4348 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4349 NULL_RTX, 0);
4350 if (t4)
4351 {
4352 rtx t5;
4353 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4354 NULL_RTX, 0);
4355 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4356 t4, t5),
4357 tquotient);
4358 }
4359 }
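/* For a negative constant divisor the sequence above computes
   floor (op0 / op1) as
     trunc ((op0 > 0 ? op0 - 1 : op0) / op1) - (op0 > 0 ? 1 : 0),
   with nsign being -1 exactly when op0 <= 0.  E.g. op0 = 7, op1 = -2
   gives trunc (6 / -2) - 1 = -4 = floor (-3.5), while op0 = -7 gives
   trunc (-7 / -2) = 3.  */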
4360 }
4361
4362 if (quotient != 0)
4363 break;
4364 delete_insns_since (last);
4365
4366 /* Try using an instruction that produces both the quotient and
4367 remainder, using truncation. We can easily compensate the quotient
4368 or remainder to get floor rounding, once we have the remainder.
4369 Notice that we also compute the final remainder value here,
4370 and return the result right away. */
4371 if (target == 0 || GET_MODE (target) != compute_mode)
4372 target = gen_reg_rtx (compute_mode);
4373
4374 if (rem_flag)
4375 {
4376 remainder
4377 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4378 quotient = gen_reg_rtx (compute_mode);
4379 }
4380 else
4381 {
4382 quotient
4383 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4384 remainder = gen_reg_rtx (compute_mode);
4385 }
4386
4387 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4388 quotient, remainder, 0))
4389 {
4390 /* This could be computed with a branch-less sequence.
4391 Save that for later. */
4392 rtx tem;
4393 rtx label = gen_label_rtx ();
4394 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4395 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4396 NULL_RTX, 0, OPTAB_WIDEN);
4397 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4398 expand_dec (quotient, const1_rtx);
4399 expand_inc (remainder, op1);
4400 emit_label (label);
4401 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4402 }
4403
4404 /* No luck with division elimination or divmod. Have to do it
4405 by conditionally adjusting op0 *and* the result. */
4406 {
4407 rtx label1, label2, label3, label4, label5;
4408 rtx adjusted_op0;
4409 rtx tem;
4410
4411 quotient = gen_reg_rtx (compute_mode);
4412 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4413 label1 = gen_label_rtx ();
4414 label2 = gen_label_rtx ();
4415 label3 = gen_label_rtx ();
4416 label4 = gen_label_rtx ();
4417 label5 = gen_label_rtx ();
4418 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4419 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4420 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4421 quotient, 0, OPTAB_LIB_WIDEN);
4422 if (tem != quotient)
4423 emit_move_insn (quotient, tem);
4424 emit_jump_insn (gen_jump (label5));
4425 emit_barrier ();
4426 emit_label (label1);
4427 expand_inc (adjusted_op0, const1_rtx);
4428 emit_jump_insn (gen_jump (label4));
4429 emit_barrier ();
4430 emit_label (label2);
4431 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4432 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4433 quotient, 0, OPTAB_LIB_WIDEN);
4434 if (tem != quotient)
4435 emit_move_insn (quotient, tem);
4436 emit_jump_insn (gen_jump (label5));
4437 emit_barrier ();
4438 emit_label (label3);
4439 expand_dec (adjusted_op0, const1_rtx);
4440 emit_label (label4);
4441 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4442 quotient, 0, OPTAB_LIB_WIDEN);
4443 if (tem != quotient)
4444 emit_move_insn (quotient, tem);
4445 expand_dec (quotient, const1_rtx);
4446 emit_label (label5);
4447 }
4448 break;
4449
4450 case CEIL_DIV_EXPR:
4451 case CEIL_MOD_EXPR:
4452 if (unsignedp)
4453 {
4454 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4455 {
4456 rtx t1, t2, t3;
4457 unsigned HOST_WIDE_INT d = INTVAL (op1);
4458 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4459 build_int_cst (NULL_TREE, floor_log2 (d)),
4460 tquotient, 1);
4461 t2 = expand_binop (compute_mode, and_optab, op0,
4462 GEN_INT (d - 1),
4463 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4464 t3 = gen_reg_rtx (compute_mode);
4465 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4466 compute_mode, 1, 1);
4467 if (t3 == 0)
4468 {
4469 rtx lab;
4470 lab = gen_label_rtx ();
4471 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4472 expand_inc (t1, const1_rtx);
4473 emit_label (lab);
4474 quotient = t1;
4475 }
4476 else
4477 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4478 t1, t3),
4479 tquotient);
4480 break;
4481 }
4482
4483 /* Try using an instruction that produces both the quotient and
4484 remainder, using truncation. We can easily compensate the
4485 quotient or remainder to get ceiling rounding, once we have the
4486 remainder. Notice that we also compute the final remainder
4487 value here, and return the result right away. */
4488 if (target == 0 || GET_MODE (target) != compute_mode)
4489 target = gen_reg_rtx (compute_mode);
4490
4491 if (rem_flag)
4492 {
4493 remainder = (REG_P (target)
4494 ? target : gen_reg_rtx (compute_mode));
4495 quotient = gen_reg_rtx (compute_mode);
4496 }
4497 else
4498 {
4499 quotient = (REG_P (target)
4500 ? target : gen_reg_rtx (compute_mode));
4501 remainder = gen_reg_rtx (compute_mode);
4502 }
4503
4504 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4505 remainder, 1))
4506 {
4507 /* This could be computed with a branch-less sequence.
4508 Save that for later. */
4509 rtx label = gen_label_rtx ();
4510 do_cmp_and_jump (remainder, const0_rtx, EQ,
4511 compute_mode, label);
4512 expand_inc (quotient, const1_rtx);
4513 expand_dec (remainder, op1);
4514 emit_label (label);
4515 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4516 }
4517
4518 /* No luck with division elimination or divmod. Have to do it
4519 by conditionally adjusting op0 *and* the result. */
4520 {
4521 rtx label1, label2;
4522 rtx adjusted_op0, tem;
4523
4524 quotient = gen_reg_rtx (compute_mode);
4525 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4526 label1 = gen_label_rtx ();
4527 label2 = gen_label_rtx ();
4528 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4529 compute_mode, label1);
4530 emit_move_insn (quotient, const0_rtx);
4531 emit_jump_insn (gen_jump (label2));
4532 emit_barrier ();
4533 emit_label (label1);
4534 expand_dec (adjusted_op0, const1_rtx);
4535 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4536 quotient, 1, OPTAB_LIB_WIDEN);
4537 if (tem != quotient)
4538 emit_move_insn (quotient, tem);
4539 expand_inc (quotient, const1_rtx);
4540 emit_label (label2);
4541 }
4542 }
4543 else /* signed */
4544 {
4545 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4546 && INTVAL (op1) >= 0)
4547 {
4548 /* This is extremely similar to the code for the unsigned case
4549 above. For 2.7 we should merge these variants, but for
4550 2.6.1 I don't want to touch the code for unsigned since that
4551 get used in C. The signed case will only be used by other
4552 languages (Ada). */
4553
4554 rtx t1, t2, t3;
4555 unsigned HOST_WIDE_INT d = INTVAL (op1);
4556 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4557 build_int_cst (NULL_TREE, floor_log2 (d)),
4558 tquotient, 0);
4559 t2 = expand_binop (compute_mode, and_optab, op0,
4560 GEN_INT (d - 1),
4561 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4562 t3 = gen_reg_rtx (compute_mode);
4563 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4564 compute_mode, 1, 1);
4565 if (t3 == 0)
4566 {
4567 rtx lab;
4568 lab = gen_label_rtx ();
4569 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4570 expand_inc (t1, const1_rtx);
4571 emit_label (lab);
4572 quotient = t1;
4573 }
4574 else
4575 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4576 t1, t3),
4577 tquotient);
4578 break;
4579 }
4580
4581 /* Try using an instruction that produces both the quotient and
4582 remainder, using truncation. We can easily compensate the
4583 quotient or remainder to get ceiling rounding, once we have the
4584 remainder. Notice that we also compute the final remainder
4585 value here, and return the result right away. */
4586 if (target == 0 || GET_MODE (target) != compute_mode)
4587 target = gen_reg_rtx (compute_mode);
4588 if (rem_flag)
4589 {
4590 remainder = (REG_P (target)
4591 ? target : gen_reg_rtx (compute_mode));
4592 quotient = gen_reg_rtx (compute_mode);
4593 }
4594 else
4595 {
4596 quotient = (REG_P (target)
4597 ? target : gen_reg_rtx (compute_mode));
4598 remainder = gen_reg_rtx (compute_mode);
4599 }
4600
4601 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4602 remainder, 0))
4603 {
4604 /* This could be computed with a branch-less sequence.
4605 Save that for later. */
4606 rtx tem;
4607 rtx label = gen_label_rtx ();
4608 do_cmp_and_jump (remainder, const0_rtx, EQ,
4609 compute_mode, label);
4610 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4611 NULL_RTX, 0, OPTAB_WIDEN);
4612 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4613 expand_inc (quotient, const1_rtx);
4614 expand_dec (remainder, op1);
4615 emit_label (label);
4616 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4617 }
4618
4619 /* No luck with division elimination or divmod. Have to do it
4620 by conditionally adjusting op0 *and* the result. */
4621 {
4622 rtx label1, label2, label3, label4, label5;
4623 rtx adjusted_op0;
4624 rtx tem;
4625
4626 quotient = gen_reg_rtx (compute_mode);
4627 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4628 label1 = gen_label_rtx ();
4629 label2 = gen_label_rtx ();
4630 label3 = gen_label_rtx ();
4631 label4 = gen_label_rtx ();
4632 label5 = gen_label_rtx ();
4633 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4634 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4635 compute_mode, label1);
4636 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4637 quotient, 0, OPTAB_LIB_WIDEN);
4638 if (tem != quotient)
4639 emit_move_insn (quotient, tem);
4640 emit_jump_insn (gen_jump (label5));
4641 emit_barrier ();
4642 emit_label (label1);
4643 expand_dec (adjusted_op0, const1_rtx);
4644 emit_jump_insn (gen_jump (label4));
4645 emit_barrier ();
4646 emit_label (label2);
4647 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4648 compute_mode, label3);
4649 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4650 quotient, 0, OPTAB_LIB_WIDEN);
4651 if (tem != quotient)
4652 emit_move_insn (quotient, tem);
4653 emit_jump_insn (gen_jump (label5));
4654 emit_barrier ();
4655 emit_label (label3);
4656 expand_inc (adjusted_op0, const1_rtx);
4657 emit_label (label4);
4658 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4659 quotient, 0, OPTAB_LIB_WIDEN);
4660 if (tem != quotient)
4661 emit_move_insn (quotient, tem);
4662 expand_inc (quotient, const1_rtx);
4663 emit_label (label5);
4664 }
4665 }
4666 break;
4667
4668 case EXACT_DIV_EXPR:
4669 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4670 {
4671 HOST_WIDE_INT d = INTVAL (op1);
4672 unsigned HOST_WIDE_INT ml;
4673 int pre_shift;
4674 rtx t1;
4675
4676 pre_shift = floor_log2 (d & -d);
4677 ml = invert_mod2n (d >> pre_shift, size);
4678 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4679 build_int_cst (NULL_TREE, pre_shift),
4680 NULL_RTX, unsignedp);
4681 quotient = expand_mult (compute_mode, t1,
4682 gen_int_mode (ml, compute_mode),
4683 NULL_RTX, 1);
4684
4685 insn = get_last_insn ();
4686 set_unique_reg_note (insn,
4687 REG_EQUAL,
4688 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4689 compute_mode,
4690 op0, op1));
4691 }
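/* The exact-division case relies on D, once its trailing zero bits are
   shifted out, having a multiplicative inverse mod 2^SIZE.  E.g. for
   SIZE = 32 and D = 7, invert_mod2n yields 0xB6DB6DB7, and for an exact
   input such as OP0 = 21: (21 * 0xB6DB6DB7) mod 2^32 = 3 = 21 / 7.  */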
4692 break;
4693
4694 case ROUND_DIV_EXPR:
4695 case ROUND_MOD_EXPR:
4696 if (unsignedp)
4697 {
4698 rtx tem;
4699 rtx label;
4700 label = gen_label_rtx ();
4701 quotient = gen_reg_rtx (compute_mode);
4702 remainder = gen_reg_rtx (compute_mode);
4703 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4704 {
4705 rtx tem;
4706 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4707 quotient, 1, OPTAB_LIB_WIDEN);
4708 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4709 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4710 remainder, 1, OPTAB_LIB_WIDEN);
4711 }
4712 tem = plus_constant (op1, -1);
4713 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4714 build_int_cst (NULL_TREE, 1),
4715 NULL_RTX, 1);
4716 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4717 expand_inc (quotient, const1_rtx);
4718 expand_dec (remainder, op1);
4719 emit_label (label);
4720 }
4721 else
4722 {
4723 rtx abs_rem, abs_op1, tem, mask;
4724 rtx label;
4725 label = gen_label_rtx ();
4726 quotient = gen_reg_rtx (compute_mode);
4727 remainder = gen_reg_rtx (compute_mode);
4728 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4729 {
4730 rtx tem;
4731 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4732 quotient, 0, OPTAB_LIB_WIDEN);
4733 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4734 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4735 remainder, 0, OPTAB_LIB_WIDEN);
4736 }
4737 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4738 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4739 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4740 build_int_cst (NULL_TREE, 1),
4741 NULL_RTX, 1);
4742 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4743 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4744 NULL_RTX, 0, OPTAB_WIDEN);
4745 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4746 build_int_cst (NULL_TREE, size - 1),
4747 NULL_RTX, 0);
4748 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4749 NULL_RTX, 0, OPTAB_WIDEN);
4750 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4751 NULL_RTX, 0, OPTAB_WIDEN);
4752 expand_inc (quotient, tem);
4753 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4754 NULL_RTX, 0, OPTAB_WIDEN);
4755 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4756 NULL_RTX, 0, OPTAB_WIDEN);
4757 expand_dec (remainder, tem);
4758 emit_label (label);
4759 }
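/* In the signed sequence above, MASK = (op0 ^ op1) >> (SIZE - 1) is -1
   exactly when the operands have opposite signs, so (MASK ^ 1) - MASK is
   +1 or -1 and (MASK ^ op1) - MASK is op1 or -op1.  Whenever
   2*|remainder| >= |op1| the quotient is therefore moved one step away
   from zero and the remainder corrected by the matching multiple of op1,
   giving round-to-nearest.  E.g. op0 = -8, op1 = 3: truncation gives -2
   rem -2; the adjustment yields quotient -3, remainder 1.  */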
4760 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4761
4762 default:
4763 gcc_unreachable ();
4764 }
4765
4766 if (quotient == 0)
4767 {
4768 if (target && GET_MODE (target) != compute_mode)
4769 target = 0;
4770
4771 if (rem_flag)
4772 {
4773 /* Try to produce the remainder without producing the quotient.
4774 If we seem to have a divmod pattern that does not require widening,
4775 don't try widening here. We should really have a WIDEN argument
4776 to expand_twoval_binop, since what we'd really like to do here is
4777 1) try a mod insn in compute_mode
4778 2) try a divmod insn in compute_mode
4779 3) try a div insn in compute_mode and multiply-subtract to get
4780 remainder
4781 4) try the same things with widening allowed. */
4782 remainder
4783 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4784 op0, op1, target,
4785 unsignedp,
4786 ((optab2->handlers[compute_mode].insn_code
4787 != CODE_FOR_nothing)
4788 ? OPTAB_DIRECT : OPTAB_WIDEN));
4789 if (remainder == 0)
4790 {
4791 /* No luck there. Can we do remainder and divide at once
4792 without a library call? */
4793 remainder = gen_reg_rtx (compute_mode);
4794 if (! expand_twoval_binop ((unsignedp
4795 ? udivmod_optab
4796 : sdivmod_optab),
4797 op0, op1,
4798 NULL_RTX, remainder, unsignedp))
4799 remainder = 0;
4800 }
4801
4802 if (remainder)
4803 return gen_lowpart (mode, remainder);
4804 }
4805
4806 /* Produce the quotient. Try a quotient insn, but not a library call.
4807 If we have a divmod in this mode, use it in preference to widening
4808 the div (for this test we assume it will not fail). Note that optab2
4809 is set to the one of the two optabs that the call below will use. */
4810 quotient
4811 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4812 op0, op1, rem_flag ? NULL_RTX : target,
4813 unsignedp,
4814 ((optab2->handlers[compute_mode].insn_code
4815 != CODE_FOR_nothing)
4816 ? OPTAB_DIRECT : OPTAB_WIDEN));
4817
4818 if (quotient == 0)
4819 {
4820 /* No luck there. Try a quotient-and-remainder insn,
4821 keeping the quotient alone. */
4822 quotient = gen_reg_rtx (compute_mode);
4823 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4824 op0, op1,
4825 quotient, NULL_RTX, unsignedp))
4826 {
4827 quotient = 0;
4828 if (! rem_flag)
4829 /* Still no luck. If we are not computing the remainder,
4830 use a library call for the quotient. */
4831 quotient = sign_expand_binop (compute_mode,
4832 udiv_optab, sdiv_optab,
4833 op0, op1, target,
4834 unsignedp, OPTAB_LIB_WIDEN);
4835 }
4836 }
4837 }
4838
4839 if (rem_flag)
4840 {
4841 if (target && GET_MODE (target) != compute_mode)
4842 target = 0;
4843
4844 if (quotient == 0)
4845 {
4846 /* No divide instruction either. Use library for remainder. */
4847 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4848 op0, op1, target,
4849 unsignedp, OPTAB_LIB_WIDEN);
4850 /* No remainder function. Try a quotient-and-remainder
4851 function, keeping the remainder. */
4852 if (!remainder)
4853 {
4854 remainder = gen_reg_rtx (compute_mode);
4855 if (!expand_twoval_binop_libfunc
4856 (unsignedp ? udivmod_optab : sdivmod_optab,
4857 op0, op1,
4858 NULL_RTX, remainder,
4859 unsignedp ? UMOD : MOD))
4860 remainder = NULL_RTX;
4861 }
4862 }
4863 else
4864 {
4865 /* We divided. Now finish doing X - Y * (X / Y). */
4866 remainder = expand_mult (compute_mode, quotient, op1,
4867 NULL_RTX, unsignedp);
4868 remainder = expand_binop (compute_mode, sub_optab, op0,
4869 remainder, target, unsignedp,
4870 OPTAB_LIB_WIDEN);
4871 }
4872 }
4873
4874 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4875 }
4876 \f
4877 /* Return a tree node with data type TYPE, describing the value of X.
4878 Usually this is a VAR_DECL, if there is no obvious better choice.
4879 X may be an expression; however, we only support those expressions
4880 generated by loop.c. */
4881
4882 tree
4883 make_tree (tree type, rtx x)
4884 {
4885 tree t;
4886
4887 switch (GET_CODE (x))
4888 {
4889 case CONST_INT:
4890 {
4891 HOST_WIDE_INT hi = 0;
4892
4893 if (INTVAL (x) < 0
4894 && !(TYPE_UNSIGNED (type)
4895 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4896 < HOST_BITS_PER_WIDE_INT)))
4897 hi = -1;
4898
4899 t = build_int_cst_wide (type, INTVAL (x), hi);
4900
4901 return t;
4902 }
4903
4904 case CONST_DOUBLE:
4905 if (GET_MODE (x) == VOIDmode)
4906 t = build_int_cst_wide (type,
4907 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4908 else
4909 {
4910 REAL_VALUE_TYPE d;
4911
4912 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4913 t = build_real (type, d);
4914 }
4915
4916 return t;
4917
4918 case CONST_VECTOR:
4919 {
4920 int i, units;
4921 rtx elt;
4922 tree t = NULL_TREE;
4923
4924 units = CONST_VECTOR_NUNITS (x);
4925
4926 /* Build a tree with vector elements. */
4927 for (i = units - 1; i >= 0; --i)
4928 {
4929 elt = CONST_VECTOR_ELT (x, i);
4930 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4931 }
4932
4933 return build_vector (type, t);
4934 }
4935
4936 case PLUS:
4937 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4938 make_tree (type, XEXP (x, 1)));
4939
4940 case MINUS:
4941 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4942 make_tree (type, XEXP (x, 1)));
4943
4944 case NEG:
4945 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4946
4947 case MULT:
4948 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4949 make_tree (type, XEXP (x, 1)));
4950
4951 case ASHIFT:
4952 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4953 make_tree (type, XEXP (x, 1)));
4954
4955 case LSHIFTRT:
4956 t = lang_hooks.types.unsigned_type (type);
4957 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4958 make_tree (t, XEXP (x, 0)),
4959 make_tree (type, XEXP (x, 1))));
4960
4961 case ASHIFTRT:
4962 t = lang_hooks.types.signed_type (type);
4963 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4964 make_tree (t, XEXP (x, 0)),
4965 make_tree (type, XEXP (x, 1))));
4966
4967 case DIV:
4968 if (TREE_CODE (type) != REAL_TYPE)
4969 t = lang_hooks.types.signed_type (type);
4970 else
4971 t = type;
4972
4973 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4974 make_tree (t, XEXP (x, 0)),
4975 make_tree (t, XEXP (x, 1))));
4976 case UDIV:
4977 t = lang_hooks.types.unsigned_type (type);
4978 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4979 make_tree (t, XEXP (x, 0)),
4980 make_tree (t, XEXP (x, 1))));
4981
4982 case SIGN_EXTEND:
4983 case ZERO_EXTEND:
4984 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4985 GET_CODE (x) == ZERO_EXTEND);
4986 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4987
4988 default:
4989 t = build_decl (VAR_DECL, NULL_TREE, type);
4990
4991 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4992 ptr_mode. So convert. */
4993 if (POINTER_TYPE_P (type))
4994 x = convert_memory_address (TYPE_MODE (type), x);
4995
4996 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4997 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4998 t->decl_with_rtl.rtl = x;
4999
5000 return t;
5001 }
5002 }
5003
5004 /* Check whether the multiplication X * MULT + ADD overflows.
5005 X, MULT and ADD must be CONST_*.
5006 MODE is the machine mode for the computation.
5007 X and MULT must have mode MODE. ADD may have a different mode.
5008 So can X (defaults to same as MODE).
5009 UNSIGNEDP is nonzero to do unsigned multiplication. */
5010
5011 bool
5012 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
5013 enum machine_mode mode, int unsignedp)
5014 {
5015 tree type, mult_type, add_type, result;
5016
5017 type = lang_hooks.types.type_for_mode (mode, unsignedp);
5018
5019 /* In order to get a proper overflow indication from an unsigned
5020 type, we have to pretend that it's a sizetype. */
5021 mult_type = type;
5022 if (unsignedp)
5023 {
5024 /* FIXME: It would be nice if we could step directly from this
5025 type to its sizetype equivalent. */
5026 mult_type = build_distinct_type_copy (type);
5027 TYPE_IS_SIZETYPE (mult_type) = 1;
5028 }
5029
5030 add_type = (GET_MODE (add) == VOIDmode ? mult_type
5031 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
5032
5033 result = fold_build2 (PLUS_EXPR, mult_type,
5034 fold_build2 (MULT_EXPR, mult_type,
5035 make_tree (mult_type, x),
5036 make_tree (mult_type, mult)),
5037 make_tree (add_type, add));
5038
5039 return TREE_CONSTANT_OVERFLOW (result);
5040 }
5041
5042 /* Return an rtx representing the value of X * MULT + ADD.
5043 TARGET is a suggestion for where to store the result (an rtx).
5044 MODE is the machine mode for the computation.
5045 X and MULT must have mode MODE. ADD may have a different mode.
5046 So can X (defaults to same as MODE).
5047 UNSIGNEDP is nonzero to do unsigned multiplication.
5048 This may emit insns. */
5049
5050 rtx
5051 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5052 int unsignedp)
5053 {
5054 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5055 tree add_type = (GET_MODE (add) == VOIDmode
5056 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5057 unsignedp));
5058 tree result = fold_build2 (PLUS_EXPR, type,
5059 fold_build2 (MULT_EXPR, type,
5060 make_tree (type, x),
5061 make_tree (type, mult)),
5062 make_tree (add_type, add));
5063
5064 return expand_expr (result, target, VOIDmode, 0);
5065 }
5066 \f
5067 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5068 and returning TARGET.
5069
5070 If TARGET is 0, a pseudo-register or constant is returned. */
5071
5072 rtx
5073 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5074 {
5075 rtx tem = 0;
5076
5077 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5078 tem = simplify_binary_operation (AND, mode, op0, op1);
5079 if (tem == 0)
5080 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5081
5082 if (target == 0)
5083 target = tem;
5084 else if (tem != target)
5085 emit_move_insn (target, tem);
5086 return target;
5087 }
5088 \f
5089 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5090 and storing in TARGET. Normally return TARGET.
5091 Return 0 if that cannot be done.
5092
5093 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5094 it is VOIDmode, they cannot both be CONST_INT.
5095
5096 UNSIGNEDP is for the case where we have to widen the operands
5097 to perform the operation. It says to use zero-extension.
5098
5099 NORMALIZEP is 1 if we should convert the result to be either zero
5100 or one. NORMALIZEP is -1 if we should convert the result to be
5101 either zero or -1. If NORMALIZEP is zero, the result will be left
5102 "raw" out of the scc insn. */
5103
5104 rtx
5105 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5106 enum machine_mode mode, int unsignedp, int normalizep)
5107 {
5108 rtx subtarget;
5109 enum insn_code icode;
5110 enum machine_mode compare_mode;
5111 enum machine_mode target_mode = GET_MODE (target);
5112 rtx tem;
5113 rtx last = get_last_insn ();
5114 rtx pattern, comparison;
5115
5116 if (unsignedp)
5117 code = unsigned_condition (code);
5118
5119 /* If one operand is constant, make it the second one. Only do this
5120 if the other operand is not constant as well. */
5121
5122 if (swap_commutative_operands_p (op0, op1))
5123 {
5124 tem = op0;
5125 op0 = op1;
5126 op1 = tem;
5127 code = swap_condition (code);
5128 }
5129
5130 if (mode == VOIDmode)
5131 mode = GET_MODE (op0);
5132
5133 /* For some comparisons with 1 and -1, we can convert this to
5134 comparisons with zero. This will often produce more opportunities for
5135 store-flag insns. */
5136
5137 switch (code)
5138 {
5139 case LT:
5140 if (op1 == const1_rtx)
5141 op1 = const0_rtx, code = LE;
5142 break;
5143 case LE:
5144 if (op1 == constm1_rtx)
5145 op1 = const0_rtx, code = LT;
5146 break;
5147 case GE:
5148 if (op1 == const1_rtx)
5149 op1 = const0_rtx, code = GT;
5150 break;
5151 case GT:
5152 if (op1 == constm1_rtx)
5153 op1 = const0_rtx, code = GE;
5154 break;
5155 case GEU:
5156 if (op1 == const1_rtx)
5157 op1 = const0_rtx, code = NE;
5158 break;
5159 case LTU:
5160 if (op1 == const1_rtx)
5161 op1 = const0_rtx, code = EQ;
5162 break;
5163 default:
5164 break;
5165 }
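/* The rewrites above rely on integer ordering: A < 1 is A <= 0,
   A <= -1 is A < 0, A >= 1 is A > 0, A > -1 is A >= 0, and for
   unsigned operands A >= 1 is just A != 0 while A < 1 is A == 0.  */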
5166
5167 /* If we are comparing a double-word integer with zero or -1, we can
5168 convert the comparison into one involving a single word. */
5169 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5170 && GET_MODE_CLASS (mode) == MODE_INT
5171 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5172 {
5173 if ((code == EQ || code == NE)
5174 && (op1 == const0_rtx || op1 == constm1_rtx))
5175 {
5176 rtx op00, op01, op0both;
5177
5178 /* Do a logical OR or AND of the two words and compare the result. */
5179 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5180 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5181 op0both = expand_binop (word_mode,
5182 op1 == const0_rtx ? ior_optab : and_optab,
5183 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5184
5185 if (op0both != 0)
5186 return emit_store_flag (target, code, op0both, op1, word_mode,
5187 unsignedp, normalizep);
5188 }
5189 else if ((code == LT || code == GE) && op1 == const0_rtx)
5190 {
5191 rtx op0h;
5192
5193 /* If testing the sign bit, can just test on high word. */
5194 op0h = simplify_gen_subreg (word_mode, op0, mode,
5195 subreg_highpart_offset (word_mode, mode));
5196 return emit_store_flag (target, code, op0h, op1, word_mode,
5197 unsignedp, normalizep);
5198 }
5199 }
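/* E.g. on a 32-bit target a DImode X == 0 becomes (XLOW | XHIGH) == 0,
   X == -1 becomes (XLOW & XHIGH) == -1, and X < 0 or X >= 0 just tests
   the sign of the high word (writing XLOW/XHIGH for the two word
   halves); each case is handled by a recursive word_mode call.  */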
5200
5201 /* From now on, we won't change CODE, so set ICODE now. */
5202 icode = setcc_gen_code[(int) code];
5203
5204 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5205 complement of A (for GE) and shifting the sign bit to the low bit. */
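/* For a 32-bit A and NORMALIZEP == 1 that is (unsigned) A >> 31 for
   A < 0 and (unsigned) ~A >> 31 for A >= 0; with NORMALIZEP == -1 an
   arithmetic shift produces 0 or -1 directly.  */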
5206 if (op1 == const0_rtx && (code == LT || code == GE)
5207 && GET_MODE_CLASS (mode) == MODE_INT
5208 && (normalizep || STORE_FLAG_VALUE == 1
5209 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5210 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5211 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5212 {
5213 subtarget = target;
5214
5215 /* If the result is to be wider than OP0, it is best to convert it
5216 first. If it is to be narrower, it is *incorrect* to convert it
5217 first. */
5218 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5219 {
5220 op0 = convert_modes (target_mode, mode, op0, 0);
5221 mode = target_mode;
5222 }
5223
5224 if (target_mode != mode)
5225 subtarget = 0;
5226
5227 if (code == GE)
5228 op0 = expand_unop (mode, one_cmpl_optab, op0,
5229 ((STORE_FLAG_VALUE == 1 || normalizep)
5230 ? 0 : subtarget), 0);
5231
5232 if (STORE_FLAG_VALUE == 1 || normalizep)
5233 /* If we are supposed to produce a 0/1 value, we want to do
5234 a logical shift from the sign bit to the low-order bit; for
5235 a -1/0 value, we do an arithmetic shift. */
5236 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5237 size_int (GET_MODE_BITSIZE (mode) - 1),
5238 subtarget, normalizep != -1);
5239
5240 if (mode != target_mode)
5241 op0 = convert_modes (target_mode, mode, op0, 0);
5242
5243 return op0;
5244 }
5245
5246 if (icode != CODE_FOR_nothing)
5247 {
5248 insn_operand_predicate_fn pred;
5249
5250 /* We think we may be able to do this with a scc insn. Emit the
5251 comparison and then the scc insn. */
5252
5253 do_pending_stack_adjust ();
5254 last = get_last_insn ();
5255
5256 comparison
5257 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5258 if (CONSTANT_P (comparison))
5259 {
5260 switch (GET_CODE (comparison))
5261 {
5262 case CONST_INT:
5263 if (comparison == const0_rtx)
5264 return const0_rtx;
5265 break;
5266
5267 #ifdef FLOAT_STORE_FLAG_VALUE
5268 case CONST_DOUBLE:
5269 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5270 return const0_rtx;
5271 break;
5272 #endif
5273 default:
5274 gcc_unreachable ();
5275 }
5276
5277 if (normalizep == 1)
5278 return const1_rtx;
5279 if (normalizep == -1)
5280 return constm1_rtx;
5281 return const_true_rtx;
5282 }
5283
5284 /* The code of COMPARISON may not match CODE if compare_from_rtx
5285 decided to swap its operands and reverse the original code.
5286
5287 We know that compare_from_rtx returns either a CONST_INT or
5288 a new comparison code, so it is safe to just extract the
5289 code from COMPARISON. */
5290 code = GET_CODE (comparison);
5291
5292 /* Get a reference to the target in the proper mode for this insn. */
5293 compare_mode = insn_data[(int) icode].operand[0].mode;
5294 subtarget = target;
5295 pred = insn_data[(int) icode].operand[0].predicate;
5296 if (optimize || ! (*pred) (subtarget, compare_mode))
5297 subtarget = gen_reg_rtx (compare_mode);
5298
5299 pattern = GEN_FCN (icode) (subtarget);
5300 if (pattern)
5301 {
5302 emit_insn (pattern);
5303
5304 /* If we are converting to a wider mode, first convert to
5305 TARGET_MODE, then normalize. This produces better combining
5306 opportunities on machines that have a SIGN_EXTRACT when we are
5307 testing a single bit. This mostly benefits the 68k.
5308
5309 If STORE_FLAG_VALUE does not have the sign bit set when
5310 interpreted in COMPARE_MODE, we can do this conversion as
5311 unsigned, which is usually more efficient. */
5312 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5313 {
5314 convert_move (target, subtarget,
5315 (GET_MODE_BITSIZE (compare_mode)
5316 <= HOST_BITS_PER_WIDE_INT)
5317 && 0 == (STORE_FLAG_VALUE
5318 & ((HOST_WIDE_INT) 1
5319 << (GET_MODE_BITSIZE (compare_mode) -1))));
5320 op0 = target;
5321 compare_mode = target_mode;
5322 }
5323 else
5324 op0 = subtarget;
5325
5326 /* If we want to keep subexpressions around, don't reuse our
5327 last target. */
5328
5329 if (optimize)
5330 subtarget = 0;
5331
5332 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
5333 we don't have to do anything. */
5334 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5335 ;
5336 /* STORE_FLAG_VALUE might be the most negative number, so write
5337 the comparison this way to avoid a compile-time warning. */
5338 else if (- normalizep == STORE_FLAG_VALUE)
5339 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5340
5341 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5342 makes it hard to use a value of just the sign bit due to
5343 ANSI integer constant typing rules. */
5344 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5345 && (STORE_FLAG_VALUE
5346 & ((HOST_WIDE_INT) 1
5347 << (GET_MODE_BITSIZE (compare_mode) - 1))))
5348 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5349 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5350 subtarget, normalizep == 1);
5351 else
5352 {
5353 gcc_assert (STORE_FLAG_VALUE & 1);
5354
5355 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5356 if (normalizep == -1)
5357 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5358 }
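/* For instance, if the target's scc pattern stores -1 for true
   (STORE_FLAG_VALUE == -1) and NORMALIZEP is 1, the negation above maps
   -1/0 onto 1/0; if STORE_FLAG_VALUE is just the sign bit, the shift
   above moves it down to give 0/1 (or 0/-1 for NORMALIZEP == -1).  */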
5359
5360 /* If we were converting to a smaller mode, do the
5361 conversion now. */
5362 if (target_mode != compare_mode)
5363 {
5364 convert_move (target, op0, 0);
5365 return target;
5366 }
5367 else
5368 return op0;
5369 }
5370 }
5371
5372 delete_insns_since (last);
5373
5374 /* If optimizing, use different pseudo registers for each insn, instead
5375 of reusing the same pseudo. This leads to better CSE, but slows
5376 down the compiler, since there are more pseudos. */
5377 subtarget = (!optimize
5378 && (target_mode == mode)) ? target : NULL_RTX;
5379
5380 /* If we reached here, we can't do this with a scc insn. However, there
5381 are some comparisons that can be done directly. For example, if
5382 this is an equality comparison of integers, we can try to exclusive-or
5383 (or subtract) the two operands and use a recursive call to try the
5384 comparison with zero. Don't do any of these cases if branches are
5385 very cheap. */
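/* E.g. A == B is retried as (A ^ B) == 0 (or (A - B) == 0 when no xor
   pattern is available), which the zero-comparison paths below may then
   handle without a branch.  */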
5386
5387 if (BRANCH_COST > 0
5388 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5389 && op1 != const0_rtx)
5390 {
5391 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5392 OPTAB_WIDEN);
5393
5394 if (tem == 0)
5395 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5396 OPTAB_WIDEN);
5397 if (tem != 0)
5398 tem = emit_store_flag (target, code, tem, const0_rtx,
5399 mode, unsignedp, normalizep);
5400 if (tem == 0)
5401 delete_insns_since (last);
5402 return tem;
5403 }
5404
5405 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5406 the constant zero. Reject all other comparisons at this point. Only
5407 do LE and GT if branches are expensive since they are expensive on
5408 2-operand machines. */
5409
5410 if (BRANCH_COST == 0
5411 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5412 || (code != EQ && code != NE
5413 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5414 return 0;
5415
5416 /* See what we need to return. We can only return a 1, -1, or the
5417 sign bit. */
5418
5419 if (normalizep == 0)
5420 {
5421 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5422 normalizep = STORE_FLAG_VALUE;
5423
5424 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5425 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5426 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5427 ;
5428 else
5429 return 0;
5430 }
5431
5432 /* Try to put the result of the comparison in the sign bit. Assume we can't
5433 do the necessary operation below. */
5434
5435 tem = 0;
5436
5437 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5438 the sign bit set. */
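/* Worked 8-bit example: A == 0 gives 0 | -1 = -1 (sign bit set),
   A == 5 gives 5 | 4 = 5 (clear), and any negative A already has the
   sign bit set, so it survives the OR.  */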
5439
5440 if (code == LE)
5441 {
5442 /* This is destructive, so SUBTARGET can't be OP0. */
5443 if (rtx_equal_p (subtarget, op0))
5444 subtarget = 0;
5445
5446 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5447 OPTAB_WIDEN);
5448 if (tem)
5449 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5450 OPTAB_WIDEN);
5451 }
5452
5453 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5454 number of bits in the mode of OP0, minus one. */
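/* Worked 8-bit example (BITS == 7): A == 5 gives (5 >> 7) - 5 = -5
   (sign bit set), A == 0 gives 0 - 0 = 0 (clear), and A == -3 gives
   -1 - (-3) = 2 (clear), so the sign bit is set exactly when A > 0.  */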
5455
5456 if (code == GT)
5457 {
5458 if (rtx_equal_p (subtarget, op0))
5459 subtarget = 0;
5460
5461 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5462 size_int (GET_MODE_BITSIZE (mode) - 1),
5463 subtarget, 0);
5464 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5465 OPTAB_WIDEN);
5466 }
5467
5468 if (code == EQ || code == NE)
5469 {
5470 /* For EQ or NE, one way to do the comparison is to apply an operation
5471 that converts the operand into a positive number if it is nonzero
5472 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5473 for NE we negate. This puts the result in the sign bit. Then we
5474 normalize with a shift, if needed.
5475
5476 Two operations that can do the above actions are ABS and FFS, so try
5477 them. If that doesn't work, and MODE is smaller than a full word,
5478 we can use zero-extension to the wider mode (an unsigned conversion)
5479 as the operation. */
5480
5481 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5482 that is compensated by the subsequent overflow when subtracting
5483 one / negating. */
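/* Worked 8-bit example: for NE, -abs (5) = -5 (sign set, A != 0) and
   -abs (0) = 0 (clear); for EQ, abs (0) - 1 = -1 (sign set) while
   abs (5) - 1 = 4 (clear), and even A == -128, where abs wraps to -128,
   ends up at 127 after the subtraction, leaving the sign bit clear.  */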
5484
5485 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5486 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5487 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5488 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5489 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5490 {
5491 tem = convert_modes (word_mode, mode, op0, 1);
5492 mode = word_mode;
5493 }
5494
5495 if (tem != 0)
5496 {
5497 if (code == EQ)
5498 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5499 0, OPTAB_WIDEN);
5500 else
5501 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5502 }
5503
5504 /* If we couldn't do it that way, for NE we can "or" the two's complement
5505 of the value with itself. For EQ, we take the one's complement of
5506 that "or", which is an extra insn, so we only handle EQ if branches
5507 are expensive. */
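/* Worked 8-bit example: A == 5 gives -5 | 5 = -1 (sign set, A != 0),
   A == 0 gives 0 | 0 = 0 (clear), and A == -128 gives -128 | -128 = -128
   (sign set); complementing that result flips the test to A == 0.  */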
5508
5509 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5510 {
5511 if (rtx_equal_p (subtarget, op0))
5512 subtarget = 0;
5513
5514 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5515 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5516 OPTAB_WIDEN);
5517
5518 if (tem && code == EQ)
5519 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5520 }
5521 }
5522
5523 if (tem && normalizep)
5524 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5525 size_int (GET_MODE_BITSIZE (mode) - 1),
5526 subtarget, normalizep == 1);
5527
5528 if (tem)
5529 {
5530 if (GET_MODE (tem) != target_mode)
5531 {
5532 convert_move (target, tem, 0);
5533 tem = target;
5534 }
5535 else if (!subtarget)
5536 {
5537 emit_move_insn (target, tem);
5538 tem = target;
5539 }
5540 }
5541 else
5542 delete_insns_since (last);
5543
5544 return tem;
5545 }
5546
5547 /* Like emit_store_flag, but always succeeds. */
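/* If the store-flag attempt fails, the fallback below emits, in effect:
     target = 1;  if (op0 CODE op1) goto label;  target = 0;  label:
   i.e. a branchy 0/1 materialization of the comparison.  */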
5548
5549 rtx
5550 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5551 enum machine_mode mode, int unsignedp, int normalizep)
5552 {
5553 rtx tem, label;
5554
5555 /* First see if emit_store_flag can do the job. */
5556 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5557 if (tem != 0)
5558 return tem;
5559
5560 if (normalizep == 0)
5561 normalizep = 1;
5562
5563 /* If this failed, we have to do this with set/compare/jump/set code. */
5564
5565 if (!REG_P (target)
5566 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5567 target = gen_reg_rtx (GET_MODE (target));
5568
5569 emit_move_insn (target, const1_rtx);
5570 label = gen_label_rtx ();
5571 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5572 NULL_RTX, label);
5573
5574 emit_move_insn (target, const0_rtx);
5575 emit_label (label);
5576
5577 return target;
5578 }
5579 \f
5580 /* Perform possibly multi-word comparison and conditional jump to LABEL
5581 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5582
5583 The algorithm is based on the code in expr.c:do_jump.
5584
5585 Note that this does not perform a general comparison. Only
5586 variants generated within expmed.c are correctly handled, others
5587 could be handled if needed. */
5588
5589 static void
5590 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5591 rtx label)
5592 {
5593 /* If this mode is an integer too wide to compare properly,
5594 compare word by word. Rely on cse to optimize constant cases. */
5595
5596 if (GET_MODE_CLASS (mode) == MODE_INT
5597 && ! can_compare_p (op, mode, ccp_jump))
5598 {
5599 rtx label2 = gen_label_rtx ();
5600
5601 switch (op)
5602 {
5603 case LTU:
5604 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5605 break;
5606
5607 case LEU:
5608 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5609 break;
5610
5611 case LT:
5612 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5613 break;
5614
5615 case GT:
5616 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5617 break;
5618
5619 case GE:
5620 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5621 break;
5622
5623 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5624 those are the only equality comparisons we do. */
5625 case EQ:
5626 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5627 do_jump_by_parts_equality_rtx (arg1, label2, label);
5628 break;
5629
5630 case NE:
5631 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5632 do_jump_by_parts_equality_rtx (arg1, label, label2);
5633 break;
5634
5635 default:
5636 gcc_unreachable ();
5637 }
5638
5639 emit_label (label2);
5640 }
5641 else
5642 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5643 }