1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56
57 /* Test whether a value is zero or a power of two.  */
58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
59
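/* Illustrative note (not part of the original sources): clearing the lowest
   set bit of a power of two leaves zero, so for example
   EXACT_POWER_OF_2_OR_ZERO_P (8) and EXACT_POWER_OF_2_OR_ZERO_P (0) are
   nonzero, while EXACT_POWER_OF_2_OR_ZERO_P (12) is zero (12 & 11 == 8).  */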
60 /* Nonzero means divides or modulus operations are relatively cheap for
61 powers of two, so don't use branches; emit the operation instead.
62 Usually, this will mean that the MD file will emit non-branch
63 sequences. */
64
65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
67
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
71
72 /* For compilers that support multiple targets with different word sizes,
73 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
74 is the H8/300(H) compiler. */
75
76 #ifndef MAX_BITS_PER_WORD
77 #define MAX_BITS_PER_WORD BITS_PER_WORD
78 #endif
79
80 /* Reduce conditional compilation elsewhere. */
81 #ifndef HAVE_insv
82 #define HAVE_insv 0
83 #define CODE_FOR_insv CODE_FOR_nothing
84 #define gen_insv(a,b,c,d) NULL_RTX
85 #endif
86 #ifndef HAVE_extv
87 #define HAVE_extv 0
88 #define CODE_FOR_extv CODE_FOR_nothing
89 #define gen_extv(a,b,c,d) NULL_RTX
90 #endif
91 #ifndef HAVE_extzv
92 #define HAVE_extzv 0
93 #define CODE_FOR_extzv CODE_FOR_nothing
94 #define gen_extzv(a,b,c,d) NULL_RTX
95 #endif
96
97 /* Cost of various pieces of RTL. Note that some of these are indexed by
98 shift count and some by mode. */
99 static int zero_cost;
100 static int add_cost[NUM_MACHINE_MODES];
101 static int neg_cost[NUM_MACHINE_MODES];
102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int mul_cost[NUM_MACHINE_MODES];
106 static int div_cost[NUM_MACHINE_MODES];
107 static int mul_widen_cost[NUM_MACHINE_MODES];
108 static int mul_highpart_cost[NUM_MACHINE_MODES];
109
110 void
111 init_expmed (void)
112 {
113 struct
114 {
115 struct rtx_def reg; rtunion reg_fld[2];
116 struct rtx_def plus; rtunion plus_fld1;
117 struct rtx_def neg;
118 struct rtx_def udiv; rtunion udiv_fld1;
119 struct rtx_def mult; rtunion mult_fld1;
120 struct rtx_def div; rtunion div_fld1;
121 struct rtx_def mod; rtunion mod_fld1;
122 struct rtx_def zext;
123 struct rtx_def wide_mult; rtunion wide_mult_fld1;
124 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
125 struct rtx_def wide_trunc;
126 struct rtx_def shift; rtunion shift_fld1;
127 struct rtx_def shift_mult; rtunion shift_mult_fld1;
128 struct rtx_def shift_add; rtunion shift_add_fld1;
129 struct rtx_def shift_sub; rtunion shift_sub_fld1;
130 } all;
131
132 rtx pow2[MAX_BITS_PER_WORD];
133 rtx cint[MAX_BITS_PER_WORD];
134 int m, n;
135 enum machine_mode mode, wider_mode;
136
137 zero_cost = rtx_cost (const0_rtx, 0);
138
139 for (m = 1; m < MAX_BITS_PER_WORD; m++)
140 {
141 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
142 cint[m] = GEN_INT (m);
143 }
144
145 memset (&all, 0, sizeof all);
146
147 PUT_CODE (&all.reg, REG);
148 /* Avoid using hard regs in ways which may be unsupported. */
149 REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
150
151 PUT_CODE (&all.plus, PLUS);
152 XEXP (&all.plus, 0) = &all.reg;
153 XEXP (&all.plus, 1) = &all.reg;
154
155 PUT_CODE (&all.neg, NEG);
156 XEXP (&all.neg, 0) = &all.reg;
157
158 PUT_CODE (&all.udiv, UDIV);
159 XEXP (&all.udiv, 0) = &all.reg;
160 XEXP (&all.udiv, 1) = &all.reg;
161
162 PUT_CODE (&all.mult, MULT);
163 XEXP (&all.mult, 0) = &all.reg;
164 XEXP (&all.mult, 1) = &all.reg;
165
166 PUT_CODE (&all.div, DIV);
167 XEXP (&all.div, 0) = &all.reg;
168 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
169
170 PUT_CODE (&all.mod, MOD);
171 XEXP (&all.mod, 0) = &all.reg;
172 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
173
174 PUT_CODE (&all.zext, ZERO_EXTEND);
175 XEXP (&all.zext, 0) = &all.reg;
176
177 PUT_CODE (&all.wide_mult, MULT);
178 XEXP (&all.wide_mult, 0) = &all.zext;
179 XEXP (&all.wide_mult, 1) = &all.zext;
180
181 PUT_CODE (&all.wide_lshr, LSHIFTRT);
182 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
183
184 PUT_CODE (&all.wide_trunc, TRUNCATE);
185 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
186
187 PUT_CODE (&all.shift, ASHIFT);
188 XEXP (&all.shift, 0) = &all.reg;
189
190 PUT_CODE (&all.shift_mult, MULT);
191 XEXP (&all.shift_mult, 0) = &all.reg;
192
193 PUT_CODE (&all.shift_add, PLUS);
194 XEXP (&all.shift_add, 0) = &all.shift_mult;
195 XEXP (&all.shift_add, 1) = &all.reg;
196
197 PUT_CODE (&all.shift_sub, MINUS);
198 XEXP (&all.shift_sub, 0) = &all.shift_mult;
199 XEXP (&all.shift_sub, 1) = &all.reg;
200
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
204 {
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.udiv, mode);
209 PUT_MODE (&all.mult, mode);
210 PUT_MODE (&all.div, mode);
211 PUT_MODE (&all.mod, mode);
212 PUT_MODE (&all.wide_trunc, mode);
213 PUT_MODE (&all.shift, mode);
214 PUT_MODE (&all.shift_mult, mode);
215 PUT_MODE (&all.shift_add, mode);
216 PUT_MODE (&all.shift_sub, mode);
217
218 add_cost[mode] = rtx_cost (&all.plus, SET);
219 neg_cost[mode] = rtx_cost (&all.neg, SET);
220 div_cost[mode] = rtx_cost (&all.udiv, SET);
221 mul_cost[mode] = rtx_cost (&all.mult, SET);
222
223 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
224 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
225
226 wider_mode = GET_MODE_WIDER_MODE (mode);
227 if (wider_mode != VOIDmode)
228 {
229 PUT_MODE (&all.zext, wider_mode);
230 PUT_MODE (&all.wide_mult, wider_mode);
231 PUT_MODE (&all.wide_lshr, wider_mode);
232 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
233
234 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
235 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
236 }
237
238 shift_cost[mode][0] = 0;
239 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
240
241 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
242 for (m = 1; m < n; m++)
243 {
244 XEXP (&all.shift, 1) = cint[m];
245 XEXP (&all.shift_mult, 1) = pow2[m];
246
247 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
248 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
249 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
250 }
251 }
252 }
253
254 /* Return an rtx representing minus the value of X.
255 MODE is the intended mode of the result,
256 useful if X is a CONST_INT. */
257
258 rtx
259 negate_rtx (enum machine_mode mode, rtx x)
260 {
261 rtx result = simplify_unary_operation (NEG, mode, x, mode);
262
263 if (result == 0)
264 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
265
266 return result;
267 }
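/* Illustrative note (not part of the original sources): constant operands
   fold at compile time, e.g. negate_rtx (SImode, GEN_INT (5)) is expected
   to yield GEN_INT (-5) via simplify_unary_operation, while a register
   operand expands to a NEG insn through expand_unop.  */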
268
269 /* Report on the availability of insv/extv/extzv and the desired mode
270 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
271 is false; else the mode of the specified operand. If OPNO is -1,
272 all the caller cares about is whether the insn is available. */
273 enum machine_mode
274 mode_for_extraction (enum extraction_pattern pattern, int opno)
275 {
276 const struct insn_data *data;
277
278 switch (pattern)
279 {
280 case EP_insv:
281 if (HAVE_insv)
282 {
283 data = &insn_data[CODE_FOR_insv];
284 break;
285 }
286 return MAX_MACHINE_MODE;
287
288 case EP_extv:
289 if (HAVE_extv)
290 {
291 data = &insn_data[CODE_FOR_extv];
292 break;
293 }
294 return MAX_MACHINE_MODE;
295
296 case EP_extzv:
297 if (HAVE_extzv)
298 {
299 data = &insn_data[CODE_FOR_extzv];
300 break;
301 }
302 return MAX_MACHINE_MODE;
303
304 default:
305 gcc_unreachable ();
306 }
307
308 if (opno == -1)
309 return VOIDmode;
310
311 /* Everyone who uses this function used to follow it with
312 if (result == VOIDmode) result = word_mode; */
313 if (data->operand[opno].mode == VOIDmode)
314 return word_mode;
315 return data->operand[opno].mode;
316 }
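/* Illustrative note (not part of the original sources): a typical caller
   asks for the mode of a single operand, e.g.
     mode_for_extraction (EP_insv, 3)
   yields the mode the insv pattern wants for the value being inserted, or
   MAX_MACHINE_MODE when the target provides no insv pattern at all.  */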
317
318 \f
319 /* Generate code to store value from rtx VALUE
320 into a bit-field within structure STR_RTX
321 containing BITSIZE bits starting at bit BITNUM.
322 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
325
326 /* ??? Note that there are two different ideas here for how
327 to determine the size to count bits within, for a register.
328 One is BITS_PER_WORD, and the other is the size of operand 3
329 of the insv pattern.
330
331 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
332 else, we use the mode of operand 3.  */
333
334 rtx
335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
336 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
337 rtx value)
338 {
339 unsigned int unit
340 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
341 unsigned HOST_WIDE_INT offset, bitpos;
342 rtx op0 = str_rtx;
343 int byte_offset;
344 rtx orig_value;
345
346 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
347
348 while (GET_CODE (op0) == SUBREG)
349 {
350 /* The following line once was done only if WORDS_BIG_ENDIAN,
351 but I think that is a mistake. WORDS_BIG_ENDIAN is
352 meaningful at a much higher level; when structures are copied
353 between memory and regs, the higher-numbered regs
354 always get higher addresses. */
355 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
356 op0 = SUBREG_REG (op0);
357 }
358
359 /* No action is needed if the target is a register and if the field
360 lies completely outside that register. This can occur if the source
361 code contains an out-of-bounds access to a small array. */
362 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
363 return value;
364
365 /* Use vec_set patterns for inserting parts of vectors whenever
366 available. */
367 if (VECTOR_MODE_P (GET_MODE (op0))
368 && !MEM_P (op0)
369 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
370 != CODE_FOR_nothing)
371 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
372 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
373 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
374 {
375 enum machine_mode outermode = GET_MODE (op0);
376 enum machine_mode innermode = GET_MODE_INNER (outermode);
377 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
378 int pos = bitnum / GET_MODE_BITSIZE (innermode);
379 rtx rtxpos = GEN_INT (pos);
380 rtx src = value;
381 rtx dest = op0;
382 rtx pat, seq;
383 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
384 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
385 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
386
387 start_sequence ();
388
389 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
390 src = copy_to_mode_reg (mode1, src);
391
392 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
393 rtxpos = copy_to_mode_reg (mode2, rtxpos);
394
395 /* We could handle this, but we should always be called with a pseudo
396 for our targets and all insns should take them as outputs. */
397 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
398 && (*insn_data[icode].operand[1].predicate) (src, mode1)
399 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
400 pat = GEN_FCN (icode) (dest, src, rtxpos);
401 seq = get_insns ();
402 end_sequence ();
403 if (pat)
404 {
405 emit_insn (seq);
406 emit_insn (pat);
407 return dest;
408 }
409 }
410
411 /* If the target is a register, overwriting the entire object, or storing
412 a full-word or multi-word field can be done with just a SUBREG.
413
414 If the target is memory, storing any naturally aligned field can be
415 done with a simple store. For targets that support fast unaligned
416 memory, any naturally sized, unit aligned field can be done directly. */
417
418 offset = bitnum / unit;
419 bitpos = bitnum % unit;
420 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
421 + (offset * UNITS_PER_WORD);
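/* Worked example (illustrative only, assuming 8-bit units and 32-bit words):
   bitnum == 37 on a MEM target gives offset == 4 bytes and bitpos == 5;
   on a REG target it gives offset == 1 word, bitpos == 5 and
   byte_offset == 4.  */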
422
423 if (bitpos == 0
424 && bitsize == GET_MODE_BITSIZE (fieldmode)
425 && (!MEM_P (op0)
426 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
427 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
428 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
429 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
430 || (offset * BITS_PER_UNIT % bitsize == 0
431 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
432 {
433 if (MEM_P (op0))
434 op0 = adjust_address (op0, fieldmode, offset);
435 else if (GET_MODE (op0) != fieldmode)
436 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
437 byte_offset);
438 emit_move_insn (op0, value);
439 return value;
440 }
441
442 /* Make sure we are playing with integral modes. Pun with subregs
443 if we aren't. This must come after the entire register case above,
444 since that case is valid for any mode. The following cases are only
445 valid for integral modes. */
446 {
447 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
448 if (imode != GET_MODE (op0))
449 {
450 if (MEM_P (op0))
451 op0 = adjust_address (op0, imode, 0);
452 else
453 {
454 gcc_assert (imode != BLKmode);
455 op0 = gen_lowpart (imode, op0);
456 }
457 }
458 }
459
460 /* We may be accessing data outside the field, which means
461 we can alias adjacent data. */
462 if (MEM_P (op0))
463 {
464 op0 = shallow_copy_rtx (op0);
465 set_mem_alias_set (op0, 0);
466 set_mem_expr (op0, 0);
467 }
468
469 /* If OP0 is a register, BITPOS must count within a word.
470 But as we have it, it counts within whatever size OP0 now has.
471 On a bigendian machine, these are not the same, so convert. */
472 if (BYTES_BIG_ENDIAN
473 && !MEM_P (op0)
474 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
475 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
476
477 /* Storing an lsb-aligned field in a register
478 can be done with a movestrict instruction. */
479
480 if (!MEM_P (op0)
481 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
482 && bitsize == GET_MODE_BITSIZE (fieldmode)
483 && (movstrict_optab->handlers[fieldmode].insn_code
484 != CODE_FOR_nothing))
485 {
486 int icode = movstrict_optab->handlers[fieldmode].insn_code;
487
488 /* Get appropriate low part of the value being stored. */
489 if (GET_CODE (value) == CONST_INT || REG_P (value))
490 value = gen_lowpart (fieldmode, value);
491 else if (!(GET_CODE (value) == SYMBOL_REF
492 || GET_CODE (value) == LABEL_REF
493 || GET_CODE (value) == CONST))
494 value = convert_to_mode (fieldmode, value, 0);
495
496 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
497 value = copy_to_mode_reg (fieldmode, value);
498
499 if (GET_CODE (op0) == SUBREG)
500 {
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 op0 = SUBREG_REG (op0);
508 }
509
510 emit_insn (GEN_FCN (icode)
511 (gen_rtx_SUBREG (fieldmode, op0,
512 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
513 + (offset * UNITS_PER_WORD)),
514 value));
515
516 return value;
517 }
518
519 /* Handle fields bigger than a word. */
520
521 if (bitsize > BITS_PER_WORD)
522 {
523 /* Here we transfer the words of the field
524 in the order least significant first.
525 This is because the most significant word is the one which may
526 be less than full.
527 However, only do that if the value is not BLKmode. */
528
529 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
530 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
531 unsigned int i;
532
533 /* This is the mode we must force value to, so that there will be enough
534 subwords to extract. Note that fieldmode will often (always?) be
535 VOIDmode, because that is what store_field uses to indicate that this
536 is a bit field, but passing VOIDmode to operand_subword_force
537 is not allowed. */
538 fieldmode = GET_MODE (value);
539 if (fieldmode == VOIDmode)
540 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
541
542 for (i = 0; i < nwords; i++)
543 {
544 /* If I is 0, use the low-order word in both field and target;
545 if I is 1, use the next to lowest word; and so on. */
546 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
547 unsigned int bit_offset = (backwards
548 ? MAX ((int) bitsize - ((int) i + 1)
549 * BITS_PER_WORD,
550 0)
551 : (int) i * BITS_PER_WORD);
552
553 store_bit_field (op0, MIN (BITS_PER_WORD,
554 bitsize - i * BITS_PER_WORD),
555 bitnum + bit_offset, word_mode,
556 operand_subword_force (value, wordnum, fieldmode));
557 }
558 return value;
559 }
560
561 /* From here on we can assume that the field to be stored in fits
562 within a single word, since it is no wider than a word.  */
563
564 /* OFFSET is the number of words or bytes (UNIT says which)
565 from STR_RTX to the first word or byte containing part of the field. */
566
567 if (!MEM_P (op0))
568 {
569 if (offset != 0
570 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
571 {
572 if (!REG_P (op0))
573 {
574 /* Since this is a destination (lvalue), we can't copy
575 it to a pseudo. We can remove a SUBREG that does not
576 change the size of the operand. Such a SUBREG may
577 have been added above. */
578 gcc_assert (GET_CODE (op0) == SUBREG
579 && (GET_MODE_SIZE (GET_MODE (op0))
580 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
581 op0 = SUBREG_REG (op0);
582 }
583 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
584 op0, (offset * UNITS_PER_WORD));
585 }
586 offset = 0;
587 }
588
589 /* If VALUE has a floating-point or complex mode, access it as an
590 integer of the corresponding size. This can occur on a machine
591 with 64 bit registers that uses SFmode for float. It can also
592 occur for unaligned float or complex fields. */
593 orig_value = value;
594 if (GET_MODE (value) != VOIDmode
595 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
596 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
597 {
598 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
599 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
600 }
601
602 /* Now OFFSET is nonzero only if OP0 is memory
603 and is therefore always measured in bytes. */
604
605 if (HAVE_insv
606 && GET_MODE (value) != BLKmode
607 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
608 && bitsize > 0
609 && GET_MODE_BITSIZE (op_mode) >= bitsize
610 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
611 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
612 {
613 int xbitpos = bitpos;
614 rtx value1;
615 rtx xop0 = op0;
616 rtx last = get_last_insn ();
617 rtx pat;
618 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
619 int save_volatile_ok = volatile_ok;
620
621 volatile_ok = 1;
622
623 /* If this machine's insv can only insert into a register, copy OP0
624 into a register and save it back later. */
625 if (MEM_P (op0)
626 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
627 (op0, VOIDmode)))
628 {
629 rtx tempreg;
630 enum machine_mode bestmode;
631
632 /* Get the mode to use for inserting into this field. If OP0 is
633 BLKmode, get the smallest mode consistent with the alignment. If
634 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
635 mode. Otherwise, use the smallest mode containing the field. */
636
637 if (GET_MODE (op0) == BLKmode
638 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
639 bestmode
640 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
641 MEM_VOLATILE_P (op0));
642 else
643 bestmode = GET_MODE (op0);
644
645 if (bestmode == VOIDmode
646 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
647 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
648 goto insv_loses;
649
650 /* Adjust address to point to the containing unit of that mode.
651 Compute offset as multiple of this unit, counting in bytes. */
652 unit = GET_MODE_BITSIZE (bestmode);
653 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
654 bitpos = bitnum % unit;
655 op0 = adjust_address (op0, bestmode, offset);
656
657 /* Fetch that unit, store the bitfield in it, then store
658 the unit. */
659 tempreg = copy_to_reg (op0);
660 store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
661 emit_move_insn (op0, tempreg);
662 return value;
663 }
664 volatile_ok = save_volatile_ok;
665
666 /* Add OFFSET into OP0's address. */
667 if (MEM_P (xop0))
668 xop0 = adjust_address (xop0, byte_mode, offset);
669
670 /* If xop0 is a register, we need it in MAXMODE
671 to make it acceptable to the format of insv. */
672 if (GET_CODE (xop0) == SUBREG)
673 /* We can't just change the mode, because this might clobber op0,
674 and we will need the original value of op0 if insv fails. */
675 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
676 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
677 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
678
679 /* On big-endian machines, we count bits from the most significant.
680 If the bit field insn does not, we must invert. */
681
682 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
683 xbitpos = unit - bitsize - xbitpos;
684
685 /* We have been counting XBITPOS within UNIT.
686 Count instead within the size of the register. */
687 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
688 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
689
690 unit = GET_MODE_BITSIZE (maxmode);
691
692 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
693 value1 = value;
694 if (GET_MODE (value) != maxmode)
695 {
696 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
697 {
698 /* Optimization: Don't bother really extending VALUE
699 if it has all the bits we will actually use. However,
700 if we must narrow it, be sure we do it correctly. */
701
702 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
703 {
704 rtx tmp;
705
706 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
707 if (! tmp)
708 tmp = simplify_gen_subreg (maxmode,
709 force_reg (GET_MODE (value),
710 value1),
711 GET_MODE (value), 0);
712 value1 = tmp;
713 }
714 else
715 value1 = gen_lowpart (maxmode, value1);
716 }
717 else if (GET_CODE (value) == CONST_INT)
718 value1 = gen_int_mode (INTVAL (value), maxmode);
719 else
720 /* Parse phase is supposed to make VALUE's data type
721 match that of the component reference, which is a type
722 at least as wide as the field; so VALUE should have
723 a mode that corresponds to that type. */
724 gcc_assert (CONSTANT_P (value));
725 }
726
727 /* If this machine's insv insists on a register,
728 get VALUE1 into a register. */
729 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
730 (value1, maxmode)))
731 value1 = force_reg (maxmode, value1);
732
733 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
734 if (pat)
735 emit_insn (pat);
736 else
737 {
738 delete_insns_since (last);
739 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
740 }
741 }
742 else
743 insv_loses:
744 /* Insv is not available; store using shifts and boolean ops. */
745 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
746 return value;
747 }
748 \f
749 /* Use shifts and boolean operations to store VALUE
750 into a bit field of width BITSIZE
751 in a memory location specified by OP0 except offset by OFFSET bytes.
752 (OFFSET must be 0 if OP0 is a register.)
753 The field starts at position BITPOS within the byte.
754 (If OP0 is a register, it may be a full word or a narrower mode,
755 but BITPOS still counts within a full word,
756 which is significant on bigendian machines.) */
757
758 static void
759 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
760 unsigned HOST_WIDE_INT bitsize,
761 unsigned HOST_WIDE_INT bitpos, rtx value)
762 {
763 enum machine_mode mode;
764 unsigned int total_bits = BITS_PER_WORD;
765 rtx subtarget, temp;
766 int all_zero = 0;
767 int all_one = 0;
768
769 /* There is a case not handled here:
770 a structure with a known alignment of just a halfword
771 and a field split across two aligned halfwords within the structure.
772 Or likewise a structure with a known alignment of just a byte
773 and a field split across two bytes.
774 Such cases are not supposed to be able to occur. */
775
776 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
777 {
778 gcc_assert (!offset);
779 /* Special treatment for a bit field split across two registers. */
780 if (bitsize + bitpos > BITS_PER_WORD)
781 {
782 store_split_bit_field (op0, bitsize, bitpos, value);
783 return;
784 }
785 }
786 else
787 {
788 /* Get the proper mode to use for this field. We want a mode that
789 includes the entire field. If such a mode would be larger than
790 a word, we won't be doing the extraction the normal way.
791 We don't want a mode bigger than the destination. */
792
793 mode = GET_MODE (op0);
794 if (GET_MODE_BITSIZE (mode) == 0
795 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
796 mode = word_mode;
797 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
798 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
799
800 if (mode == VOIDmode)
801 {
802 /* The only way this should occur is if the field spans word
803 boundaries. */
804 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
805 value);
806 return;
807 }
808
809 total_bits = GET_MODE_BITSIZE (mode);
810
811 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
812 be in the range 0 to total_bits-1, and put any excess bytes in
813 OFFSET. */
814 if (bitpos >= total_bits)
815 {
816 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
817 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
818 * BITS_PER_UNIT);
819 }
820
821 /* Get ref to an aligned byte, halfword, or word containing the field.
822 Adjust BITPOS to be position within a word,
823 and OFFSET to be the offset of that word.
824 Then alter OP0 to refer to that word. */
825 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
826 offset -= (offset % (total_bits / BITS_PER_UNIT));
827 op0 = adjust_address (op0, mode, offset);
828 }
829
830 mode = GET_MODE (op0);
831
832 /* Now MODE is either some integral mode for a MEM as OP0,
833 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
834 The bit field is contained entirely within OP0.
835 BITPOS is the starting bit number within OP0.
836 (OP0's mode may actually be narrower than MODE.) */
837
838 if (BYTES_BIG_ENDIAN)
839 /* BITPOS is the distance between our msb
840 and that of the containing datum.
841 Convert it to the distance from the lsb. */
842 bitpos = total_bits - bitsize - bitpos;
843
844 /* Now BITPOS is always the distance between our lsb
845 and that of OP0. */
846
847 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
848 we must first convert its mode to MODE. */
849
850 if (GET_CODE (value) == CONST_INT)
851 {
852 HOST_WIDE_INT v = INTVAL (value);
853
854 if (bitsize < HOST_BITS_PER_WIDE_INT)
855 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
856
857 if (v == 0)
858 all_zero = 1;
859 else if ((bitsize < HOST_BITS_PER_WIDE_INT
860 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
861 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
862 all_one = 1;
863
864 value = lshift_value (mode, value, bitpos, bitsize);
865 }
866 else
867 {
868 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
869 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
870
871 if (GET_MODE (value) != mode)
872 {
873 if ((REG_P (value) || GET_CODE (value) == SUBREG)
874 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
875 value = gen_lowpart (mode, value);
876 else
877 value = convert_to_mode (mode, value, 1);
878 }
879
880 if (must_and)
881 value = expand_binop (mode, and_optab, value,
882 mask_rtx (mode, 0, bitsize, 0),
883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
884 if (bitpos > 0)
885 value = expand_shift (LSHIFT_EXPR, mode, value,
886 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
887 }
888
889 /* Now clear the chosen bits in OP0,
890 except that if VALUE is -1 we need not bother. */
891
892 subtarget = op0;
893
894 if (! all_one)
895 {
896 temp = expand_binop (mode, and_optab, op0,
897 mask_rtx (mode, bitpos, bitsize, 1),
898 subtarget, 1, OPTAB_LIB_WIDEN);
899 subtarget = temp;
900 }
901 else
902 temp = op0;
903
904 /* Now logical-or VALUE into OP0, unless it is zero. */
905
906 if (! all_zero)
907 temp = expand_binop (mode, ior_optab, temp, value,
908 subtarget, 1, OPTAB_LIB_WIDEN);
909 if (op0 != temp)
910 emit_move_insn (op0, temp);
911 }
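#if 0   /* Illustrative model only -- not compiled, not GCC internals.  */
/* A plain-C sketch (assuming 32-bit words) of the read-modify-write
   sequence that store_fixed_bit_field above expands to for the
   little-endian case: build a mask for the field, clear the field with
   an AND, then OR in the shifted value.  The name below is hypothetical.  */
static unsigned int
store_fixed_bit_field_model (unsigned int word, unsigned int value,
                             unsigned int bitpos, unsigned int bitsize)
{
  unsigned int field_mask
    = (bitsize < 32 ? (1u << bitsize) - 1u : ~0u) << bitpos;

  return (word & ~field_mask) | ((value << bitpos) & field_mask);
}
#endif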
912 \f
913 /* Store a bit field that is split across multiple accessible memory objects.
914
915 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
916 BITSIZE is the field width; BITPOS the position of its first bit
917 (within the word).
918 VALUE is the value to store.
919
920 This does not yet handle fields wider than BITS_PER_WORD. */
921
922 static void
923 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
924 unsigned HOST_WIDE_INT bitpos, rtx value)
925 {
926 unsigned int unit;
927 unsigned int bitsdone = 0;
928
929 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
930 much at a time. */
931 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
932 unit = BITS_PER_WORD;
933 else
934 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
935
936 /* If VALUE is a constant other than a CONST_INT, get it into a register in
937 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
938 that VALUE might be a floating-point constant. */
939 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
940 {
941 rtx word = gen_lowpart_common (word_mode, value);
942
943 if (word && (value != word))
944 value = word;
945 else
946 value = gen_lowpart_common (word_mode,
947 force_reg (GET_MODE (value) != VOIDmode
948 ? GET_MODE (value)
949 : word_mode, value));
950 }
951
952 while (bitsdone < bitsize)
953 {
954 unsigned HOST_WIDE_INT thissize;
955 rtx part, word;
956 unsigned HOST_WIDE_INT thispos;
957 unsigned HOST_WIDE_INT offset;
958
959 offset = (bitpos + bitsdone) / unit;
960 thispos = (bitpos + bitsdone) % unit;
961
962 /* THISSIZE must not overrun a word boundary. Otherwise,
963 store_fixed_bit_field will call us again, and we will mutually
964 recurse forever. */
965 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
966 thissize = MIN (thissize, unit - thispos);
967
968 if (BYTES_BIG_ENDIAN)
969 {
970 int total_bits;
971
972 /* We must do an endian conversion exactly the same way as it is
973 done in extract_bit_field, so that the two calls to
974 extract_fixed_bit_field will have comparable arguments. */
975 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
976 total_bits = BITS_PER_WORD;
977 else
978 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
979
980 /* Fetch successively less significant portions. */
981 if (GET_CODE (value) == CONST_INT)
982 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
983 >> (bitsize - bitsdone - thissize))
984 & (((HOST_WIDE_INT) 1 << thissize) - 1));
985 else
986 /* The args are chosen so that the last part includes the
987 lsb. Give extract_bit_field the value it needs (with
988 endianness compensation) to fetch the piece we want. */
989 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
990 total_bits - bitsize + bitsdone,
991 NULL_RTX, 1);
992 }
993 else
994 {
995 /* Fetch successively more significant portions. */
996 if (GET_CODE (value) == CONST_INT)
997 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
998 >> bitsdone)
999 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1000 else
1001 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002 bitsdone, NULL_RTX, 1);
1003 }
1004
1005 /* If OP0 is a register, then handle OFFSET here.
1006
1007 When handling multiword bitfields, extract_bit_field may pass
1008 down a word_mode SUBREG of a larger REG for a bitfield that actually
1009 crosses a word boundary. Thus, for a SUBREG, we must find
1010 the current word starting from the base register. */
1011 if (GET_CODE (op0) == SUBREG)
1012 {
1013 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1014 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1015 GET_MODE (SUBREG_REG (op0)));
1016 offset = 0;
1017 }
1018 else if (REG_P (op0))
1019 {
1020 word = operand_subword_force (op0, offset, GET_MODE (op0));
1021 offset = 0;
1022 }
1023 else
1024 word = op0;
1025
1026 /* OFFSET is in UNITs, and UNIT is in bits.
1027 store_fixed_bit_field wants offset in bytes. */
1028 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1029 thispos, part);
1030 bitsdone += thissize;
1031 }
1032 }
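/* Worked example (illustrative only): with 32-bit words and a REG target,
   a 5-bit field at BITPOS 30 is split by the loop above into a 2-bit piece
   stored at bit 30 of word 0 followed by a 3-bit piece stored at bit 0 of
   word 1 (for the little-endian case).  */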
1033 \f
1034 /* Generate code to extract a byte-field from STR_RTX
1035 containing BITSIZE bits, starting at BITNUM,
1036 and put it in TARGET if possible (if TARGET is nonzero).
1037 Regardless of TARGET, we return the rtx for where the value is placed.
1038
1039 STR_RTX is the structure containing the byte (a REG or MEM).
1040 UNSIGNEDP is nonzero if this is an unsigned bit field.
1041 MODE is the natural mode of the field value once extracted.
1042 TMODE is the mode the caller would like the value to have;
1043 but the value may be returned with type MODE instead.
1044
1045 TOTAL_SIZE is the size in bytes of the containing structure,
1046 or -1 if varying.
1047
1048 If a TARGET is specified and we can store in it at no extra cost,
1049 we do so, and return TARGET.
1050 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1051 if they are equally easy. */
1052
1053 rtx
1054 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1055 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1056 enum machine_mode mode, enum machine_mode tmode)
1057 {
1058 unsigned int unit
1059 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1060 unsigned HOST_WIDE_INT offset, bitpos;
1061 rtx op0 = str_rtx;
1062 rtx spec_target = target;
1063 rtx spec_target_subreg = 0;
1064 enum machine_mode int_mode;
1065 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1066 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1067 enum machine_mode mode1;
1068 int byte_offset;
1069
1070 if (tmode == VOIDmode)
1071 tmode = mode;
1072
1073 while (GET_CODE (op0) == SUBREG)
1074 {
1075 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1076 op0 = SUBREG_REG (op0);
1077 }
1078
1079 /* If we have an out-of-bounds access to a register, just return an
1080 uninitialized register of the required mode. This can occur if the
1081 source code contains an out-of-bounds access to a small array. */
1082 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1083 return gen_reg_rtx (tmode);
1084
1085 if (REG_P (op0)
1086 && mode == GET_MODE (op0)
1087 && bitnum == 0
1088 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1089 {
1090 /* We're trying to extract a full register from itself. */
1091 return op0;
1092 }
1093
1094 /* Use vec_extract patterns for extracting parts of vectors whenever
1095 available. */
1096 if (VECTOR_MODE_P (GET_MODE (op0))
1097 && !MEM_P (op0)
1098 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1099 != CODE_FOR_nothing)
1100 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1101 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1102 {
1103 enum machine_mode outermode = GET_MODE (op0);
1104 enum machine_mode innermode = GET_MODE_INNER (outermode);
1105 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1106 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1107 rtx rtxpos = GEN_INT (pos);
1108 rtx src = op0;
1109 rtx dest = NULL, pat, seq;
1110 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1111 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1112 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1113
1114 if (innermode == tmode || innermode == mode)
1115 dest = target;
1116
1117 if (!dest)
1118 dest = gen_reg_rtx (innermode);
1119
1120 start_sequence ();
1121
1122 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1123 dest = copy_to_mode_reg (mode0, dest);
1124
1125 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1126 src = copy_to_mode_reg (mode1, src);
1127
1128 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1129 rtxpos = copy_to_mode_reg (mode2, rtxpos);
1130
1131 /* We could handle this, but we should always be called with a pseudo
1132 for our targets and all insns should take them as outputs. */
1133 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1134 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1135 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1136
1137 pat = GEN_FCN (icode) (dest, src, rtxpos);
1138 seq = get_insns ();
1139 end_sequence ();
1140 if (pat)
1141 {
1142 emit_insn (seq);
1143 emit_insn (pat);
1144 return dest;
1145 }
1146 }
1147
1148 /* Make sure we are playing with integral modes. Pun with subregs
1149 if we aren't. */
1150 {
1151 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1152 if (imode != GET_MODE (op0))
1153 {
1154 if (MEM_P (op0))
1155 op0 = adjust_address (op0, imode, 0);
1156 else
1157 {
1158 gcc_assert (imode != BLKmode);
1159 op0 = gen_lowpart (imode, op0);
1160
1161 /* If we got a SUBREG, force it into a register since we
1162 aren't going to be able to do another SUBREG on it. */
1163 if (GET_CODE (op0) == SUBREG)
1164 op0 = force_reg (imode, op0);
1165 }
1166 }
1167 }
1168
1169 /* We may be accessing data outside the field, which means
1170 we can alias adjacent data. */
1171 if (MEM_P (op0))
1172 {
1173 op0 = shallow_copy_rtx (op0);
1174 set_mem_alias_set (op0, 0);
1175 set_mem_expr (op0, 0);
1176 }
1177
1178 /* Extraction of a full-word or multi-word value from a structure
1179 in a register or aligned memory can be done with just a SUBREG.
1180 A subword value in the least significant part of a register
1181 can also be extracted with a SUBREG. For this, we need the
1182 byte offset of the value in op0. */
1183
1184 bitpos = bitnum % unit;
1185 offset = bitnum / unit;
1186 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1187
1188 /* If OP0 is a register, BITPOS must count within a word.
1189 But as we have it, it counts within whatever size OP0 now has.
1190 On a bigendian machine, these are not the same, so convert. */
1191 if (BYTES_BIG_ENDIAN
1192 && !MEM_P (op0)
1193 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1194 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1195
1196 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1197 If that's wrong, the solution is to test for it and set TARGET to 0
1198 if needed. */
1199
1200 /* Only scalar integer modes can be converted via subregs. There is an
1201 additional problem for FP modes here in that they can have a precision
1202 which is different from the size. mode_for_size uses precision, but
1203 we want a mode based on the size, so we must avoid calling it for FP
1204 modes. */
1205 mode1 = (SCALAR_INT_MODE_P (tmode)
1206 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1207 : mode);
1208
1209 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1210 && bitpos % BITS_PER_WORD == 0)
1211 || (mode1 != BLKmode
1212 /* ??? The big endian test here is wrong. This is correct
1213 if the value is in a register, and if mode_for_size is not
1214 the same mode as op0. This causes us to get unnecessarily
1215 inefficient code from the Thumb port when -mbig-endian. */
1216 && (BYTES_BIG_ENDIAN
1217 ? bitpos + bitsize == BITS_PER_WORD
1218 : bitpos == 0)))
1219 && ((!MEM_P (op0)
1220 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1221 GET_MODE_BITSIZE (GET_MODE (op0)))
1222 && GET_MODE_SIZE (mode1) != 0
1223 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1224 || (MEM_P (op0)
1225 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1226 || (offset * BITS_PER_UNIT % bitsize == 0
1227 && MEM_ALIGN (op0) % bitsize == 0)))))
1228 {
1229 if (mode1 != GET_MODE (op0))
1230 {
1231 if (MEM_P (op0))
1232 op0 = adjust_address (op0, mode1, offset);
1233 else
1234 {
1235 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1236 byte_offset);
1237 if (sub == NULL)
1238 goto no_subreg_mode_swap;
1239 op0 = sub;
1240 }
1241 }
1242 if (mode1 != mode)
1243 return convert_to_mode (tmode, op0, unsignedp);
1244 return op0;
1245 }
1246 no_subreg_mode_swap:
1247
1248 /* Handle fields bigger than a word. */
1249
1250 if (bitsize > BITS_PER_WORD)
1251 {
1252 /* Here we transfer the words of the field
1253 in the order least significant first.
1254 This is because the most significant word is the one which may
1255 be less than full. */
1256
1257 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1258 unsigned int i;
1259
1260 if (target == 0 || !REG_P (target))
1261 target = gen_reg_rtx (mode);
1262
1263 /* Indicate for flow that the entire target reg is being set. */
1264 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1265
1266 for (i = 0; i < nwords; i++)
1267 {
1268 /* If I is 0, use the low-order word in both field and target;
1269 if I is 1, use the next to lowest word; and so on. */
1270 /* Word number in TARGET to use. */
1271 unsigned int wordnum
1272 = (WORDS_BIG_ENDIAN
1273 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1274 : i);
1275 /* Offset from start of field in OP0. */
1276 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1277 ? MAX (0, ((int) bitsize - ((int) i + 1)
1278 * (int) BITS_PER_WORD))
1279 : (int) i * BITS_PER_WORD);
1280 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1281 rtx result_part
1282 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1283 bitsize - i * BITS_PER_WORD),
1284 bitnum + bit_offset, 1, target_part, mode,
1285 word_mode);
1286
1287 gcc_assert (target_part);
1288
1289 if (result_part != target_part)
1290 emit_move_insn (target_part, result_part);
1291 }
1292
1293 if (unsignedp)
1294 {
1295 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1296 need to be zero'd out. */
1297 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1298 {
1299 unsigned int i, total_words;
1300
1301 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1302 for (i = nwords; i < total_words; i++)
1303 emit_move_insn
1304 (operand_subword (target,
1305 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1306 1, VOIDmode),
1307 const0_rtx);
1308 }
1309 return target;
1310 }
1311
1312 /* Signed bit field: sign-extend with two arithmetic shifts. */
1313 target = expand_shift (LSHIFT_EXPR, mode, target,
1314 build_int_cst (NULL_TREE,
1315 GET_MODE_BITSIZE (mode) - bitsize),
1316 NULL_RTX, 0);
1317 return expand_shift (RSHIFT_EXPR, mode, target,
1318 build_int_cst (NULL_TREE,
1319 GET_MODE_BITSIZE (mode) - bitsize),
1320 NULL_RTX, 0);
1321 }
1322
1323 /* From here on we know the desired field is smaller than a word. */
1324
1325 /* Check if there is a correspondingly-sized integer field, so we can
1326 safely extract it as one size of integer, if necessary; then
1327 truncate or extend to the size that is wanted; then use SUBREGs or
1328 convert_to_mode to get one of the modes we really wanted. */
1329
1330 int_mode = int_mode_for_mode (tmode);
1331 if (int_mode == BLKmode)
1332 int_mode = int_mode_for_mode (mode);
1333 /* Should probably push op0 out to memory and then do a load. */
1334 gcc_assert (int_mode != BLKmode);
1335
1336 /* OFFSET is the number of words or bytes (UNIT says which)
1337 from STR_RTX to the first word or byte containing part of the field. */
1338 if (!MEM_P (op0))
1339 {
1340 if (offset != 0
1341 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1342 {
1343 if (!REG_P (op0))
1344 op0 = copy_to_reg (op0);
1345 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1346 op0, (offset * UNITS_PER_WORD));
1347 }
1348 offset = 0;
1349 }
1350
1351 /* Now OFFSET is nonzero only for memory operands. */
1352
1353 if (unsignedp)
1354 {
1355 if (HAVE_extzv
1356 && bitsize > 0
1357 && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1358 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1359 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1360 {
1361 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1362 rtx bitsize_rtx, bitpos_rtx;
1363 rtx last = get_last_insn ();
1364 rtx xop0 = op0;
1365 rtx xtarget = target;
1366 rtx xspec_target = spec_target;
1367 rtx xspec_target_subreg = spec_target_subreg;
1368 rtx pat;
1369 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1370
1371 if (MEM_P (xop0))
1372 {
1373 int save_volatile_ok = volatile_ok;
1374 volatile_ok = 1;
1375
1376 /* Is the memory operand acceptable? */
1377 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1378 (xop0, GET_MODE (xop0))))
1379 {
1380 /* No, load into a reg and extract from there. */
1381 enum machine_mode bestmode;
1382
1383 /* Get the mode to use for inserting into this field. If
1384 OP0 is BLKmode, get the smallest mode consistent with the
1385 alignment. If OP0 is a non-BLKmode object that is no
1386 wider than MAXMODE, use its mode. Otherwise, use the
1387 smallest mode containing the field. */
1388
1389 if (GET_MODE (xop0) == BLKmode
1390 || (GET_MODE_SIZE (GET_MODE (op0))
1391 > GET_MODE_SIZE (maxmode)))
1392 bestmode = get_best_mode (bitsize, bitnum,
1393 MEM_ALIGN (xop0), maxmode,
1394 MEM_VOLATILE_P (xop0));
1395 else
1396 bestmode = GET_MODE (xop0);
1397
1398 if (bestmode == VOIDmode
1399 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1400 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1401 goto extzv_loses;
1402
1403 /* Compute offset as multiple of this unit,
1404 counting in bytes. */
1405 unit = GET_MODE_BITSIZE (bestmode);
1406 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1407 xbitpos = bitnum % unit;
1408 xop0 = adjust_address (xop0, bestmode, xoffset);
1409
1410 /* Make sure register is big enough for the whole field. */
1411 if (xoffset * BITS_PER_UNIT + unit
1412 < offset * BITS_PER_UNIT + bitsize)
1413 goto extzv_loses;
1414
1415 /* Fetch it to a register in that size. */
1416 xop0 = force_reg (bestmode, xop0);
1417
1418 /* XBITPOS counts within UNIT, which is what is expected. */
1419 }
1420 else
1421 /* Get ref to first byte containing part of the field. */
1422 xop0 = adjust_address (xop0, byte_mode, xoffset);
1423
1424 volatile_ok = save_volatile_ok;
1425 }
1426
1427 /* If op0 is a register, we need it in MAXMODE (which is usually
1428 SImode) to make it acceptable to the format of extzv.  */
1429 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1430 goto extzv_loses;
1431 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1432 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1433
1434 /* On big-endian machines, we count bits from the most significant.
1435 If the bit field insn does not, we must invert. */
1436 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1437 xbitpos = unit - bitsize - xbitpos;
1438
1439 /* Now convert from counting within UNIT to counting in MAXMODE. */
1440 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1441 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1442
1443 unit = GET_MODE_BITSIZE (maxmode);
1444
1445 if (xtarget == 0)
1446 xtarget = xspec_target = gen_reg_rtx (tmode);
1447
1448 if (GET_MODE (xtarget) != maxmode)
1449 {
1450 if (REG_P (xtarget))
1451 {
1452 int wider = (GET_MODE_SIZE (maxmode)
1453 > GET_MODE_SIZE (GET_MODE (xtarget)));
1454 xtarget = gen_lowpart (maxmode, xtarget);
1455 if (wider)
1456 xspec_target_subreg = xtarget;
1457 }
1458 else
1459 xtarget = gen_reg_rtx (maxmode);
1460 }
1461
1462 /* If this machine's extzv insists on a register target,
1463 make sure we have one. */
1464 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1465 (xtarget, maxmode)))
1466 xtarget = gen_reg_rtx (maxmode);
1467
1468 bitsize_rtx = GEN_INT (bitsize);
1469 bitpos_rtx = GEN_INT (xbitpos);
1470
1471 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1472 if (pat)
1473 {
1474 emit_insn (pat);
1475 target = xtarget;
1476 spec_target = xspec_target;
1477 spec_target_subreg = xspec_target_subreg;
1478 }
1479 else
1480 {
1481 delete_insns_since (last);
1482 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1483 bitpos, target, 1);
1484 }
1485 }
1486 else
1487 extzv_loses:
1488 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1489 bitpos, target, 1);
1490 }
1491 else
1492 {
1493 if (HAVE_extv
1494 && bitsize > 0
1495 && GET_MODE_BITSIZE (extv_mode) >= bitsize
1496 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1497 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1498 {
1499 int xbitpos = bitpos, xoffset = offset;
1500 rtx bitsize_rtx, bitpos_rtx;
1501 rtx last = get_last_insn ();
1502 rtx xop0 = op0, xtarget = target;
1503 rtx xspec_target = spec_target;
1504 rtx xspec_target_subreg = spec_target_subreg;
1505 rtx pat;
1506 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1507
1508 if (MEM_P (xop0))
1509 {
1510 /* Is the memory operand acceptable? */
1511 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1512 (xop0, GET_MODE (xop0))))
1513 {
1514 /* No, load into a reg and extract from there. */
1515 enum machine_mode bestmode;
1516
1517 /* Get the mode to use for inserting into this field. If
1518 OP0 is BLKmode, get the smallest mode consistent with the
1519 alignment. If OP0 is a non-BLKmode object that is no
1520 wider than MAXMODE, use its mode. Otherwise, use the
1521 smallest mode containing the field. */
1522
1523 if (GET_MODE (xop0) == BLKmode
1524 || (GET_MODE_SIZE (GET_MODE (op0))
1525 > GET_MODE_SIZE (maxmode)))
1526 bestmode = get_best_mode (bitsize, bitnum,
1527 MEM_ALIGN (xop0), maxmode,
1528 MEM_VOLATILE_P (xop0));
1529 else
1530 bestmode = GET_MODE (xop0);
1531
1532 if (bestmode == VOIDmode
1533 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1534 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1535 goto extv_loses;
1536
1537 /* Compute offset as multiple of this unit,
1538 counting in bytes. */
1539 unit = GET_MODE_BITSIZE (bestmode);
1540 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1541 xbitpos = bitnum % unit;
1542 xop0 = adjust_address (xop0, bestmode, xoffset);
1543
1544 /* Make sure register is big enough for the whole field. */
1545 if (xoffset * BITS_PER_UNIT + unit
1546 < offset * BITS_PER_UNIT + bitsize)
1547 goto extv_loses;
1548
1549 /* Fetch it to a register in that size. */
1550 xop0 = force_reg (bestmode, xop0);
1551
1552 /* XBITPOS counts within UNIT, which is what is expected. */
1553 }
1554 else
1555 /* Get ref to first byte containing part of the field. */
1556 xop0 = adjust_address (xop0, byte_mode, xoffset);
1557 }
1558
1559 /* If op0 is a register, we need it in MAXMODE (which is usually
1560 SImode) to make it acceptable to the format of extv. */
1561 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1562 goto extv_loses;
1563 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1564 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1565
1566 /* On big-endian machines, we count bits from the most significant.
1567 If the bit field insn does not, we must invert. */
1568 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1569 xbitpos = unit - bitsize - xbitpos;
1570
1571 /* XBITPOS counts within a size of UNIT.
1572 Adjust to count within a size of MAXMODE. */
1573 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1574 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1575
1576 unit = GET_MODE_BITSIZE (maxmode);
1577
1578 if (xtarget == 0)
1579 xtarget = xspec_target = gen_reg_rtx (tmode);
1580
1581 if (GET_MODE (xtarget) != maxmode)
1582 {
1583 if (REG_P (xtarget))
1584 {
1585 int wider = (GET_MODE_SIZE (maxmode)
1586 > GET_MODE_SIZE (GET_MODE (xtarget)));
1587 xtarget = gen_lowpart (maxmode, xtarget);
1588 if (wider)
1589 xspec_target_subreg = xtarget;
1590 }
1591 else
1592 xtarget = gen_reg_rtx (maxmode);
1593 }
1594
1595 /* If this machine's extv insists on a register target,
1596 make sure we have one. */
1597 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1598 (xtarget, maxmode)))
1599 xtarget = gen_reg_rtx (maxmode);
1600
1601 bitsize_rtx = GEN_INT (bitsize);
1602 bitpos_rtx = GEN_INT (xbitpos);
1603
1604 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1605 if (pat)
1606 {
1607 emit_insn (pat);
1608 target = xtarget;
1609 spec_target = xspec_target;
1610 spec_target_subreg = xspec_target_subreg;
1611 }
1612 else
1613 {
1614 delete_insns_since (last);
1615 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1616 bitpos, target, 0);
1617 }
1618 }
1619 else
1620 extv_loses:
1621 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1622 bitpos, target, 0);
1623 }
1624 if (target == spec_target)
1625 return target;
1626 if (target == spec_target_subreg)
1627 return spec_target;
1628 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1629 {
1630 /* If the target mode is not a scalar integral, first convert to the
1631 integer mode of that size and then access it as a floating-point
1632 value via a SUBREG. */
1633 if (!SCALAR_INT_MODE_P (tmode))
1634 {
1635 enum machine_mode smode
1636 = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1637 target = convert_to_mode (smode, target, unsignedp);
1638 target = force_reg (smode, target);
1639 return gen_lowpart (tmode, target);
1640 }
1641
1642 return convert_to_mode (tmode, target, unsignedp);
1643 }
1644 return target;
1645 }
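#if 0   /* Illustrative model only -- not compiled, not GCC internals.  */
/* Plain-C sketches (assuming 32-bit words) of the two strategies used by
   extract_fixed_bit_field below: an unsigned field is shifted down to the
   lsb and masked; a signed field is first shifted so its msb becomes the
   word's msb, then arithmetic-shifted back so the sign bit is replicated.
   The names below are hypothetical.  */
static unsigned int
extract_unsigned_model (unsigned int word, int bitpos, int bitsize)
{
  unsigned int mask = bitsize < 32 ? (1u << bitsize) - 1u : ~0u;
  return (word >> bitpos) & mask;
}

static int
extract_signed_model (unsigned int word, int bitpos, int bitsize)
{
  /* The left shift moves the field's msb to bit 31; the arithmetic right
     shift then sign-extends while moving the field's lsb to bit 0.  */
  return ((int) (word << (32 - bitsize - bitpos))) >> (32 - bitsize);
}
#endif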
1646 \f
1647 /* Extract a bit field using shifts and boolean operations
1648 Returns an rtx to represent the value.
1649 OP0 addresses a register (word) or memory (byte).
1650 BITPOS says which bit within the word or byte the bit field starts in.
1651 OFFSET says how many bytes farther the bit field starts;
1652 it is 0 if OP0 is a register.
1653 BITSIZE says how many bits long the bit field is.
1654 (If OP0 is a register, it may be narrower than a full word,
1655 but BITPOS still counts within a full word,
1656 which is significant on bigendian machines.)
1657
1658 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1659 If TARGET is nonzero, attempts to store the value there
1660 and return TARGET, but this is not guaranteed.
1661 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1662
1663 static rtx
1664 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1665 unsigned HOST_WIDE_INT offset,
1666 unsigned HOST_WIDE_INT bitsize,
1667 unsigned HOST_WIDE_INT bitpos, rtx target,
1668 int unsignedp)
1669 {
1670 unsigned int total_bits = BITS_PER_WORD;
1671 enum machine_mode mode;
1672
1673 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1674 {
1675 /* Special treatment for a bit field split across two registers. */
1676 if (bitsize + bitpos > BITS_PER_WORD)
1677 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1678 }
1679 else
1680 {
1681 /* Get the proper mode to use for this field. We want a mode that
1682 includes the entire field. If such a mode would be larger than
1683 a word, we won't be doing the extraction the normal way. */
1684
1685 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1686 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1687
1688 if (mode == VOIDmode)
1689 /* The only way this should occur is if the field spans word
1690 boundaries. */
1691 return extract_split_bit_field (op0, bitsize,
1692 bitpos + offset * BITS_PER_UNIT,
1693 unsignedp);
1694
1695 total_bits = GET_MODE_BITSIZE (mode);
1696
1697 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1698 be in the range 0 to total_bits-1, and put any excess bytes in
1699 OFFSET. */
1700 if (bitpos >= total_bits)
1701 {
1702 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1703 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1704 * BITS_PER_UNIT);
1705 }
1706
1707 /* Get ref to an aligned byte, halfword, or word containing the field.
1708 Adjust BITPOS to be position within a word,
1709 and OFFSET to be the offset of that word.
1710 Then alter OP0 to refer to that word. */
1711 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1712 offset -= (offset % (total_bits / BITS_PER_UNIT));
1713 op0 = adjust_address (op0, mode, offset);
1714 }
1715
1716 mode = GET_MODE (op0);
1717
1718 if (BYTES_BIG_ENDIAN)
1719 /* BITPOS is the distance between our msb and that of OP0.
1720 Convert it to the distance from the lsb. */
1721 bitpos = total_bits - bitsize - bitpos;
1722
1723 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1724 We have reduced the big-endian case to the little-endian case. */
1725
1726 if (unsignedp)
1727 {
1728 if (bitpos)
1729 {
1730 /* If the field does not already start at the lsb,
1731 shift it so it does. */
1732 tree amount = build_int_cst (NULL_TREE, bitpos);
1733 /* Maybe propagate the target for the shift. */
1734 /* But not if we will return it--could confuse integrate.c. */
1735 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1736 if (tmode != mode) subtarget = 0;
1737 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1738 }
1739 /* Convert the value to the desired mode. */
1740 if (mode != tmode)
1741 op0 = convert_to_mode (tmode, op0, 1);
1742
1743 /* Unless the msb of the field used to be the msb when we shifted,
1744 mask out the upper bits. */
1745
1746 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1747 return expand_binop (GET_MODE (op0), and_optab, op0,
1748 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1749 target, 1, OPTAB_LIB_WIDEN);
1750 return op0;
1751 }
1752
1753 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1754 then arithmetic-shift its lsb to the lsb of the word. */
1755 op0 = force_reg (mode, op0);
1756 if (mode != tmode)
1757 target = 0;
1758
1759 /* Find the narrowest integer mode that contains the field. */
1760
1761 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1762 mode = GET_MODE_WIDER_MODE (mode))
1763 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1764 {
1765 op0 = convert_to_mode (mode, op0, 0);
1766 break;
1767 }
1768
1769 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1770 {
1771 tree amount
1772 = build_int_cst (NULL_TREE,
1773 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1774 /* Maybe propagate the target for the shift. */
1775 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1776 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1777 }
1778
1779 return expand_shift (RSHIFT_EXPR, mode, op0,
1780 build_int_cst (NULL_TREE,
1781 GET_MODE_BITSIZE (mode) - bitsize),
1782 target, 0);
1783 }
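
/* Illustrative trace (added for exposition; not part of the original
   source): on a target where QImode is eight bits wide, extracting a
   signed 3-bit field at BITPOS 2 from the QImode value 0b00011100
   reaches the shift pair above with mode == QImode: the value is first
   shifted left by 8 - (3 + 2) = 3, giving 0b11100000, and then
   arithmetic-shifted right by 8 - 3 = 5, giving 0b11111111, i.e. the
   correctly sign-extended result -1.  */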
1784 \f
1785 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1786 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1787 complement of that if COMPLEMENT. The mask is truncated if
1788 necessary to the width of mode MODE. The mask is zero-extended if
1789 BITSIZE+BITPOS is too small for MODE. */
1790
1791 static rtx
1792 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1793 {
1794 HOST_WIDE_INT masklow, maskhigh;
1795
1796 if (bitsize == 0)
1797 masklow = 0;
1798 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1799 masklow = (HOST_WIDE_INT) -1 << bitpos;
1800 else
1801 masklow = 0;
1802
1803 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1804 masklow &= ((unsigned HOST_WIDE_INT) -1
1805 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1806
1807 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1808 maskhigh = -1;
1809 else
1810 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1811
1812 if (bitsize == 0)
1813 maskhigh = 0;
1814 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1815 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1816 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1817 else
1818 maskhigh = 0;
1819
1820 if (complement)
1821 {
1822 maskhigh = ~maskhigh;
1823 masklow = ~masklow;
1824 }
1825
1826 return immed_double_const (masklow, maskhigh, mode);
1827 }
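
/* Illustrative example (added for exposition; not part of the original
   source): with MODE == SImode, BITPOS == 8 and BITSIZE == 4, the code
   above computes masklow == 0xf00 and maskhigh == 0, so the returned
   constant is 0x00000f00; with COMPLEMENT nonzero it is 0xfffff0ff
   instead, i.e. the mask a caller can AND with to clear those four bits
   before inserting a new field value.  */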
1828
1829 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1830 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1831
1832 static rtx
1833 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1834 {
1835 unsigned HOST_WIDE_INT v = INTVAL (value);
1836 HOST_WIDE_INT low, high;
1837
1838 if (bitsize < HOST_BITS_PER_WIDE_INT)
1839 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1840
1841 if (bitpos < HOST_BITS_PER_WIDE_INT)
1842 {
1843 low = v << bitpos;
1844 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1845 }
1846 else
1847 {
1848 low = 0;
1849 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1850 }
1851
1852 return immed_double_const (low, high, mode);
1853 }
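
/* Illustrative example (added for exposition; not part of the original
   source): for VALUE == 0x2b, BITPOS == 8 and BITSIZE == 4, the value
   is first truncated to its low four bits (0xb) and then shifted left
   by eight, so the returned constant is 0xb00 -- exactly the bits a
   caller would IOR into a 4-bit field that starts at bit 8.  */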
1854 \f
1855 /* Extract a bit field from a memory by forcing the alignment of the
1856 memory. This is efficient only if the field spans at least 4 boundaries.
1857
1858 OP0 is the MEM.
1859 BITSIZE is the field width; BITPOS is the position of the first bit.
1860 UNSIGNEDP is true if the result should be zero-extended. */
1861
1862 static rtx
1863 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1864 unsigned HOST_WIDE_INT bitpos,
1865 int unsignedp)
1866 {
1867 enum machine_mode mode, dmode;
1868 unsigned int m_bitsize, m_size;
1869 unsigned int sign_shift_up, sign_shift_dn;
1870 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1871
1872 /* Choose a mode that will fit BITSIZE. */
1873 mode = smallest_mode_for_size (bitsize, MODE_INT);
1874 m_size = GET_MODE_SIZE (mode);
1875 m_bitsize = GET_MODE_BITSIZE (mode);
1876
1877 /* Choose a mode twice as wide. Fail if no such mode exists. */
1878 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1879 if (dmode == BLKmode)
1880 return NULL;
1881
1882 do_pending_stack_adjust ();
1883 start = get_last_insn ();
1884
1885 /* At the end, we'll need an additional shift to deal with sign/zero
1886 extension. By default this will be a left+right shift of the
1887 appropriate size. But we may be able to eliminate one of them. */
1888 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1889
1890 if (STRICT_ALIGNMENT)
1891 {
1892 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1893 bitpos %= BITS_PER_UNIT;
1894
1895 /* We load two values to be concatenated. There's an edge condition
1896 that bears notice -- an aligned value at the end of a page can
1897 only load one value lest we segfault. So the two values we load
1898 are at "base & -size" and "(base + size - 1) & -size". If base
1899 is unaligned, the addresses will be aligned and sequential; if
1900 base is aligned, the addresses will both be equal to base. */
1901
1902 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1903 GEN_INT (-(HOST_WIDE_INT)m_size),
1904 NULL, true, OPTAB_LIB_WIDEN);
1905 mark_reg_pointer (a1, m_bitsize);
1906 v1 = gen_rtx_MEM (mode, a1);
1907 set_mem_align (v1, m_bitsize);
1908 v1 = force_reg (mode, validize_mem (v1));
1909
1910 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1911 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1912 GEN_INT (-(HOST_WIDE_INT)m_size),
1913 NULL, true, OPTAB_LIB_WIDEN);
1914 v2 = gen_rtx_MEM (mode, a2);
1915 set_mem_align (v2, m_bitsize);
1916 v2 = force_reg (mode, validize_mem (v2));
1917
1918 /* Combine these two values into a double-word value. */
1919 if (m_bitsize == BITS_PER_WORD)
1920 {
1921 comb = gen_reg_rtx (dmode);
1922 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1923 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1924 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1925 }
1926 else
1927 {
1928 if (BYTES_BIG_ENDIAN)
1929 comb = v1, v1 = v2, v2 = comb;
1930 v1 = convert_modes (dmode, mode, v1, true);
1931 if (v1 == NULL)
1932 goto fail;
1933 v2 = convert_modes (dmode, mode, v2, true);
1934 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1935 NULL, true, OPTAB_LIB_WIDEN);
1936 if (v2 == NULL)
1937 goto fail;
1938 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1939 true, OPTAB_LIB_WIDEN);
1940 if (comb == NULL)
1941 goto fail;
1942 }
1943
1944 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1945 NULL, true, OPTAB_LIB_WIDEN);
1946 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1947
1948 if (bitpos != 0)
1949 {
1950 if (sign_shift_up <= bitpos)
1951 bitpos -= sign_shift_up, sign_shift_up = 0;
1952 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1953 NULL, true, OPTAB_LIB_WIDEN);
1954 }
1955 }
1956 else
1957 {
1958 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1959 bitpos %= BITS_PER_UNIT;
1960
1961 /* When strict alignment is not required, we can just load directly
1962 from memory without masking. If the remaining BITPOS offset is
1963 small enough, we may be able to do all operations in MODE as
1964 opposed to DMODE. */
1965 if (bitpos + bitsize <= m_bitsize)
1966 dmode = mode;
1967 comb = adjust_address (op0, dmode, offset);
1968
1969 if (sign_shift_up <= bitpos)
1970 bitpos -= sign_shift_up, sign_shift_up = 0;
1971 shift = GEN_INT (bitpos);
1972 }
1973
1974 /* Shift down the double-word such that the requested value is at bit 0. */
1975 if (shift != const0_rtx)
1976 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1977 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1978 if (comb == NULL)
1979 goto fail;
1980
1981 /* If the field exactly matches MODE, then all we need to do is return the
1982 lowpart. Otherwise, shift to get the sign bits set properly. */
1983 result = force_reg (mode, gen_lowpart (mode, comb));
1984
1985 if (sign_shift_up)
1986 result = expand_simple_binop (mode, ASHIFT, result,
1987 GEN_INT (sign_shift_up),
1988 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1989 if (sign_shift_dn)
1990 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1991 result, GEN_INT (sign_shift_dn),
1992 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1993
1994 return result;
1995
1996 fail:
1997 delete_insns_since (start);
1998 return NULL;
1999 }
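
/* Illustrative example (added for exposition; not part of the original
   source): on a strict-alignment target where the chosen MODE is the
   four-byte SImode, BASE is the address of the first byte of the field,
   so A1 == BASE & -4 and A2 == (BASE + 3) & -4.  If BASE is unaligned
   the two loads cover consecutive aligned words; if BASE is aligned
   they name the same word.  The concatenated DImode value is then
   shifted right by (BASE & 3) * BITS_PER_UNIT plus the remaining
   sub-byte bit offset, and the low SImode part contains the field.  */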
2000
2001 /* Extract a bit field that is split across two words
2002 and return an RTX for the result.
2003
2004 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2005 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2006 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
2007
2008 static rtx
2009 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2010 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2011 {
2012 unsigned int unit;
2013 unsigned int bitsdone = 0;
2014 rtx result = NULL_RTX;
2015 int first = 1;
2016
2017 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2018 much at a time. */
2019 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2020 unit = BITS_PER_WORD;
2021 else
2022 {
2023 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2024 if (0 && bitsize / unit > 2)
2025 {
2026 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2027 unsignedp);
2028 if (tmp)
2029 return tmp;
2030 }
2031 }
2032
2033 while (bitsdone < bitsize)
2034 {
2035 unsigned HOST_WIDE_INT thissize;
2036 rtx part, word;
2037 unsigned HOST_WIDE_INT thispos;
2038 unsigned HOST_WIDE_INT offset;
2039
2040 offset = (bitpos + bitsdone) / unit;
2041 thispos = (bitpos + bitsdone) % unit;
2042
2043 /* THISSIZE must not overrun a word boundary. Otherwise,
2044 extract_fixed_bit_field will call us again, and we will mutually
2045 recurse forever. */
2046 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2047 thissize = MIN (thissize, unit - thispos);
2048
2049 /* If OP0 is a register, then handle OFFSET here.
2050
2051 When handling multiword bitfields, extract_bit_field may pass
2052 down a word_mode SUBREG of a larger REG for a bitfield that actually
2053 crosses a word boundary. Thus, for a SUBREG, we must find
2054 the current word starting from the base register. */
2055 if (GET_CODE (op0) == SUBREG)
2056 {
2057 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2058 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2059 GET_MODE (SUBREG_REG (op0)));
2060 offset = 0;
2061 }
2062 else if (REG_P (op0))
2063 {
2064 word = operand_subword_force (op0, offset, GET_MODE (op0));
2065 offset = 0;
2066 }
2067 else
2068 word = op0;
2069
2070 /* Extract the parts in bit-counting order,
2071 whose meaning is determined by BYTES_PER_UNIT.
2072 OFFSET is in UNITs, and UNIT is in bits.
2073 extract_fixed_bit_field wants offset in bytes. */
2074 part = extract_fixed_bit_field (word_mode, word,
2075 offset * unit / BITS_PER_UNIT,
2076 thissize, thispos, 0, 1);
2077 bitsdone += thissize;
2078
2079 /* Shift this part into place for the result. */
2080 if (BYTES_BIG_ENDIAN)
2081 {
2082 if (bitsize != bitsdone)
2083 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2084 build_int_cst (NULL_TREE, bitsize - bitsdone),
2085 0, 1);
2086 }
2087 else
2088 {
2089 if (bitsdone != thissize)
2090 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2091 build_int_cst (NULL_TREE,
2092 bitsdone - thissize), 0, 1);
2093 }
2094
2095 if (first)
2096 result = part;
2097 else
2098 /* Combine the parts with bitwise or. This works
2099 because we extracted each part as an unsigned bit field. */
2100 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2101 OPTAB_LIB_WIDEN);
2102
2103 first = 0;
2104 }
2105
2106 /* Unsigned bit field: we are done. */
2107 if (unsignedp)
2108 return result;
2109 /* Signed bit field: sign-extend with two arithmetic shifts. */
2110 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2111 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2112 NULL_RTX, 0);
2113 return expand_shift (RSHIFT_EXPR, word_mode, result,
2114 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2115 NULL_RTX, 0);
2116 }
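
/* Illustrative example (added for exposition; not part of the original
   source): on a 32-bit-word target, a 12-bit field at BITPOS 28 is
   split by the loop above into a 4-bit piece at bit 28 of word 0 and an
   8-bit piece at bit 0 of word 1.  On a little-endian target the second
   piece is shifted left by BITSDONE - THISSIZE == 4 and IORed onto the
   first, and a signed extraction finishes with the left/arithmetic-right
   shift pair by 32 - 12 == 20 bits.  */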
2117 \f
2118 /* Add INC into TARGET. */
2119
2120 void
2121 expand_inc (rtx target, rtx inc)
2122 {
2123 rtx value = expand_binop (GET_MODE (target), add_optab,
2124 target, inc,
2125 target, 0, OPTAB_LIB_WIDEN);
2126 if (value != target)
2127 emit_move_insn (target, value);
2128 }
2129
2130 /* Subtract DEC from TARGET. */
2131
2132 void
2133 expand_dec (rtx target, rtx dec)
2134 {
2135 rtx value = expand_binop (GET_MODE (target), sub_optab,
2136 target, dec,
2137 target, 0, OPTAB_LIB_WIDEN);
2138 if (value != target)
2139 emit_move_insn (target, value);
2140 }
2141 \f
2142 /* Output a shift instruction for expression code CODE,
2143 with SHIFTED being the rtx for the value to shift,
2144 and AMOUNT the tree for the amount to shift by.
2145 Store the result in the rtx TARGET, if that is convenient.
2146 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2147 Return the rtx for where the value is. */
2148
2149 rtx
2150 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2151 tree amount, rtx target, int unsignedp)
2152 {
2153 rtx op1, temp = 0;
2154 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2155 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2156 int try;
2157
2158 /* Previously detected shift-counts computed by NEGATE_EXPR
2159 and shifted in the other direction; but that does not work
2160 on all machines. */
2161
2162 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2163
2164 if (SHIFT_COUNT_TRUNCATED)
2165 {
2166 if (GET_CODE (op1) == CONST_INT
2167 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2168 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2169 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2170 % GET_MODE_BITSIZE (mode));
2171 else if (GET_CODE (op1) == SUBREG
2172 && subreg_lowpart_p (op1))
2173 op1 = SUBREG_REG (op1);
2174 }
2175
2176 if (op1 == const0_rtx)
2177 return shifted;
2178
2179 /* Check whether it's cheaper to implement a left shift by a constant
2180 bit count by a sequence of additions. */
2181 if (code == LSHIFT_EXPR
2182 && GET_CODE (op1) == CONST_INT
2183 && INTVAL (op1) > 0
2184 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2185 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2186 {
2187 int i;
2188 for (i = 0; i < INTVAL (op1); i++)
2189 {
2190 temp = force_reg (mode, shifted);
2191 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2192 unsignedp, OPTAB_LIB_WIDEN);
2193 }
2194 return shifted;
2195 }
2196
2197 for (try = 0; temp == 0 && try < 3; try++)
2198 {
2199 enum optab_methods methods;
2200
2201 if (try == 0)
2202 methods = OPTAB_DIRECT;
2203 else if (try == 1)
2204 methods = OPTAB_WIDEN;
2205 else
2206 methods = OPTAB_LIB_WIDEN;
2207
2208 if (rotate)
2209 {
2210 /* Widening does not work for rotation. */
2211 if (methods == OPTAB_WIDEN)
2212 continue;
2213 else if (methods == OPTAB_LIB_WIDEN)
2214 {
2215 /* If we have been unable to open-code this by a rotation,
2216 do it as the IOR of two shifts. I.e., to rotate A
2217 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2218 where C is the bitsize of A.
2219
2220 It is theoretically possible that the target machine might
2221 not be able to perform either shift and hence we would
2222 be making two libcalls rather than just the one for the
2223 shift (similarly if IOR could not be done). We will allow
2224 this extremely unlikely lossage to avoid complicating the
2225 code below. */
2226
2227 rtx subtarget = target == shifted ? 0 : target;
2228 rtx temp1;
2229 tree type = TREE_TYPE (amount);
2230 tree new_amount = make_tree (type, op1);
2231 tree other_amount
2232 = fold_build2 (MINUS_EXPR, type,
2233 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2234 amount);
2235
2236 shifted = force_reg (mode, shifted);
2237
2238 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2239 mode, shifted, new_amount, 0, 1);
2240 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2241 mode, shifted, other_amount, subtarget, 1);
2242 return expand_binop (mode, ior_optab, temp, temp1, target,
2243 unsignedp, methods);
2244 }
2245
2246 temp = expand_binop (mode,
2247 left ? rotl_optab : rotr_optab,
2248 shifted, op1, target, unsignedp, methods);
2249 }
2250 else if (unsignedp)
2251 temp = expand_binop (mode,
2252 left ? ashl_optab : lshr_optab,
2253 shifted, op1, target, unsignedp, methods);
2254
2255 /* Do arithmetic shifts.
2256 Also, if we are going to widen the operand, we can just as well
2257 use an arithmetic right-shift instead of a logical one. */
2258 if (temp == 0 && ! rotate
2259 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2260 {
2261 enum optab_methods methods1 = methods;
2262
2263 /* If trying to widen a log shift to an arithmetic shift,
2264 don't accept an arithmetic shift of the same size. */
2265 if (unsignedp)
2266 methods1 = OPTAB_MUST_WIDEN;
2267
2268 /* Arithmetic shift */
2269
2270 temp = expand_binop (mode,
2271 left ? ashl_optab : ashr_optab,
2272 shifted, op1, target, unsignedp, methods1);
2273 }
2274
2275 /* We used to try extzv here for logical right shifts, but that was
2276 only useful for one machine, the VAX, and caused poor code
2277 generation there for lshrdi3, so the code was deleted and a
2278 define_expand for lshrsi3 was added to vax.md. */
2279 }
2280
2281 gcc_assert (temp);
2282 return temp;
2283 }
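
/* Illustrative example (added for exposition; not part of the original
   source): if the target has no rotate pattern for SImode, a 32-bit
   left rotation by N (0 < N < 32) falls into the OPTAB_LIB_WIDEN branch
   above and is emitted as the two-shift sequence described there,
   roughly

     temp   = x << n;                    (logical shift)
     temp1  = (unsigned) x >> (32 - n);
     result = temp | temp1;

   which is the standard rotate-by-OR expansion.  */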
2284 \f
2285 enum alg_code {
2286 alg_unknown,
2287 alg_zero,
2288 alg_m, alg_shift,
2289 alg_add_t_m2,
2290 alg_sub_t_m2,
2291 alg_add_factor,
2292 alg_sub_factor,
2293 alg_add_t2_m,
2294 alg_sub_t2_m,
2295 alg_impossible
2296 };
2297
2298 /* This structure holds the "cost" of a multiply sequence. The
2299 "cost" field holds the total rtx_cost of every operator in the
2300 synthetic multiplication sequence, hence cost(a op b) is defined
2301 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2302 The "latency" field holds the minimum possible latency of the
2303 synthetic multiply, on a hypothetical infinitely parallel CPU.
2304 This is the critical path, or the maximum height, of the expression
2305 tree which is the sum of rtx_costs on the most expensive path from
2306 any leaf to the root. Hence latency(a op b) is defined as zero for
2307 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2308
2309 struct mult_cost {
2310 short cost; /* Total rtx_cost of the multiplication sequence. */
2311 short latency; /* The latency of the multiplication sequence. */
2312 };
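
/* Illustrative example (added for exposition; not part of the original
   source): the serial sequence t = (x << 2) + x; t = t << 1 and the
   parallel form (x << 3) + (x << 1) both compute 10*x with cost
   2*shift + add, but under the definition above the latency of the
   first is shift + add + shift while the latency of the second is only
   shift + add, because its two shifts do not depend on each other.  */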
2313
2314 /* This macro is used to compare a pointer to a mult_cost against a
2315 single integer "rtx_cost" value. This is equivalent to the macro
2316 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2317 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2318 || ((X)->cost == (Y) && (X)->latency < (Y)))
2319
2320 /* This macro is used to compare two pointers to mult_costs against
2321 each other. The macro returns true if X is cheaper than Y.
2322 Currently, the cheaper of two mult_costs is the one with the
2323 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2324 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2325 || ((X)->cost == (Y)->cost \
2326 && (X)->latency < (Y)->latency))
2327
2328 /* This structure records a sequence of operations.
2329 `ops' is the number of operations recorded.
2330 `cost' is their total cost.
2331 The operations are stored in `op' and the corresponding
2332 logarithms of the integer coefficients in `log'.
2333
2334 These are the operations:
2335 alg_zero total := 0;
2336 alg_m total := multiplicand;
2337 alg_shift total := total * coeff
2338 alg_add_t_m2 total := total + multiplicand * coeff;
2339 alg_sub_t_m2 total := total - multiplicand * coeff;
2340 alg_add_factor total := total * coeff + total;
2341 alg_sub_factor total := total * coeff - total;
2342 alg_add_t2_m total := total * coeff + multiplicand;
2343 alg_sub_t2_m total := total * coeff - multiplicand;
2344
2345 The first operand must be either alg_zero or alg_m. */
2346
2347 struct algorithm
2348 {
2349 struct mult_cost cost;
2350 short ops;
2351 /* The sizes of the OP and LOG fields are not directly related to the
2352 word size, but the worst case arises if we have few
2353 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2354 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2355 in total wordsize operations. */
2356 enum alg_code op[MAX_BITS_PER_WORD];
2357 char log[MAX_BITS_PER_WORD];
2358 };
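
/* Illustrative example (added for exposition; not part of the original
   source): one sequence synth_mult can record for a multiplication by
   10 using the operations listed above is

     op[0] = alg_m                        total = x
     op[1] = alg_add_t2_m,  log[1] = 2    total = (x << 2) + x  (5*x)
     op[2] = alg_shift,     log[2] = 1    total = total << 1    (10*x)

   i.e. a shift-and-add followed by a shift, three entries in all.  */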
2359
2360 /* The entry for our multiplication cache/hash table. */
2361 struct alg_hash_entry {
2362 /* The number we are multiplying by. */
2363 unsigned int t;
2364
2365 /* The mode in which we are multiplying something by T. */
2366 enum machine_mode mode;
2367
2368 /* The best multiplication algorithm for t. */
2369 enum alg_code alg;
2370
2371 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2372 Otherwise, the cost within which multiplication by T is
2373 impossible. */
2374 struct mult_cost cost;
2375 };
2376
2377 /* The number of cache/hash entries. */
2378 #define NUM_ALG_HASH_ENTRIES 307
2379
2380 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2381 actually a hash table. If we have a collision, the older
2382 entry is kicked out. */
2383 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2384
2385 /* Indicates the type of fixup needed after a constant multiplication.
2386 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2387 the result should be negated, and ADD_VARIANT means that the
2388 multiplicand should be added to the result. */
2389 enum mult_variant {basic_variant, negate_variant, add_variant};
2390
2391 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2392 const struct mult_cost *, enum machine_mode mode);
2393 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2394 struct algorithm *, enum mult_variant *, int);
2395 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2396 const struct algorithm *, enum mult_variant);
2397 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2398 int, rtx *, int *, int *);
2399 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2400 static rtx extract_high_half (enum machine_mode, rtx);
2401 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2402 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2403 int, int);
2404 /* Compute and return the best algorithm for multiplying by T.
2405 The algorithm must cost less than COST_LIMIT.
2406 If retval.cost >= COST_LIMIT, no algorithm was found and all
2407 other fields of the returned struct are undefined.
2408 MODE is the machine mode of the multiplication. */
2409
2410 static void
2411 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2412 const struct mult_cost *cost_limit, enum machine_mode mode)
2413 {
2414 int m;
2415 struct algorithm *alg_in, *best_alg;
2416 struct mult_cost best_cost;
2417 struct mult_cost new_limit;
2418 int op_cost, op_latency;
2419 unsigned HOST_WIDE_INT q;
2420 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2421 int hash_index;
2422 bool cache_hit = false;
2423 enum alg_code cache_alg = alg_zero;
2424
2425 /* Indicate that no algorithm is yet found. If no algorithm
2426 is found, this value will be returned and indicate failure. */
2427 alg_out->cost.cost = cost_limit->cost + 1;
2428 alg_out->cost.latency = cost_limit->latency + 1;
2429
2430 if (cost_limit->cost < 0
2431 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2432 return;
2433
2434 /* Restrict the bits of "t" to the multiplication's mode. */
2435 t &= GET_MODE_MASK (mode);
2436
2437 /* t == 1 can be done in zero cost. */
2438 if (t == 1)
2439 {
2440 alg_out->ops = 1;
2441 alg_out->cost.cost = 0;
2442 alg_out->cost.latency = 0;
2443 alg_out->op[0] = alg_m;
2444 return;
2445 }
2446
2447 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2448 fail now. */
2449 if (t == 0)
2450 {
2451 if (MULT_COST_LESS (cost_limit, zero_cost))
2452 return;
2453 else
2454 {
2455 alg_out->ops = 1;
2456 alg_out->cost.cost = zero_cost;
2457 alg_out->cost.latency = zero_cost;
2458 alg_out->op[0] = alg_zero;
2459 return;
2460 }
2461 }
2462
2463 /* We'll be needing a couple extra algorithm structures now. */
2464
2465 alg_in = alloca (sizeof (struct algorithm));
2466 best_alg = alloca (sizeof (struct algorithm));
2467 best_cost = *cost_limit;
2468
2469 /* Compute the hash index. */
2470 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2471
2472 /* See if we already know what to do for T. */
2473 if (alg_hash[hash_index].t == t
2474 && alg_hash[hash_index].mode == mode
2475 && alg_hash[hash_index].alg != alg_unknown)
2476 {
2477 cache_alg = alg_hash[hash_index].alg;
2478
2479 if (cache_alg == alg_impossible)
2480 {
2481 /* The cache tells us that it's impossible to synthesize
2482 multiplication by T within alg_hash[hash_index].cost. */
2483 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2484 /* COST_LIMIT is at least as restrictive as the one
2485 recorded in the hash table, in which case we have no
2486 hope of synthesizing a multiplication. Just
2487 return. */
2488 return;
2489
2490 /* If we get here, COST_LIMIT is less restrictive than the
2491 one recorded in the hash table, so we may be able to
2492 synthesize a multiplication. Proceed as if we didn't
2493 have the cache entry. */
2494 }
2495 else
2496 {
2497 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2498 /* The cached algorithm shows that this multiplication
2499 requires more cost than COST_LIMIT. Just return. This
2500 way, we don't clobber this cache entry with
2501 alg_impossible but retain useful information. */
2502 return;
2503
2504 cache_hit = true;
2505
2506 switch (cache_alg)
2507 {
2508 case alg_shift:
2509 goto do_alg_shift;
2510
2511 case alg_add_t_m2:
2512 case alg_sub_t_m2:
2513 goto do_alg_addsub_t_m2;
2514
2515 case alg_add_factor:
2516 case alg_sub_factor:
2517 goto do_alg_addsub_factor;
2518
2519 case alg_add_t2_m:
2520 goto do_alg_add_t2_m;
2521
2522 case alg_sub_t2_m:
2523 goto do_alg_sub_t2_m;
2524
2525 default:
2526 gcc_unreachable ();
2527 }
2528 }
2529 }
2530
2531 /* If we have a group of zero bits at the low-order part of T, try
2532 multiplying by the remaining bits and then doing a shift. */
2533
2534 if ((t & 1) == 0)
2535 {
2536 do_alg_shift:
2537 m = floor_log2 (t & -t); /* m = number of low zero bits */
2538 if (m < maxm)
2539 {
2540 q = t >> m;
2541 /* The function expand_shift will choose between a shift and
2542 a sequence of additions, so the observed cost is given as
2543 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2544 op_cost = m * add_cost[mode];
2545 if (shift_cost[mode][m] < op_cost)
2546 op_cost = shift_cost[mode][m];
2547 new_limit.cost = best_cost.cost - op_cost;
2548 new_limit.latency = best_cost.latency - op_cost;
2549 synth_mult (alg_in, q, &new_limit, mode);
2550
2551 alg_in->cost.cost += op_cost;
2552 alg_in->cost.latency += op_cost;
2553 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2554 {
2555 struct algorithm *x;
2556 best_cost = alg_in->cost;
2557 x = alg_in, alg_in = best_alg, best_alg = x;
2558 best_alg->log[best_alg->ops] = m;
2559 best_alg->op[best_alg->ops] = alg_shift;
2560 }
2561 }
2562 if (cache_hit)
2563 goto done;
2564 }
2565
2566 /* If we have an odd number, add or subtract one. */
2567 if ((t & 1) != 0)
2568 {
2569 unsigned HOST_WIDE_INT w;
2570
2571 do_alg_addsub_t_m2:
2572 for (w = 1; (w & t) != 0; w <<= 1)
2573 ;
2574 /* If T was -1, then W will be zero after the loop. This is another
2575 case where T ends with ...111. Handling this with (T + 1) and
2576 subtracting 1 produces slightly better code and makes algorithm
2577 selection much faster than treating it like the ...0111 case
2578 below. */
2579 if (w == 0
2580 || (w > 2
2581 /* Reject the case where t is 3.
2582 Thus we prefer addition in that case. */
2583 && t != 3))
2584 {
2585 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2586
2587 op_cost = add_cost[mode];
2588 new_limit.cost = best_cost.cost - op_cost;
2589 new_limit.latency = best_cost.latency - op_cost;
2590 synth_mult (alg_in, t + 1, &new_limit, mode);
2591
2592 alg_in->cost.cost += op_cost;
2593 alg_in->cost.latency += op_cost;
2594 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595 {
2596 struct algorithm *x;
2597 best_cost = alg_in->cost;
2598 x = alg_in, alg_in = best_alg, best_alg = x;
2599 best_alg->log[best_alg->ops] = 0;
2600 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2601 }
2602 }
2603 else
2604 {
2605 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2606
2607 op_cost = add_cost[mode];
2608 new_limit.cost = best_cost.cost - op_cost;
2609 new_limit.latency = best_cost.latency - op_cost;
2610 synth_mult (alg_in, t - 1, &new_limit, mode);
2611
2612 alg_in->cost.cost += op_cost;
2613 alg_in->cost.latency += op_cost;
2614 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2615 {
2616 struct algorithm *x;
2617 best_cost = alg_in->cost;
2618 x = alg_in, alg_in = best_alg, best_alg = x;
2619 best_alg->log[best_alg->ops] = 0;
2620 best_alg->op[best_alg->ops] = alg_add_t_m2;
2621 }
2622 }
2623 if (cache_hit)
2624 goto done;
2625 }
2626
2627 /* Look for factors of t of the form
2628 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2629 If we find such a factor, we can multiply by t using an algorithm that
2630 multiplies by q, shift the result by m and add/subtract it to itself.
2631
2632 We search for large factors first and loop down, even if large factors
2633 are less probable than small; if we find a large factor we will find a
2634 good sequence quickly, and therefore be able to prune (by decreasing
2635 COST_LIMIT) the search. */
2636
2637 do_alg_addsub_factor:
2638 for (m = floor_log2 (t - 1); m >= 2; m--)
2639 {
2640 unsigned HOST_WIDE_INT d;
2641
2642 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2643 if (t % d == 0 && t > d && m < maxm
2644 && (!cache_hit || cache_alg == alg_add_factor))
2645 {
2646 /* If the target has a cheap shift-and-add instruction use
2647 that in preference to a shift insn followed by an add insn.
2648 Assume that the shift-and-add is "atomic" with a latency
2649 equal to its cost, otherwise assume that on superscalar
2650 hardware the shift may be executed concurrently with the
2651 earlier steps in the algorithm. */
2652 op_cost = add_cost[mode] + shift_cost[mode][m];
2653 if (shiftadd_cost[mode][m] < op_cost)
2654 {
2655 op_cost = shiftadd_cost[mode][m];
2656 op_latency = op_cost;
2657 }
2658 else
2659 op_latency = add_cost[mode];
2660
2661 new_limit.cost = best_cost.cost - op_cost;
2662 new_limit.latency = best_cost.latency - op_latency;
2663 synth_mult (alg_in, t / d, &new_limit, mode);
2664
2665 alg_in->cost.cost += op_cost;
2666 alg_in->cost.latency += op_latency;
2667 if (alg_in->cost.latency < op_cost)
2668 alg_in->cost.latency = op_cost;
2669 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2670 {
2671 struct algorithm *x;
2672 best_cost = alg_in->cost;
2673 x = alg_in, alg_in = best_alg, best_alg = x;
2674 best_alg->log[best_alg->ops] = m;
2675 best_alg->op[best_alg->ops] = alg_add_factor;
2676 }
2677 /* Other factors will have been taken care of in the recursion. */
2678 break;
2679 }
2680
2681 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2682 if (t % d == 0 && t > d && m < maxm
2683 && (!cache_hit || cache_alg == alg_sub_factor))
2684 {
2685 /* If the target has a cheap shift-and-subtract insn use
2686 that in preference to a shift insn followed by a sub insn.
2687 Assume that the shift-and-sub is "atomic" with a latency
2688 equal to its cost, otherwise assume that on superscalar
2689 hardware the shift may be executed concurrently with the
2690 earlier steps in the algorithm. */
2691 op_cost = add_cost[mode] + shift_cost[mode][m];
2692 if (shiftsub_cost[mode][m] < op_cost)
2693 {
2694 op_cost = shiftsub_cost[mode][m];
2695 op_latency = op_cost;
2696 }
2697 else
2698 op_latency = add_cost[mode];
2699
2700 new_limit.cost = best_cost.cost - op_cost;
2701 new_limit.latency = best_cost.latency - op_latency;
2702 synth_mult (alg_in, t / d, &new_limit, mode);
2703
2704 alg_in->cost.cost += op_cost;
2705 alg_in->cost.latency += op_latency;
2706 if (alg_in->cost.latency < op_cost)
2707 alg_in->cost.latency = op_cost;
2708 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2709 {
2710 struct algorithm *x;
2711 best_cost = alg_in->cost;
2712 x = alg_in, alg_in = best_alg, best_alg = x;
2713 best_alg->log[best_alg->ops] = m;
2714 best_alg->op[best_alg->ops] = alg_sub_factor;
2715 }
2716 break;
2717 }
2718 }
2719 if (cache_hit)
2720 goto done;
2721
2722 /* Try shift-and-add (load effective address) instructions,
2723 i.e. do a*3, a*5, a*9. */
2724 if ((t & 1) != 0)
2725 {
2726 do_alg_add_t2_m:
2727 q = t - 1;
2728 q = q & -q;
2729 m = exact_log2 (q);
2730 if (m >= 0 && m < maxm)
2731 {
2732 op_cost = shiftadd_cost[mode][m];
2733 new_limit.cost = best_cost.cost - op_cost;
2734 new_limit.latency = best_cost.latency - op_cost;
2735 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2736
2737 alg_in->cost.cost += op_cost;
2738 alg_in->cost.latency += op_cost;
2739 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 {
2741 struct algorithm *x;
2742 best_cost = alg_in->cost;
2743 x = alg_in, alg_in = best_alg, best_alg = x;
2744 best_alg->log[best_alg->ops] = m;
2745 best_alg->op[best_alg->ops] = alg_add_t2_m;
2746 }
2747 }
2748 if (cache_hit)
2749 goto done;
2750
2751 do_alg_sub_t2_m:
2752 q = t + 1;
2753 q = q & -q;
2754 m = exact_log2 (q);
2755 if (m >= 0 && m < maxm)
2756 {
2757 op_cost = shiftsub_cost[mode][m];
2758 new_limit.cost = best_cost.cost - op_cost;
2759 new_limit.latency = best_cost.latency - op_cost;
2760 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2761
2762 alg_in->cost.cost += op_cost;
2763 alg_in->cost.latency += op_cost;
2764 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2765 {
2766 struct algorithm *x;
2767 best_cost = alg_in->cost;
2768 x = alg_in, alg_in = best_alg, best_alg = x;
2769 best_alg->log[best_alg->ops] = m;
2770 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2771 }
2772 }
2773 if (cache_hit)
2774 goto done;
2775 }
2776
2777 done:
2778 /* If best_cost has not decreased, we have not found any algorithm. */
2779 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2780 {
2781 /* We failed to find an algorithm. Record alg_impossible for
2782 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2783 we are asked to find an algorithm for T within the same or
2784 lower COST_LIMIT, we can immediately return to the
2785 caller. */
2786 alg_hash[hash_index].t = t;
2787 alg_hash[hash_index].mode = mode;
2788 alg_hash[hash_index].alg = alg_impossible;
2789 alg_hash[hash_index].cost = *cost_limit;
2790 return;
2791 }
2792
2793 /* Cache the result. */
2794 if (!cache_hit)
2795 {
2796 alg_hash[hash_index].t = t;
2797 alg_hash[hash_index].mode = mode;
2798 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2799 alg_hash[hash_index].cost.cost = best_cost.cost;
2800 alg_hash[hash_index].cost.latency = best_cost.latency;
2801 }
2802
2803 /* If we are getting too long a sequence for `struct algorithm'
2804 to record, make this search fail. */
2805 if (best_alg->ops == MAX_BITS_PER_WORD)
2806 return;
2807
2808 /* Copy the algorithm from temporary space to the space at alg_out.
2809 We avoid using structure assignment because the majority of
2810 best_alg is normally undefined, and this is a critical function. */
2811 alg_out->ops = best_alg->ops + 1;
2812 alg_out->cost = best_cost;
2813 memcpy (alg_out->op, best_alg->op,
2814 alg_out->ops * sizeof *alg_out->op);
2815 memcpy (alg_out->log, best_alg->log,
2816 alg_out->ops * sizeof *alg_out->log);
2817 }
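
/* Illustrative example (added for exposition; not part of the original
   source): for T == 15 (binary ...1111) the W loop in the odd case
   above runs off the block of low one bits and leaves W == 16, so
   synth_mult recurses on T + 1 == 16 (a single shift by 4) and records
   alg_sub_t_m2 with log 0 -- the two-operation sequence (x << 4) - x --
   instead of the longer shift-and-add chain a bit-by-bit decomposition
   of 1111 would need.  */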
2818 \f
2819 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2820 Try three variations:
2821
2822 - a shift/add sequence based on VAL itself
2823 - a shift/add sequence based on -VAL, followed by a negation
2824 - a shift/add sequence based on VAL - 1, followed by an addition.
2825
2826 Return true if the cheapest of these cost less than MULT_COST,
2827 describing the algorithm in *ALG and final fixup in *VARIANT. */
2828
2829 static bool
2830 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2831 struct algorithm *alg, enum mult_variant *variant,
2832 int mult_cost)
2833 {
2834 struct algorithm alg2;
2835 struct mult_cost limit;
2836 int op_cost;
2837
2838 *variant = basic_variant;
2839 limit.cost = mult_cost;
2840 limit.latency = mult_cost;
2841 synth_mult (alg, val, &limit, mode);
2842
2843 /* This works only if the inverted value actually fits in an
2844 `unsigned int'. */
2845 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2846 {
2847 op_cost = neg_cost[mode];
2848 if (MULT_COST_LESS (&alg->cost, mult_cost))
2849 {
2850 limit.cost = alg->cost.cost - op_cost;
2851 limit.latency = alg->cost.latency - op_cost;
2852 }
2853 else
2854 {
2855 limit.cost = mult_cost - op_cost;
2856 limit.latency = mult_cost - op_cost;
2857 }
2858
2859 synth_mult (&alg2, -val, &limit, mode);
2860 alg2.cost.cost += op_cost;
2861 alg2.cost.latency += op_cost;
2862 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2863 *alg = alg2, *variant = negate_variant;
2864 }
2865
2866 /* This proves very useful for division-by-constant. */
2867 op_cost = add_cost[mode];
2868 if (MULT_COST_LESS (&alg->cost, mult_cost))
2869 {
2870 limit.cost = alg->cost.cost - op_cost;
2871 limit.latency = alg->cost.latency - op_cost;
2872 }
2873 else
2874 {
2875 limit.cost = mult_cost - op_cost;
2876 limit.latency = mult_cost - op_cost;
2877 }
2878
2879 synth_mult (&alg2, val - 1, &limit, mode);
2880 alg2.cost.cost += op_cost;
2881 alg2.cost.latency += op_cost;
2882 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2883 *alg = alg2, *variant = add_variant;
2884
2885 return MULT_COST_LESS (&alg->cost, mult_cost);
2886 }
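
/* Illustrative example (added for exposition; not part of the original
   source, and dependent on the target's cost parameters): for
   VAL == -5 in SImode, the shift/add sequence based on VAL itself is
   comparatively long because -5 has almost all bits set, whereas the
   second variant tried above synthesizes 5 as (x << 2) + x and then
   negates; with typical uniform costs choose_mult_variant therefore
   returns negate_variant together with that two-operation algorithm.  */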
2887
2888 /* A subroutine of expand_mult, used for constant multiplications.
2889 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2890 convenient. Use the shift/add sequence described by ALG and apply
2891 the final fixup specified by VARIANT. */
2892
2893 static rtx
2894 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2895 rtx target, const struct algorithm *alg,
2896 enum mult_variant variant)
2897 {
2898 HOST_WIDE_INT val_so_far;
2899 rtx insn, accum, tem;
2900 int opno;
2901 enum machine_mode nmode;
2902
2903 /* Avoid referencing memory over and over.
2904 For speed, but also for correctness when mem is volatile. */
2905 if (MEM_P (op0))
2906 op0 = force_reg (mode, op0);
2907
2908 /* ACCUM starts out either as OP0 or as a zero, depending on
2909 the first operation. */
2910
2911 if (alg->op[0] == alg_zero)
2912 {
2913 accum = copy_to_mode_reg (mode, const0_rtx);
2914 val_so_far = 0;
2915 }
2916 else if (alg->op[0] == alg_m)
2917 {
2918 accum = copy_to_mode_reg (mode, op0);
2919 val_so_far = 1;
2920 }
2921 else
2922 gcc_unreachable ();
2923
2924 for (opno = 1; opno < alg->ops; opno++)
2925 {
2926 int log = alg->log[opno];
2927 rtx shift_subtarget = optimize ? 0 : accum;
2928 rtx add_target
2929 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2930 && !optimize)
2931 ? target : 0;
2932 rtx accum_target = optimize ? 0 : accum;
2933
2934 switch (alg->op[opno])
2935 {
2936 case alg_shift:
2937 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2938 build_int_cst (NULL_TREE, log),
2939 NULL_RTX, 0);
2940 val_so_far <<= log;
2941 break;
2942
2943 case alg_add_t_m2:
2944 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2945 build_int_cst (NULL_TREE, log),
2946 NULL_RTX, 0);
2947 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2948 add_target ? add_target : accum_target);
2949 val_so_far += (HOST_WIDE_INT) 1 << log;
2950 break;
2951
2952 case alg_sub_t_m2:
2953 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2954 build_int_cst (NULL_TREE, log),
2955 NULL_RTX, 0);
2956 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2957 add_target ? add_target : accum_target);
2958 val_so_far -= (HOST_WIDE_INT) 1 << log;
2959 break;
2960
2961 case alg_add_t2_m:
2962 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2963 build_int_cst (NULL_TREE, log),
2964 shift_subtarget,
2965 0);
2966 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2967 add_target ? add_target : accum_target);
2968 val_so_far = (val_so_far << log) + 1;
2969 break;
2970
2971 case alg_sub_t2_m:
2972 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2973 build_int_cst (NULL_TREE, log),
2974 shift_subtarget, 0);
2975 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2976 add_target ? add_target : accum_target);
2977 val_so_far = (val_so_far << log) - 1;
2978 break;
2979
2980 case alg_add_factor:
2981 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2982 build_int_cst (NULL_TREE, log),
2983 NULL_RTX, 0);
2984 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2985 add_target ? add_target : accum_target);
2986 val_so_far += val_so_far << log;
2987 break;
2988
2989 case alg_sub_factor:
2990 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2991 build_int_cst (NULL_TREE, log),
2992 NULL_RTX, 0);
2993 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2994 (add_target
2995 ? add_target : (optimize ? 0 : tem)));
2996 val_so_far = (val_so_far << log) - val_so_far;
2997 break;
2998
2999 default:
3000 gcc_unreachable ();
3001 }
3002
3003 /* Write a REG_EQUAL note on the last insn so that we can cse
3004 multiplication sequences. Note that if ACCUM is a SUBREG,
3005 we've set the inner register and must properly indicate
3006 that. */
3007
3008 tem = op0, nmode = mode;
3009 if (GET_CODE (accum) == SUBREG)
3010 {
3011 nmode = GET_MODE (SUBREG_REG (accum));
3012 tem = gen_lowpart (nmode, op0);
3013 }
3014
3015 insn = get_last_insn ();
3016 set_unique_reg_note (insn, REG_EQUAL,
3017 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3018 }
3019
3020 if (variant == negate_variant)
3021 {
3022 val_so_far = -val_so_far;
3023 accum = expand_unop (mode, neg_optab, accum, target, 0);
3024 }
3025 else if (variant == add_variant)
3026 {
3027 val_so_far = val_so_far + 1;
3028 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3029 }
3030
3031 /* Compare only the bits of val and val_so_far that are significant
3032 in the result mode, to avoid sign-/zero-extension confusion. */
3033 val &= GET_MODE_MASK (mode);
3034 val_so_far &= GET_MODE_MASK (mode);
3035 gcc_assert (val == val_so_far);
3036
3037 return accum;
3038 }
3039
3040 /* Perform a multiplication and return an rtx for the result.
3041 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3042 TARGET is a suggestion for where to store the result (an rtx).
3043
3044 We check specially for a constant integer as OP1.
3045 If you want this check for OP0 as well, then before calling
3046 you should swap the two operands if OP0 would be constant. */
3047
3048 rtx
3049 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3050 int unsignedp)
3051 {
3052 enum mult_variant variant;
3053 struct algorithm algorithm;
3054 int max_cost;
3055
3056 /* Handling const0_rtx here allows us to use zero as a rogue value for
3057 coeff below. */
3058 if (op1 == const0_rtx)
3059 return const0_rtx;
3060 if (op1 == const1_rtx)
3061 return op0;
3062 if (op1 == constm1_rtx)
3063 return expand_unop (mode,
3064 GET_MODE_CLASS (mode) == MODE_INT
3065 && !unsignedp && flag_trapv
3066 ? negv_optab : neg_optab,
3067 op0, target, 0);
3068
3069 /* These are the operations that are potentially turned into a sequence
3070 of shifts and additions. */
3071 if (SCALAR_INT_MODE_P (mode)
3072 && (unsignedp || !flag_trapv))
3073 {
3074 HOST_WIDE_INT coeff = 0;
3075 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3076
3077 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3078 less than or equal in size to `unsigned int' this doesn't matter.
3079 If the mode is larger than `unsigned int', then synth_mult works
3080 only if the constant value exactly fits in an `unsigned int' without
3081 any truncation. This means that multiplying by negative values does
3082 not work; results are off by 2^32 on a 32 bit machine. */
3083
3084 if (GET_CODE (op1) == CONST_INT)
3085 {
3086 /* Attempt to handle multiplication of DImode values by negative
3087 coefficients, by performing the multiplication by a positive
3088 multiplier and then inverting the result. */
3089 if (INTVAL (op1) < 0
3090 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3091 {
3092 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3093 result is interpreted as an unsigned coefficient.
3094 Exclude cost of op0 from max_cost to match the cost
3095 calculation of the synth_mult. */
3096 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3097 - neg_cost[mode];
3098 if (max_cost > 0
3099 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3100 &variant, max_cost))
3101 {
3102 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3103 NULL_RTX, &algorithm,
3104 variant);
3105 return expand_unop (mode, neg_optab, temp, target, 0);
3106 }
3107 }
3108 else coeff = INTVAL (op1);
3109 }
3110 else if (GET_CODE (op1) == CONST_DOUBLE)
3111 {
3112 /* If we are multiplying in DImode, it may still be a win
3113 to try to work with shifts and adds. */
3114 if (CONST_DOUBLE_HIGH (op1) == 0)
3115 coeff = CONST_DOUBLE_LOW (op1);
3116 else if (CONST_DOUBLE_LOW (op1) == 0
3117 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3118 {
3119 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3120 + HOST_BITS_PER_WIDE_INT;
3121 return expand_shift (LSHIFT_EXPR, mode, op0,
3122 build_int_cst (NULL_TREE, shift),
3123 target, unsignedp);
3124 }
3125 }
3126
3127 /* We used to test optimize here, on the grounds that it's better to
3128 produce a smaller program when -O is not used. But this causes
3129 such a terrible slowdown sometimes that it seems better to always
3130 use synth_mult. */
3131 if (coeff != 0)
3132 {
3133 /* Special case powers of two. */
3134 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3135 return expand_shift (LSHIFT_EXPR, mode, op0,
3136 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3137 target, unsignedp);
3138
3139 /* Exclude cost of op0 from max_cost to match the cost
3140 calculation of the synth_mult. */
3141 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3142 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3143 max_cost))
3144 return expand_mult_const (mode, op0, coeff, target,
3145 &algorithm, variant);
3146 }
3147 }
3148
3149 if (GET_CODE (op0) == CONST_DOUBLE)
3150 {
3151 rtx temp = op0;
3152 op0 = op1;
3153 op1 = temp;
3154 }
3155
3156 /* Expand x*2.0 as x+x. */
3157 if (GET_CODE (op1) == CONST_DOUBLE
3158 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3159 {
3160 REAL_VALUE_TYPE d;
3161 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3162
3163 if (REAL_VALUES_EQUAL (d, dconst2))
3164 {
3165 op0 = force_reg (GET_MODE (op0), op0);
3166 return expand_binop (mode, add_optab, op0, op0,
3167 target, unsignedp, OPTAB_LIB_WIDEN);
3168 }
3169 }
3170
3171 /* This used to use umul_optab if unsigned, but for non-widening multiply
3172 there is no difference between signed and unsigned. */
3173 op0 = expand_binop (mode,
3174 ! unsignedp
3175 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3176 ? smulv_optab : smul_optab,
3177 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3178 gcc_assert (op0);
3179 return op0;
3180 }
3181 \f
3182 /* Return the smallest n such that 2**n >= X. */
3183
3184 int
3185 ceil_log2 (unsigned HOST_WIDE_INT x)
3186 {
3187 return floor_log2 (x - 1) + 1;
3188 }
3189
3190 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3191 replace division by D, and put the least significant N bits of the result
3192 in *MULTIPLIER_PTR and return the most significant bit.
3193
3194 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3195 needed precision is in PRECISION (should be <= N).
3196
3197 PRECISION should be as small as possible so this function can choose
3198 the multiplier more freely.
3199
3200 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3201 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3202
3203 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3204 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3205
3206 static
3207 unsigned HOST_WIDE_INT
3208 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3209 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3210 {
3211 HOST_WIDE_INT mhigh_hi, mlow_hi;
3212 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3213 int lgup, post_shift;
3214 int pow, pow2;
3215 unsigned HOST_WIDE_INT nl, dummy1;
3216 HOST_WIDE_INT nh, dummy2;
3217
3218 /* lgup = ceil(log2(divisor)); */
3219 lgup = ceil_log2 (d);
3220
3221 gcc_assert (lgup <= n);
3222
3223 pow = n + lgup;
3224 pow2 = n + lgup - precision;
3225
3226 /* We could handle this with some effort, but this case is much
3227 better handled directly with a scc insn, so rely on caller using
3228 that. */
3229 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3230
3231 /* mlow = 2^(N + lgup)/d */
3232 if (pow >= HOST_BITS_PER_WIDE_INT)
3233 {
3234 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3235 nl = 0;
3236 }
3237 else
3238 {
3239 nh = 0;
3240 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3241 }
3242 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3243 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3244
3245 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3246 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3247 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3248 else
3249 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3250 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3251 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3252
3253 gcc_assert (!mhigh_hi || nh - d < d);
3254 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3255 /* Assert that mlow < mhigh. */
3256 gcc_assert (mlow_hi < mhigh_hi
3257 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3258
3259 /* If precision == N, then mlow, mhigh exceed 2^N
3260 (but they do not exceed 2^(N+1)). */
3261
3262 /* Reduce to lowest terms. */
3263 for (post_shift = lgup; post_shift > 0; post_shift--)
3264 {
3265 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3266 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3267 if (ml_lo >= mh_lo)
3268 break;
3269
3270 mlow_hi = 0;
3271 mlow_lo = ml_lo;
3272 mhigh_hi = 0;
3273 mhigh_lo = mh_lo;
3274 }
3275
3276 *post_shift_ptr = post_shift;
3277 *lgup_ptr = lgup;
3278 if (n < HOST_BITS_PER_WIDE_INT)
3279 {
3280 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3281 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3282 return mhigh_lo >= mask;
3283 }
3284 else
3285 {
3286 *multiplier_ptr = GEN_INT (mhigh_lo);
3287 return mhigh_hi;
3288 }
3289 }
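
/* Illustrative example (added for exposition; not part of the original
   source): for D == 3 with N == PRECISION == 32, lgup is 2, mlow is
   2^34 / 3 and mhigh is (2^34 + 2^2) / 3; after the reduction loop the
   routine returns *MULTIPLIER_PTR == 0xAAAAAAAB, *POST_SHIFT_PTR == 1
   and a zero most significant bit, matching the familiar identity
   x / 3 == (x * 0xAAAAAAAB) >> 33 for unsigned 32-bit x.  */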
3290
3291 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3292 congruent to 1 (mod 2**N). */
3293
3294 static unsigned HOST_WIDE_INT
3295 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3296 {
3297 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3298
3299 /* The algorithm notes that the choice y = x satisfies
3300 x*y == 1 mod 2^3, since x is assumed odd.
3301 Each iteration doubles the number of bits of significance in y. */
3302
3303 unsigned HOST_WIDE_INT mask;
3304 unsigned HOST_WIDE_INT y = x;
3305 int nbit = 3;
3306
3307 mask = (n == HOST_BITS_PER_WIDE_INT
3308 ? ~(unsigned HOST_WIDE_INT) 0
3309 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3310
3311 while (nbit < n)
3312 {
3313 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3314 nbit *= 2;
3315 }
3316 return y;
3317 }
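
/* Illustrative example (added for exposition; not part of the original
   source): invert_mod2n (7, 8) starts with y == 7, which is already an
   inverse of 7 modulo 2^3 (7 * 7 == 49 == 1 mod 8).  The first Newton
   step computes y = 7 * (2 - 7 * 7) & 0xff == 183, and indeed
   7 * 183 == 1281 == 5 * 256 + 1, so 183 is the inverse of 7 modulo
   2^8; the second step runs because nbit is still 6 < 8 but leaves y
   unchanged.  */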
3318
3319 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3320 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3321 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3322 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3323 become signed.
3324
3325 The result is put in TARGET if that is convenient.
3326
3327 MODE is the mode of operation. */
3328
3329 rtx
3330 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3331 rtx op1, rtx target, int unsignedp)
3332 {
3333 rtx tem;
3334 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3335
3336 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3337 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3338 NULL_RTX, 0);
3339 tem = expand_and (mode, tem, op1, NULL_RTX);
3340 adj_operand
3341 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3342 adj_operand);
3343
3344 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3345 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3346 NULL_RTX, 0);
3347 tem = expand_and (mode, tem, op0, NULL_RTX);
3348 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3349 target);
3350
3351 return target;
3352 }
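
/* Illustrative example (added for exposition; not part of the original
   source): in an 8-bit mode with OP0 == 0xff (-1 signed) and OP1 == 2,
   the unsigned high half of the product is 1 (0xff * 2 == 0x1fe) while
   the signed high half must be -1.  The code above forms
   (op0 >> 7) & op1 == 2 and (op1 >> 7) & op0 == 0 using arithmetic
   shifts, and with UNSIGNEDP zero subtracts both from the unsigned high
   half: 1 - 2 - 0 == -1, the desired signed result.  */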
3353
3354 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3355
3356 static rtx
3357 extract_high_half (enum machine_mode mode, rtx op)
3358 {
3359 enum machine_mode wider_mode;
3360
3361 if (mode == word_mode)
3362 return gen_highpart (mode, op);
3363
3364 wider_mode = GET_MODE_WIDER_MODE (mode);
3365 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3366 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3367 return convert_modes (mode, wider_mode, op, 0);
3368 }
3369
3370 /* Like expand_mult_highpart, but only consider using a multiplication
3371 optab. OP1 is an rtx for the constant operand. */
3372
3373 static rtx
3374 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3375 rtx target, int unsignedp, int max_cost)
3376 {
3377 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3378 enum machine_mode wider_mode;
3379 optab moptab;
3380 rtx tem;
3381 int size;
3382
3383 wider_mode = GET_MODE_WIDER_MODE (mode);
3384 size = GET_MODE_BITSIZE (mode);
3385
3386 /* Firstly, try using a multiplication insn that only generates the needed
3387 high part of the product, and in the sign flavor of unsignedp. */
3388 if (mul_highpart_cost[mode] < max_cost)
3389 {
3390 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3391 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3392 unsignedp, OPTAB_DIRECT);
3393 if (tem)
3394 return tem;
3395 }
3396
3397 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3398 Need to adjust the result after the multiplication. */
3399 if (size - 1 < BITS_PER_WORD
3400 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3401 + 4 * add_cost[mode] < max_cost))
3402 {
3403 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3404 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3405 unsignedp, OPTAB_DIRECT);
3406 if (tem)
3407 /* We used the wrong signedness. Adjust the result. */
3408 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3409 tem, unsignedp);
3410 }
3411
3412 /* Try widening multiplication. */
3413 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3414 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3415 && mul_widen_cost[wider_mode] < max_cost)
3416 {
3417 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3418 unsignedp, OPTAB_WIDEN);
3419 if (tem)
3420 return extract_high_half (mode, tem);
3421 }
3422
3423 /* Try widening the mode, then perform a non-widening multiplication. */
3424 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3425 && size - 1 < BITS_PER_WORD
3426 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3427 {
3428 rtx insns, wop0, wop1;
3429
3430 /* We need to widen the operands, for example to ensure the
3431 constant multiplier is correctly sign or zero extended.
3432 Use a sequence to clean up any instructions emitted by
3433 the conversions if things don't work out. */
3434 start_sequence ();
3435 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3436 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3437 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3438 unsignedp, OPTAB_WIDEN);
3439 insns = get_insns ();
3440 end_sequence ();
3441
3442 if (tem)
3443 {
3444 emit_insn (insns);
3445 return extract_high_half (mode, tem);
3446 }
3447 }
3448
3449 /* Try widening multiplication of opposite signedness, and adjust. */
3450 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3451 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3452 && size - 1 < BITS_PER_WORD
3453 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3454 + 4 * add_cost[mode] < max_cost))
3455 {
3456 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3457 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3458 if (tem != 0)
3459 {
3460 tem = extract_high_half (mode, tem);
3461 /* We used the wrong signedness. Adjust the result. */
3462 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3463 target, unsignedp);
3464 }
3465 }
3466
3467 return 0;
3468 }
3469
3470 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3471 putting the high half of the result in TARGET if that is convenient,
3472 and return where the result is. If the operation can not be performed,
3473 0 is returned.
3474
3475 MODE is the mode of operation and result.
3476
3477 UNSIGNEDP nonzero means unsigned multiply.
3478
3479 MAX_COST is the total allowed cost for the expanded RTL. */
3480
3481 static rtx
3482 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3483 rtx target, int unsignedp, int max_cost)
3484 {
3485 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3486 unsigned HOST_WIDE_INT cnst1;
3487 int extra_cost;
3488 bool sign_adjust = false;
3489 enum mult_variant variant;
3490 struct algorithm alg;
3491 rtx tem;
3492
3493 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3494 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3495
3496 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3497
3498 /* We can't optimize modes wider than BITS_PER_WORD.
3499 ??? We might be able to perform double-word arithmetic if
3500 mode == word_mode, however all the cost calculations in
3501 synth_mult etc. assume single-word operations. */
3502 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3503 return expand_mult_highpart_optab (mode, op0, op1, target,
3504 unsignedp, max_cost);
3505
3506 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3507
3508 /* Check whether we try to multiply by a negative constant. */
3509 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3510 {
3511 sign_adjust = true;
3512 extra_cost += add_cost[mode];
3513 }
3514
3515 /* See whether shift/add multiplication is cheap enough. */
3516 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3517 max_cost - extra_cost))
3518 {
3519 /* See whether the specialized multiplication optabs are
3520 cheaper than the shift/add version. */
3521 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3522 alg.cost.cost + extra_cost);
3523 if (tem)
3524 return tem;
3525
3526 tem = convert_to_mode (wider_mode, op0, unsignedp);
3527 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3528 tem = extract_high_half (mode, tem);
3529
3530 /* Adjust result for signedness. */
3531 if (sign_adjust)
3532 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3533
3534 return tem;
3535 }
3536 return expand_mult_highpart_optab (mode, op0, op1, target,
3537 unsignedp, max_cost);
3538 }
3539
3540
3541 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3542
3543 static rtx
3544 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3545 {
3546 unsigned HOST_WIDE_INT masklow, maskhigh;
3547 rtx result, temp, shift, label;
3548 int logd;
3549
3550 logd = floor_log2 (d);
3551 result = gen_reg_rtx (mode);
3552
3553 /* Avoid conditional branches when they're expensive. */
3554 if (BRANCH_COST >= 2
3555 && !optimize_size)
3556 {
3557 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3558 mode, 0, -1);
3559 if (signmask)
3560 {
3561 signmask = force_reg (mode, signmask);
3562 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3563 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3564
3565 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3566 which instruction sequence to use. If logical right shifts
3567 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
3568 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
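
/* Spelled out in C, the two alternatives are roughly (an illustrative
   sketch, writing BITS for the mode width; signmask is -1 for negative
   OP0 and 0 otherwise):
     t = ((op0 ^ signmask) - signmask) & (d - 1);
     r = (t ^ signmask) - signmask;
   versus
     smask = (unsigned) signmask >> (BITS - logd);    (i.e. d - 1 or 0)
     r = ((op0 + smask) & (d - 1)) - smask;
   Either way r equals op0 % d with C's truncating semantics.  */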
3569
3570 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3571 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3572 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3573 {
3574 temp = expand_binop (mode, xor_optab, op0, signmask,
3575 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3576 temp = expand_binop (mode, sub_optab, temp, signmask,
3577 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3578 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3579 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3580 temp = expand_binop (mode, xor_optab, temp, signmask,
3581 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3582 temp = expand_binop (mode, sub_optab, temp, signmask,
3583 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3584 }
3585 else
3586 {
3587 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3588 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3589 signmask = force_reg (mode, signmask);
3590
3591 temp = expand_binop (mode, add_optab, op0, signmask,
3592 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3593 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3594 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3595 temp = expand_binop (mode, sub_optab, temp, signmask,
3596 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3597 }
3598 return temp;
3599 }
3600 }
3601
3602 /* Mask contains the mode's signbit and the significant bits of the
3603 modulus. By including the signbit in the operation, many targets
3604 can avoid an explicit compare operation in the following comparison
3605 against zero. */
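
/* For instance (an added illustration in an 8-bit mode with d == 4): for
   op0 == -5 the AND with 0x83 (sign bit | (d - 1)) gives 0x83, which is
   negative, so the fixup below computes ((0x83 - 1) | ~3) + 1 == 0xff == -1,
   the expected value of -5 % 4.  For nonnegative op0 the result is just
   op0 & 3 and the conditional code is skipped.  */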
3606
3607 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3608 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3609 {
3610 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3611 maskhigh = -1;
3612 }
3613 else
3614 maskhigh = (HOST_WIDE_INT) -1
3615 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3616
3617 temp = expand_binop (mode, and_optab, op0,
3618 immed_double_const (masklow, maskhigh, mode),
3619 result, 1, OPTAB_LIB_WIDEN);
3620 if (temp != result)
3621 emit_move_insn (result, temp);
3622
3623 label = gen_label_rtx ();
3624 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3625
3626 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3627 0, OPTAB_LIB_WIDEN);
3628 masklow = (HOST_WIDE_INT) -1 << logd;
3629 maskhigh = -1;
3630 temp = expand_binop (mode, ior_optab, temp,
3631 immed_double_const (masklow, maskhigh, mode),
3632 result, 1, OPTAB_LIB_WIDEN);
3633 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3634 0, OPTAB_LIB_WIDEN);
3635 if (temp != result)
3636 emit_move_insn (result, temp);
3637 emit_label (label);
3638 return result;
3639 }
3640
3641 /* Expand signed division of OP0 by a power of two D in mode MODE.
3642 This routine is only called for positive values of D. */
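
/* Each of the sequences below implements, in one form or another, the usual
   adjust-then-shift identity (an illustrative summary; k stands for
   floor_log2 (d), and the shift is arithmetic):
     q = (op0 + (op0 < 0 ? d - 1 : 0)) >> k
   e.g. -9 / 4 becomes (-9 + 3) >> 2 == -2, matching C's truncating
   division.  */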
3643
3644 static rtx
3645 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3646 {
3647 rtx temp, label;
3648 tree shift;
3649 int logd;
3650
3651 logd = floor_log2 (d);
3652 shift = build_int_cst (NULL_TREE, logd);
3653
3654 if (d == 2 && BRANCH_COST >= 1)
3655 {
3656 temp = gen_reg_rtx (mode);
3657 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3658 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3659 0, OPTAB_LIB_WIDEN);
3660 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3661 }
3662
3663 #ifdef HAVE_conditional_move
3664 if (BRANCH_COST >= 2)
3665 {
3666 rtx temp2;
3667
3668 /* ??? emit_conditional_move forces a stack adjustment via
3669 compare_from_rtx, so if the sequence is discarded, it will
3670 be lost. Do it now instead. */
3671 do_pending_stack_adjust ();
3672
3673 start_sequence ();
3674 temp2 = copy_to_mode_reg (mode, op0);
3675 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3676 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3677 temp = force_reg (mode, temp);
3678
3679 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3680 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3681 mode, temp, temp2, mode, 0);
3682 if (temp2)
3683 {
3684 rtx seq = get_insns ();
3685 end_sequence ();
3686 emit_insn (seq);
3687 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3688 }
3689 end_sequence ();
3690 }
3691 #endif
3692
3693 if (BRANCH_COST >= 2)
3694 {
3695 int ushift = GET_MODE_BITSIZE (mode) - logd;
3696
3697 temp = gen_reg_rtx (mode);
3698 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3699 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3700 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3701 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3702 else
3703 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3704 build_int_cst (NULL_TREE, ushift),
3705 NULL_RTX, 1);
3706 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3707 0, OPTAB_LIB_WIDEN);
3708 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3709 }
3710
3711 label = gen_label_rtx ();
3712 temp = copy_to_mode_reg (mode, op0);
3713 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3714 expand_inc (temp, GEN_INT (d - 1));
3715 emit_label (label);
3716 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3717 }
3718 \f
3719 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3720 if that is convenient, and returning where the result is.
3721 You may request either the quotient or the remainder as the result;
3722 specify REM_FLAG nonzero to get the remainder.
3723
3724 CODE is the expression code for which kind of division this is;
3725 it controls how rounding is done. MODE is the machine mode to use.
3726 UNSIGNEDP nonzero means do unsigned division. */
3727
3728 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3729 and then correct it by or'ing in missing high bits
3730 if result of ANDI is nonzero.
3731 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3732 This could optimize to a bfexts instruction.
3733 But C doesn't use these operations, so their optimizations are
3734 left for later. */
3735 /* ??? For modulo, we don't actually need the highpart of the first product,
3736 the low part will do nicely. And for small divisors, the second multiply
3737 can also be a low-part only multiply or even be completely left out.
3738 E.g. to calculate the remainder of a division by 3 with a 32 bit
3739 multiply, multiply with 0x55555556 and extract the upper two bits;
3740 the result is exact for inputs up to 0x1fffffff.
3741 The input range can be reduced by using cross-sum rules.
3742 For odd divisors >= 3, the following table gives right shift counts
3743 so that if a number is shifted by an integer multiple of the given
3744 amount, the remainder stays the same:
3745 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3746 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3747 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3748 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3749 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3750
3751 Cross-sum rules for even numbers can be derived by leaving as many bits
3752 to the right alone as the divisor has zeros to the right.
3753 E.g. if x is an unsigned 32 bit number:
3754 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3755 */
3756
3757 rtx
3758 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3759 rtx op0, rtx op1, rtx target, int unsignedp)
3760 {
3761 enum machine_mode compute_mode;
3762 rtx tquotient;
3763 rtx quotient = 0, remainder = 0;
3764 rtx last;
3765 int size;
3766 rtx insn, set;
3767 optab optab1, optab2;
3768 int op1_is_constant, op1_is_pow2 = 0;
3769 int max_cost, extra_cost;
3770 static HOST_WIDE_INT last_div_const = 0;
3771 static HOST_WIDE_INT ext_op1;
3772
3773 op1_is_constant = GET_CODE (op1) == CONST_INT;
3774 if (op1_is_constant)
3775 {
3776 ext_op1 = INTVAL (op1);
3777 if (unsignedp)
3778 ext_op1 &= GET_MODE_MASK (mode);
3779 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3780 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3781 }
3782
3783 /*
3784 This is the structure of expand_divmod:
3785
3786 First comes code to fix up the operands so we can perform the operations
3787 correctly and efficiently.
3788
3789 Second comes a switch statement with code specific for each rounding mode.
3790 For some special operands this code emits all RTL for the desired
3791 operation, for other cases, it generates only a quotient and stores it in
3792 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3793 to indicate that it has not done anything.
3794
3795 Last comes code that finishes the operation. If QUOTIENT is set and
3796 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3797 QUOTIENT is not set, it is computed using trunc rounding.
3798
3799 We try to generate special code for division and remainder when OP1 is a
3800 constant. If |OP1| = 2**n we can use shifts and some other fast
3801 operations. For other values of OP1, we compute a carefully selected
3802 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3803 by m.
3804
3805 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3806 half of the product. Different strategies for generating the product are
3807 implemented in expand_mult_highpart.
3808
3809 If what we actually want is the remainder, we generate that by another
3810 by-constant multiplication and a subtraction. */
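
/* A concrete instance of the multiply-by-m strategy described above (an
   illustration only; the constant is the standard 32-bit reciprocal for 10):
   unsigned x / 10 can be emitted as
     q = (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 32) >> 3;
   and, when the remainder is wanted, r = x - q * 10.  */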
3811
3812 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3813 code below will malfunction if we are, so check here and handle
3814 the special case if so. */
3815 if (op1 == const1_rtx)
3816 return rem_flag ? const0_rtx : op0;
3817
3818 /* When dividing by -1, we could get an overflow.
3819 negv_optab can handle overflows. */
3820 if (! unsignedp && op1 == constm1_rtx)
3821 {
3822 if (rem_flag)
3823 return const0_rtx;
3824 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3825 ? negv_optab : neg_optab, op0, target, 0);
3826 }
3827
3828 if (target
3829 /* Don't use the function value register as a target
3830 since we have to read it as well as write it,
3831 and function-inlining gets confused by this. */
3832 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3833 /* Don't clobber an operand while doing a multi-step calculation. */
3834 || ((rem_flag || op1_is_constant)
3835 && (reg_mentioned_p (target, op0)
3836 || (MEM_P (op0) && MEM_P (target))))
3837 || reg_mentioned_p (target, op1)
3838 || (MEM_P (op1) && MEM_P (target))))
3839 target = 0;
3840
3841 /* Get the mode in which to perform this computation. Normally it will
3842 be MODE, but sometimes we can't do the desired operation in MODE.
3843 If so, pick a wider mode in which we can do the operation. Convert
3844 to that mode at the start to avoid repeated conversions.
3845
3846 First see what operations we need. These depend on the expression
3847 we are evaluating. (We assume that divxx3 insns exist under the
3848 same conditions as modxx3 insns do, and that these insns don't normally
3849 fail. If these assumptions are not correct, we may generate less
3850 efficient code in some cases.)
3851
3852 Then see if we find a mode in which we can open-code that operation
3853 (either a division, modulus, or shift). Finally, check for the smallest
3854 mode for which we can do the operation with a library call. */
3855
3856 /* We might want to refine this now that we have division-by-constant
3857 optimization. Since expand_mult_highpart tries so many variants, it is
3858 not straightforward to generalize this. Maybe we should make an array
3859 of possible modes in init_expmed? Save this for GCC 2.7. */
3860
3861 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3862 ? (unsignedp ? lshr_optab : ashr_optab)
3863 : (unsignedp ? udiv_optab : sdiv_optab));
3864 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3865 ? optab1
3866 : (unsignedp ? udivmod_optab : sdivmod_optab));
3867
3868 for (compute_mode = mode; compute_mode != VOIDmode;
3869 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3870 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3871 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3872 break;
3873
3874 if (compute_mode == VOIDmode)
3875 for (compute_mode = mode; compute_mode != VOIDmode;
3876 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3877 if (optab1->handlers[compute_mode].libfunc
3878 || optab2->handlers[compute_mode].libfunc)
3879 break;
3880
3881 /* If we still couldn't find a mode, use MODE, but expand_binop will
3882 probably die. */
3883 if (compute_mode == VOIDmode)
3884 compute_mode = mode;
3885
3886 if (target && GET_MODE (target) == compute_mode)
3887 tquotient = target;
3888 else
3889 tquotient = gen_reg_rtx (compute_mode);
3890
3891 size = GET_MODE_BITSIZE (compute_mode);
3892 #if 0
3893 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3894 (mode), and thereby get better code when OP1 is a constant. Do that
3895 later. It will require going over all usages of SIZE below. */
3896 size = GET_MODE_BITSIZE (mode);
3897 #endif
3898
3899 /* Only deduct something for a REM if the last divide done was
3900 for a different constant. Then set the constant of the last
3901 divide. */
3902 max_cost = div_cost[compute_mode]
3903 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3904 && INTVAL (op1) == last_div_const)
3905 ? mul_cost[compute_mode] + add_cost[compute_mode]
3906 : 0);
3907
3908 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3909
3910 /* Now convert to the best mode to use. */
3911 if (compute_mode != mode)
3912 {
3913 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3914 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3915
3916 /* convert_modes may have placed op1 into a register, so we
3917 must recompute the following. */
3918 op1_is_constant = GET_CODE (op1) == CONST_INT;
3919 op1_is_pow2 = (op1_is_constant
3920 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3921 || (! unsignedp
3922 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3923 }
3924
3925 /* If one of the operands is a volatile MEM, copy it into a register. */
3926
3927 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3928 op0 = force_reg (compute_mode, op0);
3929 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3930 op1 = force_reg (compute_mode, op1);
3931
3932 /* If we need the remainder or if OP1 is constant, we need to
3933 put OP0 in a register in case it has any queued subexpressions. */
3934 if (rem_flag || op1_is_constant)
3935 op0 = force_reg (compute_mode, op0);
3936
3937 last = get_last_insn ();
3938
3939 /* Promote floor rounding to trunc rounding for unsigned operations. */
3940 if (unsignedp)
3941 {
3942 if (code == FLOOR_DIV_EXPR)
3943 code = TRUNC_DIV_EXPR;
3944 if (code == FLOOR_MOD_EXPR)
3945 code = TRUNC_MOD_EXPR;
3946 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3947 code = TRUNC_DIV_EXPR;
3948 }
3949
3950 if (op1 != const0_rtx)
3951 switch (code)
3952 {
3953 case TRUNC_MOD_EXPR:
3954 case TRUNC_DIV_EXPR:
3955 if (op1_is_constant)
3956 {
3957 if (unsignedp)
3958 {
3959 unsigned HOST_WIDE_INT mh;
3960 int pre_shift, post_shift;
3961 int dummy;
3962 rtx ml;
3963 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3964 & GET_MODE_MASK (compute_mode));
3965
3966 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3967 {
3968 pre_shift = floor_log2 (d);
3969 if (rem_flag)
3970 {
3971 remainder
3972 = expand_binop (compute_mode, and_optab, op0,
3973 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3974 remainder, 1,
3975 OPTAB_LIB_WIDEN);
3976 if (remainder)
3977 return gen_lowpart (mode, remainder);
3978 }
3979 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3980 build_int_cst (NULL_TREE,
3981 pre_shift),
3982 tquotient, 1);
3983 }
3984 else if (size <= HOST_BITS_PER_WIDE_INT)
3985 {
3986 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3987 {
3988 /* Most significant bit of divisor is set; emit an scc
3989 insn. */
3990 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3991 compute_mode, 1, 1);
3992 if (quotient == 0)
3993 goto fail1;
3994 }
3995 else
3996 {
3997 /* Find a suitable multiplier and right shift count
3998 instead of multiplying with D. */
3999
4000 mh = choose_multiplier (d, size, size,
4001 &ml, &post_shift, &dummy);
4002
4003 /* If the suggested multiplier is more than SIZE bits,
4004 we can do better for even divisors, using an
4005 initial right shift. */
4006 if (mh != 0 && (d & 1) == 0)
4007 {
4008 pre_shift = floor_log2 (d & -d);
4009 mh = choose_multiplier (d >> pre_shift, size,
4010 size - pre_shift,
4011 &ml, &post_shift, &dummy);
4012 gcc_assert (!mh);
4013 }
4014 else
4015 pre_shift = 0;
4016
4017 if (mh != 0)
4018 {
4019 rtx t1, t2, t3, t4;
4020
4021 if (post_shift - 1 >= BITS_PER_WORD)
4022 goto fail1;
4023
4024 extra_cost
4025 = (shift_cost[compute_mode][post_shift - 1]
4026 + shift_cost[compute_mode][1]
4027 + 2 * add_cost[compute_mode]);
4028 t1 = expand_mult_highpart (compute_mode, op0, ml,
4029 NULL_RTX, 1,
4030 max_cost - extra_cost);
4031 if (t1 == 0)
4032 goto fail1;
4033 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4034 op0, t1),
4035 NULL_RTX);
4036 t3 = expand_shift
4037 (RSHIFT_EXPR, compute_mode, t2,
4038 build_int_cst (NULL_TREE, 1),
4039 NULL_RTX,1);
4040 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4041 t1, t3),
4042 NULL_RTX);
4043 quotient = expand_shift
4044 (RSHIFT_EXPR, compute_mode, t4,
4045 build_int_cst (NULL_TREE, post_shift - 1),
4046 tquotient, 1);
4047 }
4048 else
4049 {
4050 rtx t1, t2;
4051
4052 if (pre_shift >= BITS_PER_WORD
4053 || post_shift >= BITS_PER_WORD)
4054 goto fail1;
4055
4056 t1 = expand_shift
4057 (RSHIFT_EXPR, compute_mode, op0,
4058 build_int_cst (NULL_TREE, pre_shift),
4059 NULL_RTX, 1);
4060 extra_cost
4061 = (shift_cost[compute_mode][pre_shift]
4062 + shift_cost[compute_mode][post_shift]);
4063 t2 = expand_mult_highpart (compute_mode, t1, ml,
4064 NULL_RTX, 1,
4065 max_cost - extra_cost);
4066 if (t2 == 0)
4067 goto fail1;
4068 quotient = expand_shift
4069 (RSHIFT_EXPR, compute_mode, t2,
4070 build_int_cst (NULL_TREE, post_shift),
4071 tquotient, 1);
4072 }
4073 }
4074 }
4075 else /* Mode too wide to use tricky code */
4076 break;
4077
4078 insn = get_last_insn ();
4079 if (insn != last
4080 && (set = single_set (insn)) != 0
4081 && SET_DEST (set) == quotient)
4082 set_unique_reg_note (insn,
4083 REG_EQUAL,
4084 gen_rtx_UDIV (compute_mode, op0, op1));
4085 }
4086 else /* TRUNC_DIV, signed */
4087 {
4088 unsigned HOST_WIDE_INT ml;
4089 int lgup, post_shift;
4090 rtx mlr;
4091 HOST_WIDE_INT d = INTVAL (op1);
4092 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4093
4094 /* n rem d = n rem -d */
4095 if (rem_flag && d < 0)
4096 {
4097 d = abs_d;
4098 op1 = gen_int_mode (abs_d, compute_mode);
4099 }
4100
4101 if (d == 1)
4102 quotient = op0;
4103 else if (d == -1)
4104 quotient = expand_unop (compute_mode, neg_optab, op0,
4105 tquotient, 0);
4106 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4107 {
4108 /* This case is not handled correctly below. */
4109 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4110 compute_mode, 1, 1);
4111 if (quotient == 0)
4112 goto fail1;
4113 }
4114 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4115 && (rem_flag ? smod_pow2_cheap[compute_mode]
4116 : sdiv_pow2_cheap[compute_mode])
4117 /* We assume that the cheap metric is true if the
4118 optab has an expander for this mode. */
4119 && (((rem_flag ? smod_optab : sdiv_optab)
4120 ->handlers[compute_mode].insn_code
4121 != CODE_FOR_nothing)
4122 || (sdivmod_optab->handlers[compute_mode]
4123 .insn_code != CODE_FOR_nothing)))
4124 ;
4125 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4126 {
4127 if (rem_flag)
4128 {
4129 remainder = expand_smod_pow2 (compute_mode, op0, d);
4130 if (remainder)
4131 return gen_lowpart (mode, remainder);
4132 }
4133
4134 if (sdiv_pow2_cheap[compute_mode]
4135 && ((sdiv_optab->handlers[compute_mode].insn_code
4136 != CODE_FOR_nothing)
4137 || (sdivmod_optab->handlers[compute_mode].insn_code
4138 != CODE_FOR_nothing)))
4139 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4140 compute_mode, op0,
4141 gen_int_mode (abs_d,
4142 compute_mode),
4143 NULL_RTX, 0);
4144 else
4145 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4146
4147 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4148 negate the quotient. */
4149 if (d < 0)
4150 {
4151 insn = get_last_insn ();
4152 if (insn != last
4153 && (set = single_set (insn)) != 0
4154 && SET_DEST (set) == quotient
4155 && abs_d < ((unsigned HOST_WIDE_INT) 1
4156 << (HOST_BITS_PER_WIDE_INT - 1)))
4157 set_unique_reg_note (insn,
4158 REG_EQUAL,
4159 gen_rtx_DIV (compute_mode,
4160 op0,
4161 GEN_INT
4162 (trunc_int_for_mode
4163 (abs_d,
4164 compute_mode))));
4165
4166 quotient = expand_unop (compute_mode, neg_optab,
4167 quotient, quotient, 0);
4168 }
4169 }
4170 else if (size <= HOST_BITS_PER_WIDE_INT)
4171 {
4172 choose_multiplier (abs_d, size, size - 1,
4173 &mlr, &post_shift, &lgup);
4174 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4175 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4176 {
4177 rtx t1, t2, t3;
4178
4179 if (post_shift >= BITS_PER_WORD
4180 || size - 1 >= BITS_PER_WORD)
4181 goto fail1;
4182
4183 extra_cost = (shift_cost[compute_mode][post_shift]
4184 + shift_cost[compute_mode][size - 1]
4185 + add_cost[compute_mode]);
4186 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4187 NULL_RTX, 0,
4188 max_cost - extra_cost);
4189 if (t1 == 0)
4190 goto fail1;
4191 t2 = expand_shift
4192 (RSHIFT_EXPR, compute_mode, t1,
4193 build_int_cst (NULL_TREE, post_shift),
4194 NULL_RTX, 0);
4195 t3 = expand_shift
4196 (RSHIFT_EXPR, compute_mode, op0,
4197 build_int_cst (NULL_TREE, size - 1),
4198 NULL_RTX, 0);
4199 if (d < 0)
4200 quotient
4201 = force_operand (gen_rtx_MINUS (compute_mode,
4202 t3, t2),
4203 tquotient);
4204 else
4205 quotient
4206 = force_operand (gen_rtx_MINUS (compute_mode,
4207 t2, t3),
4208 tquotient);
4209 }
4210 else
4211 {
4212 rtx t1, t2, t3, t4;
4213
4214 if (post_shift >= BITS_PER_WORD
4215 || size - 1 >= BITS_PER_WORD)
4216 goto fail1;
4217
4218 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4219 mlr = gen_int_mode (ml, compute_mode);
4220 extra_cost = (shift_cost[compute_mode][post_shift]
4221 + shift_cost[compute_mode][size - 1]
4222 + 2 * add_cost[compute_mode]);
4223 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4224 NULL_RTX, 0,
4225 max_cost - extra_cost);
4226 if (t1 == 0)
4227 goto fail1;
4228 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4229 t1, op0),
4230 NULL_RTX);
4231 t3 = expand_shift
4232 (RSHIFT_EXPR, compute_mode, t2,
4233 build_int_cst (NULL_TREE, post_shift),
4234 NULL_RTX, 0);
4235 t4 = expand_shift
4236 (RSHIFT_EXPR, compute_mode, op0,
4237 build_int_cst (NULL_TREE, size - 1),
4238 NULL_RTX, 0);
4239 if (d < 0)
4240 quotient
4241 = force_operand (gen_rtx_MINUS (compute_mode,
4242 t4, t3),
4243 tquotient);
4244 else
4245 quotient
4246 = force_operand (gen_rtx_MINUS (compute_mode,
4247 t3, t4),
4248 tquotient);
4249 }
4250 }
4251 else /* Mode too wide to use tricky code */
4252 break;
4253
4254 insn = get_last_insn ();
4255 if (insn != last
4256 && (set = single_set (insn)) != 0
4257 && SET_DEST (set) == quotient)
4258 set_unique_reg_note (insn,
4259 REG_EQUAL,
4260 gen_rtx_DIV (compute_mode, op0, op1));
4261 }
4262 break;
4263 }
4264 fail1:
4265 delete_insns_since (last);
4266 break;
4267
4268 case FLOOR_DIV_EXPR:
4269 case FLOOR_MOD_EXPR:
4270 /* We will come here only for signed operations. */
4271 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4272 {
4273 unsigned HOST_WIDE_INT mh;
4274 int pre_shift, lgup, post_shift;
4275 HOST_WIDE_INT d = INTVAL (op1);
4276 rtx ml;
4277
4278 if (d > 0)
4279 {
4280 /* We could just as easily deal with negative constants here,
4281 but it does not seem worth the trouble for GCC 2.6. */
4282 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4283 {
4284 pre_shift = floor_log2 (d);
4285 if (rem_flag)
4286 {
4287 remainder = expand_binop (compute_mode, and_optab, op0,
4288 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4289 remainder, 0, OPTAB_LIB_WIDEN);
4290 if (remainder)
4291 return gen_lowpart (mode, remainder);
4292 }
4293 quotient = expand_shift
4294 (RSHIFT_EXPR, compute_mode, op0,
4295 build_int_cst (NULL_TREE, pre_shift),
4296 tquotient, 0);
4297 }
4298 else
4299 {
4300 rtx t1, t2, t3, t4;
4301
4302 mh = choose_multiplier (d, size, size - 1,
4303 &ml, &post_shift, &lgup);
4304 gcc_assert (!mh);
4305
4306 if (post_shift < BITS_PER_WORD
4307 && size - 1 < BITS_PER_WORD)
4308 {
4309 t1 = expand_shift
4310 (RSHIFT_EXPR, compute_mode, op0,
4311 build_int_cst (NULL_TREE, size - 1),
4312 NULL_RTX, 0);
4313 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4314 NULL_RTX, 0, OPTAB_WIDEN);
4315 extra_cost = (shift_cost[compute_mode][post_shift]
4316 + shift_cost[compute_mode][size - 1]
4317 + 2 * add_cost[compute_mode]);
4318 t3 = expand_mult_highpart (compute_mode, t2, ml,
4319 NULL_RTX, 1,
4320 max_cost - extra_cost);
4321 if (t3 != 0)
4322 {
4323 t4 = expand_shift
4324 (RSHIFT_EXPR, compute_mode, t3,
4325 build_int_cst (NULL_TREE, post_shift),
4326 NULL_RTX, 1);
4327 quotient = expand_binop (compute_mode, xor_optab,
4328 t4, t1, tquotient, 0,
4329 OPTAB_WIDEN);
4330 }
4331 }
4332 }
4333 }
4334 else
4335 {
4336 rtx nsign, t1, t2, t3, t4;
4337 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4338 op0, constm1_rtx), NULL_RTX);
4339 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4340 0, OPTAB_WIDEN);
4341 nsign = expand_shift
4342 (RSHIFT_EXPR, compute_mode, t2,
4343 build_int_cst (NULL_TREE, size - 1),
4344 NULL_RTX, 0);
4345 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4346 NULL_RTX);
4347 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4348 NULL_RTX, 0);
4349 if (t4)
4350 {
4351 rtx t5;
4352 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4353 NULL_RTX, 0);
4354 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4355 t4, t5),
4356 tquotient);
4357 }
4358 }
4359 }
4360
4361 if (quotient != 0)
4362 break;
4363 delete_insns_since (last);
4364
4365 /* Try using an instruction that produces both the quotient and
4366 remainder, using truncation. We can easily compensate the quotient
4367 or remainder to get floor rounding, once we have the remainder.
4368 Notice that we also compute the final remainder value here,
4369 and return the result right away. */
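
/* The compensation used below: if the truncating remainder is nonzero and
   OP0 and OP1 have opposite signs, subtract 1 from the quotient and add OP1
   to the remainder.  For example (illustrative), -7 / 2 truncates to q == -3,
   r == -1; the signs differ and r != 0, so the floor result is q == -4,
   r == 1.  */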
4370 if (target == 0 || GET_MODE (target) != compute_mode)
4371 target = gen_reg_rtx (compute_mode);
4372
4373 if (rem_flag)
4374 {
4375 remainder
4376 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4377 quotient = gen_reg_rtx (compute_mode);
4378 }
4379 else
4380 {
4381 quotient
4382 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4383 remainder = gen_reg_rtx (compute_mode);
4384 }
4385
4386 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4387 quotient, remainder, 0))
4388 {
4389 /* This could be computed with a branch-less sequence.
4390 Save that for later. */
4391 rtx tem;
4392 rtx label = gen_label_rtx ();
4393 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4394 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4395 NULL_RTX, 0, OPTAB_WIDEN);
4396 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4397 expand_dec (quotient, const1_rtx);
4398 expand_inc (remainder, op1);
4399 emit_label (label);
4400 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4401 }
4402
4403 /* No luck with division elimination or divmod. Have to do it
4404 by conditionally adjusting op0 *and* the result. */
4405 {
4406 rtx label1, label2, label3, label4, label5;
4407 rtx adjusted_op0;
4408 rtx tem;
4409
4410 quotient = gen_reg_rtx (compute_mode);
4411 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4412 label1 = gen_label_rtx ();
4413 label2 = gen_label_rtx ();
4414 label3 = gen_label_rtx ();
4415 label4 = gen_label_rtx ();
4416 label5 = gen_label_rtx ();
4417 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4418 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4419 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4420 quotient, 0, OPTAB_LIB_WIDEN);
4421 if (tem != quotient)
4422 emit_move_insn (quotient, tem);
4423 emit_jump_insn (gen_jump (label5));
4424 emit_barrier ();
4425 emit_label (label1);
4426 expand_inc (adjusted_op0, const1_rtx);
4427 emit_jump_insn (gen_jump (label4));
4428 emit_barrier ();
4429 emit_label (label2);
4430 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4431 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4432 quotient, 0, OPTAB_LIB_WIDEN);
4433 if (tem != quotient)
4434 emit_move_insn (quotient, tem);
4435 emit_jump_insn (gen_jump (label5));
4436 emit_barrier ();
4437 emit_label (label3);
4438 expand_dec (adjusted_op0, const1_rtx);
4439 emit_label (label4);
4440 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4441 quotient, 0, OPTAB_LIB_WIDEN);
4442 if (tem != quotient)
4443 emit_move_insn (quotient, tem);
4444 expand_dec (quotient, const1_rtx);
4445 emit_label (label5);
4446 }
4447 break;
4448
4449 case CEIL_DIV_EXPR:
4450 case CEIL_MOD_EXPR:
4451 if (unsignedp)
4452 {
4453 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4454 {
4455 rtx t1, t2, t3;
4456 unsigned HOST_WIDE_INT d = INTVAL (op1);
4457 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4458 build_int_cst (NULL_TREE, floor_log2 (d)),
4459 tquotient, 1);
4460 t2 = expand_binop (compute_mode, and_optab, op0,
4461 GEN_INT (d - 1),
4462 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4463 t3 = gen_reg_rtx (compute_mode);
4464 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4465 compute_mode, 1, 1);
4466 if (t3 == 0)
4467 {
4468 rtx lab;
4469 lab = gen_label_rtx ();
4470 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4471 expand_inc (t1, const1_rtx);
4472 emit_label (lab);
4473 quotient = t1;
4474 }
4475 else
4476 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4477 t1, t3),
4478 tquotient);
4479 break;
4480 }
4481
4482 /* Try using an instruction that produces both the quotient and
4483 remainder, using truncation. We can easily compensate the
4484 quotient or remainder to get ceiling rounding, once we have the
4485 remainder. Notice that we also compute the final remainder
4486 value here, and return the result right away. */
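
/* Here the compensation is: if the truncating remainder is nonzero, add 1
   to the quotient and subtract OP1 from the remainder; e.g. (illustrative)
   7 / 2 rounds up from q == 3, r == 1 to q == 4, r == -1.  */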
4487 if (target == 0 || GET_MODE (target) != compute_mode)
4488 target = gen_reg_rtx (compute_mode);
4489
4490 if (rem_flag)
4491 {
4492 remainder = (REG_P (target)
4493 ? target : gen_reg_rtx (compute_mode));
4494 quotient = gen_reg_rtx (compute_mode);
4495 }
4496 else
4497 {
4498 quotient = (REG_P (target)
4499 ? target : gen_reg_rtx (compute_mode));
4500 remainder = gen_reg_rtx (compute_mode);
4501 }
4502
4503 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4504 remainder, 1))
4505 {
4506 /* This could be computed with a branch-less sequence.
4507 Save that for later. */
4508 rtx label = gen_label_rtx ();
4509 do_cmp_and_jump (remainder, const0_rtx, EQ,
4510 compute_mode, label);
4511 expand_inc (quotient, const1_rtx);
4512 expand_dec (remainder, op1);
4513 emit_label (label);
4514 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4515 }
4516
4517 /* No luck with division elimination or divmod. Have to do it
4518 by conditionally adjusting op0 *and* the result. */
4519 {
4520 rtx label1, label2;
4521 rtx adjusted_op0, tem;
4522
4523 quotient = gen_reg_rtx (compute_mode);
4524 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4525 label1 = gen_label_rtx ();
4526 label2 = gen_label_rtx ();
4527 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4528 compute_mode, label1);
4529 emit_move_insn (quotient, const0_rtx);
4530 emit_jump_insn (gen_jump (label2));
4531 emit_barrier ();
4532 emit_label (label1);
4533 expand_dec (adjusted_op0, const1_rtx);
4534 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4535 quotient, 1, OPTAB_LIB_WIDEN);
4536 if (tem != quotient)
4537 emit_move_insn (quotient, tem);
4538 expand_inc (quotient, const1_rtx);
4539 emit_label (label2);
4540 }
4541 }
4542 else /* signed */
4543 {
4544 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4545 && INTVAL (op1) >= 0)
4546 {
4547 /* This is extremely similar to the code for the unsigned case
4548 above. For 2.7 we should merge these variants, but for
4549 2.6.1 I don't want to touch the code for unsigned since that
4550 get used in C. The signed case will only be used by other
4551 languages (Ada). */
4552
4553 rtx t1, t2, t3;
4554 unsigned HOST_WIDE_INT d = INTVAL (op1);
4555 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4556 build_int_cst (NULL_TREE, floor_log2 (d)),
4557 tquotient, 0);
4558 t2 = expand_binop (compute_mode, and_optab, op0,
4559 GEN_INT (d - 1),
4560 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4561 t3 = gen_reg_rtx (compute_mode);
4562 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4563 compute_mode, 1, 1);
4564 if (t3 == 0)
4565 {
4566 rtx lab;
4567 lab = gen_label_rtx ();
4568 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4569 expand_inc (t1, const1_rtx);
4570 emit_label (lab);
4571 quotient = t1;
4572 }
4573 else
4574 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4575 t1, t3),
4576 tquotient);
4577 break;
4578 }
4579
4580 /* Try using an instruction that produces both the quotient and
4581 remainder, using truncation. We can easily compensate the
4582 quotient or remainder to get ceiling rounding, once we have the
4583 remainder. Notice that we also compute the final remainder
4584 value here, and return the result right away. */
4585 if (target == 0 || GET_MODE (target) != compute_mode)
4586 target = gen_reg_rtx (compute_mode);
4587 if (rem_flag)
4588 {
4589 remainder = (REG_P (target)
4590 ? target : gen_reg_rtx (compute_mode));
4591 quotient = gen_reg_rtx (compute_mode);
4592 }
4593 else
4594 {
4595 quotient = (REG_P (target)
4596 ? target : gen_reg_rtx (compute_mode));
4597 remainder = gen_reg_rtx (compute_mode);
4598 }
4599
4600 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4601 remainder, 0))
4602 {
4603 /* This could be computed with a branch-less sequence.
4604 Save that for later. */
4605 rtx tem;
4606 rtx label = gen_label_rtx ();
4607 do_cmp_and_jump (remainder, const0_rtx, EQ,
4608 compute_mode, label);
4609 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4610 NULL_RTX, 0, OPTAB_WIDEN);
4611 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4612 expand_inc (quotient, const1_rtx);
4613 expand_dec (remainder, op1);
4614 emit_label (label);
4615 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4616 }
4617
4618 /* No luck with division elimination or divmod. Have to do it
4619 by conditionally adjusting op0 *and* the result. */
4620 {
4621 rtx label1, label2, label3, label4, label5;
4622 rtx adjusted_op0;
4623 rtx tem;
4624
4625 quotient = gen_reg_rtx (compute_mode);
4626 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4627 label1 = gen_label_rtx ();
4628 label2 = gen_label_rtx ();
4629 label3 = gen_label_rtx ();
4630 label4 = gen_label_rtx ();
4631 label5 = gen_label_rtx ();
4632 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4633 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4634 compute_mode, label1);
4635 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4636 quotient, 0, OPTAB_LIB_WIDEN);
4637 if (tem != quotient)
4638 emit_move_insn (quotient, tem);
4639 emit_jump_insn (gen_jump (label5));
4640 emit_barrier ();
4641 emit_label (label1);
4642 expand_dec (adjusted_op0, const1_rtx);
4643 emit_jump_insn (gen_jump (label4));
4644 emit_barrier ();
4645 emit_label (label2);
4646 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4647 compute_mode, label3);
4648 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4649 quotient, 0, OPTAB_LIB_WIDEN);
4650 if (tem != quotient)
4651 emit_move_insn (quotient, tem);
4652 emit_jump_insn (gen_jump (label5));
4653 emit_barrier ();
4654 emit_label (label3);
4655 expand_inc (adjusted_op0, const1_rtx);
4656 emit_label (label4);
4657 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4658 quotient, 0, OPTAB_LIB_WIDEN);
4659 if (tem != quotient)
4660 emit_move_insn (quotient, tem);
4661 expand_inc (quotient, const1_rtx);
4662 emit_label (label5);
4663 }
4664 }
4665 break;
4666
4667 case EXACT_DIV_EXPR:
4668 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4669 {
4670 HOST_WIDE_INT d = INTVAL (op1);
4671 unsigned HOST_WIDE_INT ml;
4672 int pre_shift;
4673 rtx t1;
4674
4675 pre_shift = floor_log2 (d & -d);
4676 ml = invert_mod2n (d >> pre_shift, size);
4677 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4678 build_int_cst (NULL_TREE, pre_shift),
4679 NULL_RTX, unsignedp);
4680 quotient = expand_mult (compute_mode, t1,
4681 gen_int_mode (ml, compute_mode),
4682 NULL_RTX, 1);
4683
4684 insn = get_last_insn ();
4685 set_unique_reg_note (insn,
4686 REG_EQUAL,
4687 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4688 compute_mode,
4689 op0, op1));
4690 }
4691 break;
4692
4693 case ROUND_DIV_EXPR:
4694 case ROUND_MOD_EXPR:
4695 if (unsignedp)
4696 {
4697 rtx tem;
4698 rtx label;
4699 label = gen_label_rtx ();
4700 quotient = gen_reg_rtx (compute_mode);
4701 remainder = gen_reg_rtx (compute_mode);
4702 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4703 {
4704 rtx tem;
4705 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4706 quotient, 1, OPTAB_LIB_WIDEN);
4707 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4708 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4709 remainder, 1, OPTAB_LIB_WIDEN);
4710 }
4711 tem = plus_constant (op1, -1);
4712 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4713 build_int_cst (NULL_TREE, 1),
4714 NULL_RTX, 1);
4715 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4716 expand_inc (quotient, const1_rtx);
4717 expand_dec (remainder, op1);
4718 emit_label (label);
4719 }
4720 else
4721 {
4722 rtx abs_rem, abs_op1, tem, mask;
4723 rtx label;
4724 label = gen_label_rtx ();
4725 quotient = gen_reg_rtx (compute_mode);
4726 remainder = gen_reg_rtx (compute_mode);
4727 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4728 {
4729 rtx tem;
4730 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4731 quotient, 0, OPTAB_LIB_WIDEN);
4732 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4733 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4734 remainder, 0, OPTAB_LIB_WIDEN);
4735 }
4736 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4737 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4738 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4739 build_int_cst (NULL_TREE, 1),
4740 NULL_RTX, 1);
4741 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4742 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4743 NULL_RTX, 0, OPTAB_WIDEN);
4744 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4745 build_int_cst (NULL_TREE, size - 1),
4746 NULL_RTX, 0);
4747 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4748 NULL_RTX, 0, OPTAB_WIDEN);
4749 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4750 NULL_RTX, 0, OPTAB_WIDEN);
4751 expand_inc (quotient, tem);
4752 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4753 NULL_RTX, 0, OPTAB_WIDEN);
4754 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4755 NULL_RTX, 0, OPTAB_WIDEN);
4756 expand_dec (remainder, tem);
4757 emit_label (label);
4758 }
4759 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4760
4761 default:
4762 gcc_unreachable ();
4763 }
4764
4765 if (quotient == 0)
4766 {
4767 if (target && GET_MODE (target) != compute_mode)
4768 target = 0;
4769
4770 if (rem_flag)
4771 {
4772 /* Try to produce the remainder without producing the quotient.
4773 If we seem to have a divmod pattern that does not require widening,
4774 don't try widening here. We should really have a WIDEN argument
4775 to expand_twoval_binop, since what we'd really like to do here is
4776 1) try a mod insn in compute_mode
4777 2) try a divmod insn in compute_mode
4778 3) try a div insn in compute_mode and multiply-subtract to get
4779 remainder
4780 4) try the same things with widening allowed. */
4781 remainder
4782 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4783 op0, op1, target,
4784 unsignedp,
4785 ((optab2->handlers[compute_mode].insn_code
4786 != CODE_FOR_nothing)
4787 ? OPTAB_DIRECT : OPTAB_WIDEN));
4788 if (remainder == 0)
4789 {
4790 /* No luck there. Can we do remainder and divide at once
4791 without a library call? */
4792 remainder = gen_reg_rtx (compute_mode);
4793 if (! expand_twoval_binop ((unsignedp
4794 ? udivmod_optab
4795 : sdivmod_optab),
4796 op0, op1,
4797 NULL_RTX, remainder, unsignedp))
4798 remainder = 0;
4799 }
4800
4801 if (remainder)
4802 return gen_lowpart (mode, remainder);
4803 }
4804
4805 /* Produce the quotient. Try a quotient insn, but not a library call.
4806 If we have a divmod in this mode, use it in preference to widening
4807 the div (for this test we assume it will not fail). Note that optab2
4808 is set to the one of the two optabs that the call below will use. */
4809 quotient
4810 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4811 op0, op1, rem_flag ? NULL_RTX : target,
4812 unsignedp,
4813 ((optab2->handlers[compute_mode].insn_code
4814 != CODE_FOR_nothing)
4815 ? OPTAB_DIRECT : OPTAB_WIDEN));
4816
4817 if (quotient == 0)
4818 {
4819 /* No luck there. Try a quotient-and-remainder insn,
4820 keeping the quotient alone. */
4821 quotient = gen_reg_rtx (compute_mode);
4822 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4823 op0, op1,
4824 quotient, NULL_RTX, unsignedp))
4825 {
4826 quotient = 0;
4827 if (! rem_flag)
4828 /* Still no luck. If we are not computing the remainder,
4829 use a library call for the quotient. */
4830 quotient = sign_expand_binop (compute_mode,
4831 udiv_optab, sdiv_optab,
4832 op0, op1, target,
4833 unsignedp, OPTAB_LIB_WIDEN);
4834 }
4835 }
4836 }
4837
4838 if (rem_flag)
4839 {
4840 if (target && GET_MODE (target) != compute_mode)
4841 target = 0;
4842
4843 if (quotient == 0)
4844 {
4845 /* No divide instruction either. Use library for remainder. */
4846 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4847 op0, op1, target,
4848 unsignedp, OPTAB_LIB_WIDEN);
4849 /* No remainder function. Try a quotient-and-remainder
4850 function, keeping the remainder. */
4851 if (!remainder)
4852 {
4853 remainder = gen_reg_rtx (compute_mode);
4854 if (!expand_twoval_binop_libfunc
4855 (unsignedp ? udivmod_optab : sdivmod_optab,
4856 op0, op1,
4857 NULL_RTX, remainder,
4858 unsignedp ? UMOD : MOD))
4859 remainder = NULL_RTX;
4860 }
4861 }
4862 else
4863 {
4864 /* We divided. Now finish doing X - Y * (X / Y). */
4865 remainder = expand_mult (compute_mode, quotient, op1,
4866 NULL_RTX, unsignedp);
4867 remainder = expand_binop (compute_mode, sub_optab, op0,
4868 remainder, target, unsignedp,
4869 OPTAB_LIB_WIDEN);
4870 }
4871 }
4872
4873 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4874 }
4875 \f
4876 /* Return a tree node with data type TYPE, describing the value of X.
4877 Usually this is a VAR_DECL, if there is no obvious better choice.
4878 X may be an expression; however, we only support those expressions
4879 generated by loop.c. */
4880
4881 tree
4882 make_tree (tree type, rtx x)
4883 {
4884 tree t;
4885
4886 switch (GET_CODE (x))
4887 {
4888 case CONST_INT:
4889 {
4890 HOST_WIDE_INT hi = 0;
4891
4892 if (INTVAL (x) < 0
4893 && !(TYPE_UNSIGNED (type)
4894 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4895 < HOST_BITS_PER_WIDE_INT)))
4896 hi = -1;
4897
4898 t = build_int_cst_wide (type, INTVAL (x), hi);
4899
4900 return t;
4901 }
4902
4903 case CONST_DOUBLE:
4904 if (GET_MODE (x) == VOIDmode)
4905 t = build_int_cst_wide (type,
4906 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4907 else
4908 {
4909 REAL_VALUE_TYPE d;
4910
4911 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4912 t = build_real (type, d);
4913 }
4914
4915 return t;
4916
4917 case CONST_VECTOR:
4918 {
4919 int i, units;
4920 rtx elt;
4921 tree t = NULL_TREE;
4922
4923 units = CONST_VECTOR_NUNITS (x);
4924
4925 /* Build a tree with vector elements. */
4926 for (i = units - 1; i >= 0; --i)
4927 {
4928 elt = CONST_VECTOR_ELT (x, i);
4929 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4930 }
4931
4932 return build_vector (type, t);
4933 }
4934
4935 case PLUS:
4936 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4937 make_tree (type, XEXP (x, 1)));
4938
4939 case MINUS:
4940 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4941 make_tree (type, XEXP (x, 1)));
4942
4943 case NEG:
4944 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4945
4946 case MULT:
4947 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4948 make_tree (type, XEXP (x, 1)));
4949
4950 case ASHIFT:
4951 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4952 make_tree (type, XEXP (x, 1)));
4953
4954 case LSHIFTRT:
4955 t = lang_hooks.types.unsigned_type (type);
4956 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4957 make_tree (t, XEXP (x, 0)),
4958 make_tree (type, XEXP (x, 1))));
4959
4960 case ASHIFTRT:
4961 t = lang_hooks.types.signed_type (type);
4962 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4963 make_tree (t, XEXP (x, 0)),
4964 make_tree (type, XEXP (x, 1))));
4965
4966 case DIV:
4967 if (TREE_CODE (type) != REAL_TYPE)
4968 t = lang_hooks.types.signed_type (type);
4969 else
4970 t = type;
4971
4972 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4973 make_tree (t, XEXP (x, 0)),
4974 make_tree (t, XEXP (x, 1))));
4975 case UDIV:
4976 t = lang_hooks.types.unsigned_type (type);
4977 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4978 make_tree (t, XEXP (x, 0)),
4979 make_tree (t, XEXP (x, 1))));
4980
4981 case SIGN_EXTEND:
4982 case ZERO_EXTEND:
4983 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4984 GET_CODE (x) == ZERO_EXTEND);
4985 return fold_convert (type, make_tree (t, XEXP (x, 0)));
4986
4987 default:
4988 t = build_decl (VAR_DECL, NULL_TREE, type);
4989
4990 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4991 ptr_mode. So convert. */
4992 if (POINTER_TYPE_P (type))
4993 x = convert_memory_address (TYPE_MODE (type), x);
4994
4995 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4996 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4997 t->decl_with_rtl.rtl = x;
4998
4999 return t;
5000 }
5001 }
5002
5003 /* Check whether the multiplication X * MULT + ADD overflows.
5004 X, MULT and ADD must be CONST_*.
5005 MODE is the machine mode for the computation.
5006 X and MULT must have mode MODE. ADD may have a different mode.
5007 So can X (defaults to same as MODE).
5008 UNSIGNEDP is nonzero to do unsigned multiplication. */
5009
5010 bool
5011 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
5012 enum machine_mode mode, int unsignedp)
5013 {
5014 tree type, mult_type, add_type, result;
5015
5016 type = lang_hooks.types.type_for_mode (mode, unsignedp);
5017
5018 /* In order to get a proper overflow indication from an unsigned
5019 type, we have to pretend that it's a sizetype. */
5020 mult_type = type;
5021 if (unsignedp)
5022 {
5023 /* FIXME: It would be nice if we could step directly from this
5024 type to its sizetype equivalent. */
5025 mult_type = build_distinct_type_copy (type);
5026 TYPE_IS_SIZETYPE (mult_type) = 1;
5027 }
5028
5029 add_type = (GET_MODE (add) == VOIDmode ? mult_type
5030 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
5031
5032 result = fold_build2 (PLUS_EXPR, mult_type,
5033 fold_build2 (MULT_EXPR, mult_type,
5034 make_tree (mult_type, x),
5035 make_tree (mult_type, mult)),
5036 make_tree (add_type, add));
5037
5038 return TREE_CONSTANT_OVERFLOW (result);
5039 }
5040
5041 /* Return an rtx representing the value of X * MULT + ADD.
5042 TARGET is a suggestion for where to store the result (an rtx).
5043 MODE is the machine mode for the computation.
5044 X and MULT must have mode MODE. ADD may have a different mode.
5045 So can X (defaults to same as MODE).
5046 UNSIGNEDP is nonzero to do unsigned multiplication.
5047 This may emit insns. */
5048
5049 rtx
5050 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5051 int unsignedp)
5052 {
5053 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5054 tree add_type = (GET_MODE (add) == VOIDmode
5055 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5056 unsignedp));
5057 tree result = fold_build2 (PLUS_EXPR, type,
5058 fold_build2 (MULT_EXPR, type,
5059 make_tree (type, x),
5060 make_tree (type, mult)),
5061 make_tree (add_type, add));
5062
5063 return expand_expr (result, target, VOIDmode, 0);
5064 }
5065 \f
5066 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5067 and returning TARGET.
5068
5069 If TARGET is 0, a pseudo-register or constant is returned. */
5070
5071 rtx
5072 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5073 {
5074 rtx tem = 0;
5075
5076 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5077 tem = simplify_binary_operation (AND, mode, op0, op1);
5078 if (tem == 0)
5079 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5080
5081 if (target == 0)
5082 target = tem;
5083 else if (tem != target)
5084 emit_move_insn (target, tem);
5085 return target;
5086 }
5087 \f
5088 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5089 and storing in TARGET. Normally return TARGET.
5090 Return 0 if that cannot be done.
5091
5092 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5093 it is VOIDmode, they cannot both be CONST_INT.
5094
5095 UNSIGNEDP is for the case where we have to widen the operands
5096 to perform the operation. It says to use zero-extension.
5097
5098 NORMALIZEP is 1 if we should convert the result to be either zero
5099 or one.  NORMALIZEP is -1 if we should convert the result to be
5100 either zero or -1. If NORMALIZEP is zero, the result will be left
5101 "raw" out of the scc insn. */
5102
5103 rtx
5104 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5105 enum machine_mode mode, int unsignedp, int normalizep)
5106 {
5107 rtx subtarget;
5108 enum insn_code icode;
5109 enum machine_mode compare_mode;
5110 enum machine_mode target_mode = GET_MODE (target);
5111 rtx tem;
5112 rtx last = get_last_insn ();
5113 rtx pattern, comparison;
5114
5115 if (unsignedp)
5116 code = unsigned_condition (code);
5117
5118 /* If one operand is constant, make it the second one. Only do this
5119 if the other operand is not constant as well. */
5120
5121 if (swap_commutative_operands_p (op0, op1))
5122 {
5123 tem = op0;
5124 op0 = op1;
5125 op1 = tem;
5126 code = swap_condition (code);
5127 }
5128
5129 if (mode == VOIDmode)
5130 mode = GET_MODE (op0);
5131
5132 /* For some comparisons with 1 and -1, we can convert this to
5133 comparisons with zero. This will often produce more opportunities for
5134 store-flag insns. */
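/* Concretely, the rewrites performed by the switch below are, e.g.,
   OP0 < 1 becomes OP0 <= 0 and OP0 >= 1 becomes OP0 > 0 for signed
   operands, while unsigned OP0 >= 1 becomes OP0 != 0 and unsigned
   OP0 < 1 becomes OP0 == 0.  */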
5135
5136 switch (code)
5137 {
5138 case LT:
5139 if (op1 == const1_rtx)
5140 op1 = const0_rtx, code = LE;
5141 break;
5142 case LE:
5143 if (op1 == constm1_rtx)
5144 op1 = const0_rtx, code = LT;
5145 break;
5146 case GE:
5147 if (op1 == const1_rtx)
5148 op1 = const0_rtx, code = GT;
5149 break;
5150 case GT:
5151 if (op1 == constm1_rtx)
5152 op1 = const0_rtx, code = GE;
5153 break;
5154 case GEU:
5155 if (op1 == const1_rtx)
5156 op1 = const0_rtx, code = NE;
5157 break;
5158 case LTU:
5159 if (op1 == const1_rtx)
5160 op1 = const0_rtx, code = EQ;
5161 break;
5162 default:
5163 break;
5164 }
5165
5166 /* If we are comparing a double-word integer with zero or -1, we can
5167 convert the comparison into one involving a single word. */
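/* For example, on a 32-bit target a DImode value X with low word LO and
   high word HI satisfies X == 0 iff (LO | HI) == 0, X == -1 iff
   (LO & HI) == -1, and X < 0 iff HI < 0 (a sign test on the high word
   alone); those are the reductions generated below.  */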
5168 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5169 && GET_MODE_CLASS (mode) == MODE_INT
5170 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5171 {
5172 if ((code == EQ || code == NE)
5173 && (op1 == const0_rtx || op1 == constm1_rtx))
5174 {
5175 rtx op00, op01, op0both;
5176
5177 /* Do a logical OR or AND of the two words and compare the result. */
5178 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5179 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5180 op0both = expand_binop (word_mode,
5181 op1 == const0_rtx ? ior_optab : and_optab,
5182 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5183
5184 if (op0both != 0)
5185 return emit_store_flag (target, code, op0both, op1, word_mode,
5186 unsignedp, normalizep);
5187 }
5188 else if ((code == LT || code == GE) && op1 == const0_rtx)
5189 {
5190 rtx op0h;
5191
5192 /* If testing the sign bit, can just test on high word. */
5193 op0h = simplify_gen_subreg (word_mode, op0, mode,
5194 subreg_highpart_offset (word_mode, mode));
5195 return emit_store_flag (target, code, op0h, op1, word_mode,
5196 unsignedp, normalizep);
5197 }
5198 }
5199
5200 /* From now on, we won't change CODE, so set ICODE now. */
5201 icode = setcc_gen_code[(int) code];
5202
5203 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5204 complement of A (for GE) and shifting the sign bit to the low bit. */
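/* That is, for an N-bit mode A < 0 reduces to the logical shift
   (unsigned) A >> (N - 1), which is 1 exactly when the sign bit of A is
   set, and A >= 0 applies the same shift to ~A; an arithmetic shift is
   used instead when a 0/-1 result is wanted.  */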
5205 if (op1 == const0_rtx && (code == LT || code == GE)
5206 && GET_MODE_CLASS (mode) == MODE_INT
5207 && (normalizep || STORE_FLAG_VALUE == 1
5208 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5209 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5210 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5211 {
5212 subtarget = target;
5213
5214 /* If the result is to be wider than OP0, it is best to convert it
5215 first. If it is to be narrower, it is *incorrect* to convert it
5216 first. */
5217 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5218 {
5219 op0 = convert_modes (target_mode, mode, op0, 0);
5220 mode = target_mode;
5221 }
5222
5223 if (target_mode != mode)
5224 subtarget = 0;
5225
5226 if (code == GE)
5227 op0 = expand_unop (mode, one_cmpl_optab, op0,
5228 ((STORE_FLAG_VALUE == 1 || normalizep)
5229 ? 0 : subtarget), 0);
5230
5231 if (STORE_FLAG_VALUE == 1 || normalizep)
5232 /* If we are supposed to produce a 0/1 value, we want to do
5233 a logical shift from the sign bit to the low-order bit; for
5234 a -1/0 value, we do an arithmetic shift. */
5235 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5236 size_int (GET_MODE_BITSIZE (mode) - 1),
5237 subtarget, normalizep != -1);
5238
5239 if (mode != target_mode)
5240 op0 = convert_modes (target_mode, mode, op0, 0);
5241
5242 return op0;
5243 }
5244
5245 if (icode != CODE_FOR_nothing)
5246 {
5247 insn_operand_predicate_fn pred;
5248
5249 /* We think we may be able to do this with a scc insn. Emit the
5250 comparison and then the scc insn. */
5251
5252 do_pending_stack_adjust ();
5253 last = get_last_insn ();
5254
5255 comparison
5256 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5257 if (CONSTANT_P (comparison))
5258 {
5259 switch (GET_CODE (comparison))
5260 {
5261 case CONST_INT:
5262 if (comparison == const0_rtx)
5263 return const0_rtx;
5264 break;
5265
5266 #ifdef FLOAT_STORE_FLAG_VALUE
5267 case CONST_DOUBLE:
5268 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5269 return const0_rtx;
5270 break;
5271 #endif
5272 default:
5273 gcc_unreachable ();
5274 }
5275
5276 if (normalizep == 1)
5277 return const1_rtx;
5278 if (normalizep == -1)
5279 return constm1_rtx;
5280 return const_true_rtx;
5281 }
5282
5283 /* The code of COMPARISON may not match CODE if compare_from_rtx
5284 decided to swap its operands and reverse the original code.
5285
5286 We know that compare_from_rtx returns either a CONST_INT or
5287 a new comparison code, so it is safe to just extract the
5288 code from COMPARISON. */
5289 code = GET_CODE (comparison);
5290
5291 /* Get a reference to the target in the proper mode for this insn. */
5292 compare_mode = insn_data[(int) icode].operand[0].mode;
5293 subtarget = target;
5294 pred = insn_data[(int) icode].operand[0].predicate;
5295 if (optimize || ! (*pred) (subtarget, compare_mode))
5296 subtarget = gen_reg_rtx (compare_mode);
5297
5298 pattern = GEN_FCN (icode) (subtarget);
5299 if (pattern)
5300 {
5301 emit_insn (pattern);
5302
5303 /* If we are converting to a wider mode, first convert to
5304 TARGET_MODE, then normalize. This produces better combining
5305 opportunities on machines that have a SIGN_EXTRACT when we are
5306 testing a single bit. This mostly benefits the 68k.
5307
5308 If STORE_FLAG_VALUE does not have the sign bit set when
5309 interpreted in COMPARE_MODE, we can do this conversion as
5310 unsigned, which is usually more efficient. */
5311 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5312 {
5313 convert_move (target, subtarget,
5314 (GET_MODE_BITSIZE (compare_mode)
5315 <= HOST_BITS_PER_WIDE_INT)
5316 && 0 == (STORE_FLAG_VALUE
5317 & ((HOST_WIDE_INT) 1
5318 << (GET_MODE_BITSIZE (compare_mode) -1))));
5319 op0 = target;
5320 compare_mode = target_mode;
5321 }
5322 else
5323 op0 = subtarget;
5324
5325 /* If we want to keep subexpressions around, don't reuse our
5326 last target. */
5327
5328 if (optimize)
5329 subtarget = 0;
5330
5331 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
5332 we don't have to do anything. */
5333 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5334 ;
5335 /* STORE_FLAG_VALUE might be the most negative number, so write
5336 the comparison this way to avoid a compile-time warning.  */
5337 else if (- normalizep == STORE_FLAG_VALUE)
5338 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5339
5340 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5341 makes it hard to use a value of just the sign bit due to
5342 ANSI integer constant typing rules. */
5343 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5344 && (STORE_FLAG_VALUE
5345 & ((HOST_WIDE_INT) 1
5346 << (GET_MODE_BITSIZE (compare_mode) - 1))))
5347 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5348 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5349 subtarget, normalizep == 1);
5350 else
5351 {
5352 gcc_assert (STORE_FLAG_VALUE & 1);
5353
5354 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5355 if (normalizep == -1)
5356 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5357 }
5358
5359 /* If we were converting to a smaller mode, do the
5360 conversion now. */
5361 if (target_mode != compare_mode)
5362 {
5363 convert_move (target, op0, 0);
5364 return target;
5365 }
5366 else
5367 return op0;
5368 }
5369 }
5370
5371 delete_insns_since (last);
5372
5373 /* If optimizing, use different pseudo registers for each insn, instead
5374 of reusing the same pseudo. This leads to better CSE, but slows
5375 down the compiler, since there are more pseudos.  */
5376 subtarget = (!optimize
5377 && (target_mode == mode)) ? target : NULL_RTX;
5378
5379 /* If we reached here, we can't do this with a scc insn. However, there
5380 are some comparisons that can be done directly. For example, if
5381 this is an equality comparison of integers, we can try to exclusive-or
5382 (or subtract) the two operands and use a recursive call to try the
5383 comparison with zero. Don't do any of these cases if branches are
5384 very cheap. */
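/* The identity used here: OP0 == OP1 exactly when (OP0 ^ OP1) == 0 (and
   likewise when OP0 - OP1 == 0), so either result can be fed back into a
   comparison against zero.  */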
5385
5386 if (BRANCH_COST > 0
5387 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5388 && op1 != const0_rtx)
5389 {
5390 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5391 OPTAB_WIDEN);
5392
5393 if (tem == 0)
5394 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5395 OPTAB_WIDEN);
5396 if (tem != 0)
5397 tem = emit_store_flag (target, code, tem, const0_rtx,
5398 mode, unsignedp, normalizep);
5399 if (tem == 0)
5400 delete_insns_since (last);
5401 return tem;
5402 }
5403
5404 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5405 the constant zero. Reject all other comparisons at this point. Only
5406 do LE and GT if branches are expensive since they are expensive on
5407 2-operand machines. */
5408
5409 if (BRANCH_COST == 0
5410 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5411 || (code != EQ && code != NE
5412 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5413 return 0;
5414
5415 /* See what we need to return. We can only return a 1, -1, or the
5416 sign bit. */
5417
5418 if (normalizep == 0)
5419 {
5420 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5421 normalizep = STORE_FLAG_VALUE;
5422
5423 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5424 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5425 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5426 ;
5427 else
5428 return 0;
5429 }
5430
5431 /* Try to put the result of the comparison in the sign bit.  Start by
5432 assuming the operations below cannot be done (TEM stays 0 on failure).  */
5433
5434 tem = 0;
5435
5436 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5437 the sign bit set. */
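/* A small worked example in 8-bit arithmetic: for A == 5, 5 | 4 = 5 has
   the sign bit clear; for A == 0, 0 | -1 = -1 has it set; for A == -3,
   the sign bit of A itself is already set.  */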
5438
5439 if (code == LE)
5440 {
5441 /* This is destructive, so SUBTARGET can't be OP0. */
5442 if (rtx_equal_p (subtarget, op0))
5443 subtarget = 0;
5444
5445 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5446 OPTAB_WIDEN);
5447 if (tem)
5448 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5449 OPTAB_WIDEN);
5450 }
5451
5452 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5453 number of bits in the mode of OP0, minus one. */
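/* Worked out for a 32-bit A: if A == 5 then (5 >> 31) - 5 = -5 (sign bit
   set); if A == 0 then 0 - 0 = 0; if A == -3 then (-1) - (-3) = 2; so
   the sign bit of the result is set exactly when A > 0.  */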
5454
5455 if (code == GT)
5456 {
5457 if (rtx_equal_p (subtarget, op0))
5458 subtarget = 0;
5459
5460 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5461 size_int (GET_MODE_BITSIZE (mode) - 1),
5462 subtarget, 0);
5463 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5464 OPTAB_WIDEN);
5465 }
5466
5467 if (code == EQ || code == NE)
5468 {
5469 /* For EQ or NE, one way to do the comparison is to apply an operation
5470 that converts the operand into a positive number if it is nonzero
5471 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5472 for NE we negate. This puts the result in the sign bit. Then we
5473 normalize with a shift, if needed.
5474
5475 Two operations that can do the above actions are ABS and FFS, so try
5476 them. If that doesn't work, and MODE is smaller than a full word,
5477 we can use zero-extension to the wider mode (an unsigned conversion)
5478 as the operation. */
5479
5480 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5481 that is compensated by the subsequent overflow when subtracting
5482 one / negating. */
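/* For instance, with ABS as the operation: abs (7) - 1 = 6 (sign clear,
   so A != 0) while abs (0) - 1 = -1 (sign set, so A == 0); for NE,
   -abs (7) = -7 has the sign bit set while -abs (0) = 0 does not.  The
   INT_MIN input mentioned above wraps to INT_MAX after the subtraction,
   leaving the sign bit clear as required.  */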
5483
5484 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5485 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5486 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5487 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5488 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5489 {
5490 tem = convert_modes (word_mode, mode, op0, 1);
5491 mode = word_mode;
5492 }
5493
5494 if (tem != 0)
5495 {
5496 if (code == EQ)
5497 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5498 0, OPTAB_WIDEN);
5499 else
5500 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5501 }
5502
5503 /* If we couldn't do it that way, for NE we can "or" the two's complement
5504 of the value with itself. For EQ, we take the one's complement of
5505 that "or", which is an extra insn, so we only handle EQ if branches
5506 are expensive. */
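/* The fact being used: (-A) | A has the sign bit set exactly when
   A != 0 (e.g. (-5) | 5 is negative, 0 | 0 is zero, and for A == INT_MIN
   both operands already have the sign bit set); taking the one's
   complement of that result flips the indication, giving EQ.  */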
5507
5508 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5509 {
5510 if (rtx_equal_p (subtarget, op0))
5511 subtarget = 0;
5512
5513 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5514 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5515 OPTAB_WIDEN);
5516
5517 if (tem && code == EQ)
5518 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5519 }
5520 }
5521
5522 if (tem && normalizep)
5523 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5524 size_int (GET_MODE_BITSIZE (mode) - 1),
5525 subtarget, normalizep == 1);
5526
5527 if (tem)
5528 {
5529 if (GET_MODE (tem) != target_mode)
5530 {
5531 convert_move (target, tem, 0);
5532 tem = target;
5533 }
5534 else if (!subtarget)
5535 {
5536 emit_move_insn (target, tem);
5537 tem = target;
5538 }
5539 }
5540 else
5541 delete_insns_since (last);
5542
5543 return tem;
5544 }
5545
5546 /* Like emit_store_flag, but always succeeds. */
5547
5548 rtx
5549 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5550 enum machine_mode mode, int unsignedp, int normalizep)
5551 {
5552 rtx tem, label;
5553
5554 /* First see if emit_store_flag can do the job. */
5555 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5556 if (tem != 0)
5557 return tem;
5558
5559 if (normalizep == 0)
5560 normalizep = 1;
5561
5562 /* If this failed, we have to do this with set/compare/jump/set code. */
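/* The fallback emitted below is, in effect:

       target = 1;
       if (op0 <code> op1) goto label;
       target = 0;
     label:

   so TARGET ends up as 1 when the comparison holds and 0 otherwise.  */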
5563
5564 if (!REG_P (target)
5565 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5566 target = gen_reg_rtx (GET_MODE (target));
5567
5568 emit_move_insn (target, const1_rtx);
5569 label = gen_label_rtx ();
5570 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5571 NULL_RTX, label);
5572
5573 emit_move_insn (target, const0_rtx);
5574 emit_label (label);
5575
5576 return target;
5577 }
5578 \f
5579 /* Perform possibly multi-word comparison and conditional jump to LABEL
5580 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5581
5582 The algorithm is based on the code in expr.c:do_jump.
5583
5584 Note that this does not perform a general comparison. Only
5585 variants generated within expmed.c are correctly handled, others
5586 could be handled if needed. */
5587
5588 static void
5589 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5590 rtx label)
5591 {
5592 /* If this mode is an integer too wide to compare properly,
5593 compare word by word. Rely on cse to optimize constant cases. */
5594
5595 if (GET_MODE_CLASS (mode) == MODE_INT
5596 && ! can_compare_p (op, mode, ccp_jump))
5597 {
5598 rtx label2 = gen_label_rtx ();
5599
5600 switch (op)
5601 {
5602 case LTU:
5603 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5604 break;
5605
5606 case LEU:
5607 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5608 break;
5609
5610 case LT:
5611 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5612 break;
5613
5614 case GT:
5615 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5616 break;
5617
5618 case GE:
5619 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5620 break;
5621
5622 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5623 those are the only equality operations we do.  */
5624 case EQ:
5625 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5626 do_jump_by_parts_equality_rtx (arg1, label2, label);
5627 break;
5628
5629 case NE:
5630 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5631 do_jump_by_parts_equality_rtx (arg1, label, label2);
5632 break;
5633
5634 default:
5635 gcc_unreachable ();
5636 }
5637
5638 emit_label (label2);
5639 }
5640 else
5641 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5642 }