gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  21 02110-1301, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero or a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL, filled in by init_expmed.  All the tables
  98    are indexed by mode; the shift tables are also indexed by shift count.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   /* Avoid using hard regs in ways which may be unsupported.  */
 149   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 150
 151   PUT_CODE (&all.plus, PLUS);
 152   XEXP (&all.plus, 0) = &all.reg;
 153   XEXP (&all.plus, 1) = &all.reg;
 154
 155   PUT_CODE (&all.neg, NEG);
 156   XEXP (&all.neg, 0) = &all.reg;
 157
 158   PUT_CODE (&all.udiv, UDIV);
 159   XEXP (&all.udiv, 0) = &all.reg;
 160   XEXP (&all.udiv, 1) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.div, DIV);
 167   XEXP (&all.div, 0) = &all.reg;
 168   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 169
 170   PUT_CODE (&all.mod, MOD);
 171   XEXP (&all.mod, 0) = &all.reg;
 172   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 173
 174   PUT_CODE (&all.zext, ZERO_EXTEND);
 175   XEXP (&all.zext, 0) = &all.reg;
 176
 177   PUT_CODE (&all.wide_mult, MULT);
 178   XEXP (&all.wide_mult, 0) = &all.zext;
 179   XEXP (&all.wide_mult, 1) = &all.zext;
 180
 181   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 182   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 183
 184   PUT_CODE (&all.wide_trunc, TRUNCATE);
 185   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 186
 187   PUT_CODE (&all.shift, ASHIFT);
 188   XEXP (&all.shift, 0) = &all.reg;
 189
 190   PUT_CODE (&all.shift_mult, MULT);
 191   XEXP (&all.shift_mult, 0) = &all.reg;
 192
 193   PUT_CODE (&all.shift_add, PLUS);
 194   XEXP (&all.shift_add, 0) = &all.shift_mult;
 195   XEXP (&all.shift_add, 1) = &all.reg;
 196
 197   PUT_CODE (&all.shift_sub, MINUS);
 198   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 199   XEXP (&all.shift_sub, 1) = &all.reg;
 200
 201   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202        mode != VOIDmode;
 203        mode = GET_MODE_WIDER_MODE (mode))
 204     {
 205       PUT_MODE (&all.reg, mode);
 206       PUT_MODE (&all.plus, mode);
 207       PUT_MODE (&all.neg, mode);
 208       PUT_MODE (&all.udiv, mode);
 209       PUT_MODE (&all.mult, mode);
 210       PUT_MODE (&all.div, mode);
 211       PUT_MODE (&all.mod, mode);
 212       PUT_MODE (&all.wide_trunc, mode);
 213       PUT_MODE (&all.shift, mode);
 214       PUT_MODE (&all.shift_mult, mode);
 215       PUT_MODE (&all.shift_add, mode);
 216       PUT_MODE (&all.shift_sub, mode);
 217
 218       add_cost[mode] = rtx_cost (&all.plus, SET);
 219       neg_cost[mode] = rtx_cost (&all.neg, SET);
 220       div_cost[mode] = rtx_cost (&all.udiv, SET);
 221       mul_cost[mode] = rtx_cost (&all.mult, SET);
 222
 223       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 224       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 225
 226       wider_mode = GET_MODE_WIDER_MODE (mode);
 227       if (wider_mode != VOIDmode)
 228         {
 229           PUT_MODE (&all.zext, wider_mode);
 230           PUT_MODE (&all.wide_mult, wider_mode);
 231           PUT_MODE (&all.wide_lshr, wider_mode);
 232           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 233
 234           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 235           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 236         }
 237
 238       shift_cost[mode][0] = 0;
 239       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 240
 241       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 242       for (m = 1; m < n; m++)
 243         {
 244           XEXP (&all.shift, 1) = cint[m];
 245           XEXP (&all.shift_mult, 1) = pow2[m];
 246
 247           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 248           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 249           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 250         }
 251     }
 252 }
 253
 254 /* Return an rtx representing minus the value of X.
 255    MODE is the intended mode of the result,
 256    useful if X is a CONST_INT.  */
 257
 258 rtx
 259 negate_rtx (enum machine_mode mode, rtx x)
 260 {
 261   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 262
 263   if (result == 0)
 264     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 265
 266   return result;
 267 }
 268
 269 /* Report on the availability of insv/extv/extzv and the desired mode
 270    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 271    is false; else the mode of the specified operand.  If OPNO is -1,
 272    all the caller cares about is whether the insn is available.  */
 273 enum machine_mode
 274 mode_for_extraction (enum extraction_pattern pattern, int opno)
 275 {
 276   const struct insn_data *data;
 277
 278   switch (pattern)
 279     {
 280     case EP_insv:
 281       if (HAVE_insv)
 282         {
 283           data = &insn_data[CODE_FOR_insv];
 284           break;
 285         }
 286       return MAX_MACHINE_MODE;
 287
 288     case EP_extv:
 289       if (HAVE_extv)
 290         {
 291           data = &insn_data[CODE_FOR_extv];
 292           break;
 293         }
 294       return MAX_MACHINE_MODE;
 295
 296     case EP_extzv:
 297       if (HAVE_extzv)
 298         {
 299           data = &insn_data[CODE_FOR_extzv];
 300           break;
 301         }
 302       return MAX_MACHINE_MODE;
 303
 304     default:
 305       gcc_unreachable ();
 306     }
 307
 308   if (opno == -1)
 309     return VOIDmode;
 310
 311   /* Everyone who uses this function used to follow it with
 312      if (result == VOIDmode) result = word_mode; */
 313   if (data->operand[opno].mode == VOIDmode)
 314     return word_mode;
 315   return data->operand[opno].mode;
 316 }
 317
 318 \f
 319 /* Generate code to store value from rtx VALUE
 320    into a bit-field within structure STR_RTX
 321    containing BITSIZE bits starting at bit BITNUM.
 322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 323    ALIGN is the alignment that STR_RTX is known to have.
 324    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 325
 326 /* ??? Note that there are two different ideas here for how
 327    to determine the size to count bits within, for a register.
 328    One is BITS_PER_WORD, and the other is the size of operand 3
 329    of the insv pattern.
 330
 331    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 332    else, we use the mode of operand 3.  */
 333
 334 rtx
 335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                  rtx value)
 338 {
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 356       op0 = SUBREG_REG (op0);
 357     }
 358
 359   /* No action is needed if the target is a register and if the field
 360      lies completely outside that register.  This can occur if the source
 361      code contains an out-of-bounds access to a small array.  */
 362   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 363     return value;
 364
 365   /* Use vec_set patterns for inserting parts of vectors whenever
 366      available.  */
 367   if (VECTOR_MODE_P (GET_MODE (op0))
 368       && !MEM_P (op0)
 369       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 370           != CODE_FOR_nothing)
 371       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 372       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 373       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 374     {
 375       enum machine_mode outermode = GET_MODE (op0);
 376       enum machine_mode innermode = GET_MODE_INNER (outermode);
 377       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 378       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 379       rtx rtxpos = GEN_INT (pos);
 380       rtx src = value;
 381       rtx dest = op0;
 382       rtx pat, seq;
 383       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 384       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 385       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 386
 387       start_sequence ();
 388
 389       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 390         src = copy_to_mode_reg (mode1, src);
 391
 392       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 393         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 394
 395       /* We could handle this, but we should always be called with a pseudo
 396          for our targets and all insns should take them as outputs.  */
 397       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 398                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 399                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 400       pat = GEN_FCN (icode) (dest, src, rtxpos);
 401       seq = get_insns ();
 402       end_sequence ();
 403       if (pat)
 404         {
 405           emit_insn (seq);
 406           emit_insn (pat);
 407           return dest;
 408         }
 409     }
 410
 411   /* If the target is a register, overwriting the entire object, or storing
 412      a full-word or multi-word field can be done with just a SUBREG.
 413
 414      If the target is memory, storing any naturally aligned field can be
 415      done with a simple store.  For targets that support fast unaligned
 416      memory, any naturally sized, unit aligned field can be done directly.  */
 417
 418   offset = bitnum / unit;
 419   bitpos = bitnum % unit;
 420   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 421                 + (offset * UNITS_PER_WORD);
 422
 423   if (bitpos == 0
 424       && bitsize == GET_MODE_BITSIZE (fieldmode)
 425       && (!MEM_P (op0)
 426           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 427              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 428              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 429           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 430              || (offset * BITS_PER_UNIT % bitsize == 0
 431                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 432     {
 433       if (GET_MODE (op0) != fieldmode)
 434         {
 435           if (MEM_P (op0))
 436             op0 = adjust_address (op0, fieldmode, offset);
 437           else
 438             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 439                                        byte_offset);
 440         }
 441       emit_move_insn (op0, value);
 442       return value;
 443     }
 444
 445   /* Make sure we are playing with integral modes.  Pun with subregs
 446      if we aren't.  This must come after the entire register case above,
 447      since that case is valid for any mode.  The following cases are only
 448      valid for integral modes.  */
 449   {
 450     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 451     if (imode != GET_MODE (op0))
 452       {
 453         if (MEM_P (op0))
 454           op0 = adjust_address (op0, imode, 0);
 455         else
 456           {
 457             gcc_assert (imode != BLKmode);
 458             op0 = gen_lowpart (imode, op0);
 459           }
 460       }
 461   }
 462
 463   /* We may be accessing data outside the field, which means
 464      we can alias adjacent data.  */
 465   if (MEM_P (op0))
 466     {
 467       op0 = shallow_copy_rtx (op0);
 468       set_mem_alias_set (op0, 0);
 469       set_mem_expr (op0, 0);
 470     }
 471
 472   /* If OP0 is a register, BITPOS must count within a word.
 473      But as we have it, it counts within whatever size OP0 now has.
 474      On a bigendian machine, these are not the same, so convert.  */
 475   if (BYTES_BIG_ENDIAN
 476       && !MEM_P (op0)
 477       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 478     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 479
 480   /* Storing an lsb-aligned field in a register
 481      can be done with a movestrict instruction.  */
 482
 483   if (!MEM_P (op0)
 484       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 485       && bitsize == GET_MODE_BITSIZE (fieldmode)
 486       && (movstrict_optab->handlers[fieldmode].insn_code
 487           != CODE_FOR_nothing))
 488     {
 489       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 490
 491       /* Get appropriate low part of the value being stored.  */
 492       if (GET_CODE (value) == CONST_INT || REG_P (value))
 493         value = gen_lowpart (fieldmode, value);
 494       else if (!(GET_CODE (value) == SYMBOL_REF
 495                  || GET_CODE (value) == LABEL_REF
 496                  || GET_CODE (value) == CONST))
 497         value = convert_to_mode (fieldmode, value, 0);
 498
 499       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 500         value = copy_to_mode_reg (fieldmode, value);
 501
 502       if (GET_CODE (op0) == SUBREG)
 503         {
 504           /* Else we've got some float mode source being extracted into
 505              a different float mode destination -- this combination of
 506              subregs results in Severe Tire Damage.  */
 507           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 508                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 509                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 510           op0 = SUBREG_REG (op0);
 511         }
 512
 513       emit_insn (GEN_FCN (icode)
 514                  (gen_rtx_SUBREG (fieldmode, op0,
 515                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 516                                   + (offset * UNITS_PER_WORD)),
 517                                   value));
 518
 519       return value;
 520     }
 521
 522   /* Handle fields bigger than a word.  */
 523
 524   if (bitsize > BITS_PER_WORD)
 525     {
 526       /* Here we transfer the words of the field
 527          in the order least significant first.
 528          This is because the most significant word is the one which may
 529          be less than full.
 530          However, only do that if the value is not BLKmode.  */
 531
 532       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 533       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 534       unsigned int i;
 535
 536       /* This is the mode we must force value to, so that there will be enough
 537          subwords to extract.  Note that fieldmode will often (always?) be
 538          VOIDmode, because that is what store_field uses to indicate that this
 539          is a bit field, but passing VOIDmode to operand_subword_force
 540          is not allowed.  */
 541       fieldmode = GET_MODE (value);
 542       if (fieldmode == VOIDmode)
 543         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 544
 545       for (i = 0; i < nwords; i++)
 546         {
 547           /* If I is 0, use the low-order word in both field and target;
 548              if I is 1, use the next to lowest word; and so on.  */
 549           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 550           unsigned int bit_offset = (backwards
 551                                      ? MAX ((int) bitsize - ((int) i + 1)
 552                                             * BITS_PER_WORD,
 553                                             0)
 554                                      : (int) i * BITS_PER_WORD);
 555
 556           store_bit_field (op0, MIN (BITS_PER_WORD,
 557                                      bitsize - i * BITS_PER_WORD),
 558                            bitnum + bit_offset, word_mode,
 559                            operand_subword_force (value, wordnum, fieldmode));
 560         }
 561       return value;
 562     }
 563
 564   /* From here on we can assume that the field to be stored in is
 565      a full-word (whatever type that is), since it is shorter than a word.  */
 566
 567   /* OFFSET is the number of words or bytes (UNIT says which)
 568      from STR_RTX to the first word or byte containing part of the field.  */
 569
 570   if (!MEM_P (op0))
 571     {
 572       if (offset != 0
 573           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 574         {
 575           if (!REG_P (op0))
 576             {
 577               /* Since this is a destination (lvalue), we can't copy
 578                  it to a pseudo.  We can remove a SUBREG that does not
 579                  change the size of the operand.  Such a SUBREG may
 580                  have been added above.  */
 581               gcc_assert (GET_CODE (op0) == SUBREG
 582                           && (GET_MODE_SIZE (GET_MODE (op0))
 583                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 584               op0 = SUBREG_REG (op0);
 585             }
 586           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 587                                 op0, (offset * UNITS_PER_WORD));
 588         }
 589       offset = 0;
 590     }
 591
 592   /* If VALUE has a floating-point or complex mode, access it as an
 593      integer of the corresponding size.  This can occur on a machine
 594      with 64 bit registers that uses SFmode for float.  It can also
 595      occur for unaligned float or complex fields.  */
 596   orig_value = value;
 597   if (GET_MODE (value) != VOIDmode
 598       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 599       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 600     {
 601       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 602       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 603     }
 604
 605   /* Now OFFSET is nonzero only if OP0 is memory
 606      and is therefore always measured in bytes.  */
 607
 608   if (HAVE_insv
 609       && GET_MODE (value) != BLKmode
 610       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 611       && bitsize > 0
 612       && GET_MODE_BITSIZE (op_mode) >= bitsize
 613       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 614             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 615     {
 616       int xbitpos = bitpos;
 617       rtx value1;
 618       rtx xop0 = op0;
 619       rtx last = get_last_insn ();
 620       rtx pat;
 621       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 622       int save_volatile_ok = volatile_ok;
 623
 624       volatile_ok = 1;
 625
 626       /* If this machine's insv can only insert into a register, copy OP0
 627          into a register and save it back later.  */
 628       if (MEM_P (op0)
 629           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 630                 (op0, VOIDmode)))
 631         {
 632           rtx tempreg;
 633           enum machine_mode bestmode;
 634
 635           /* Get the mode to use for inserting into this field.  If OP0 is
 636              BLKmode, get the smallest mode consistent with the alignment. If
 637              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 638              mode. Otherwise, use the smallest mode containing the field.  */
 639
 640           if (GET_MODE (op0) == BLKmode
 641               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 642             bestmode
 643               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 644                                MEM_VOLATILE_P (op0));
 645           else
 646             bestmode = GET_MODE (op0);
 647
 648           if (bestmode == VOIDmode
 649               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 650                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 651             goto insv_loses;
 652
 653           /* Adjust address to point to the containing unit of that mode.
 654              Compute offset as multiple of this unit, counting in bytes.  */
 655           unit = GET_MODE_BITSIZE (bestmode);
 656           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 657           bitpos = bitnum % unit;
 658           op0 = adjust_address (op0, bestmode,  offset);
 659
 660           /* Fetch that unit, store the bitfield in it, then store
 661              the unit.  */
 662           tempreg = copy_to_reg (op0);
 663           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 664           emit_move_insn (op0, tempreg);
 665           return value;
 666         }
 667       volatile_ok = save_volatile_ok;
 668
 669       /* Add OFFSET into OP0's address.  */
 670       if (MEM_P (xop0))
 671         xop0 = adjust_address (xop0, byte_mode, offset);
 672
 673       /* If xop0 is a register, we need it in MAXMODE
 674          to make it acceptable to the format of insv.  */
 675       if (GET_CODE (xop0) == SUBREG)
 676         /* We can't just change the mode, because this might clobber op0,
 677            and we will need the original value of op0 if insv fails.  */
 678         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 679       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 680         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 681
 682       /* On big-endian machines, we count bits from the most significant.
 683          If the bit field insn does not, we must invert.  */
 684
 685       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 686         xbitpos = unit - bitsize - xbitpos;
 687
 688       /* We have been counting XBITPOS within UNIT.
 689          Count instead within the size of the register.  */
 690       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 691         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 692
 693       unit = GET_MODE_BITSIZE (maxmode);
 694
 695       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 696       value1 = value;
 697       if (GET_MODE (value) != maxmode)
 698         {
 699           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 700             {
 701               /* Optimization: Don't bother really extending VALUE
 702                  if it has all the bits we will actually use.  However,
 703                  if we must narrow it, be sure we do it correctly.  */
 704
 705               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 706                 {
 707                   rtx tmp;
 708
 709                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 710                   if (! tmp)
 711                     tmp = simplify_gen_subreg (maxmode,
 712                                                force_reg (GET_MODE (value),
 713                                                           value1),
 714                                                GET_MODE (value), 0);
 715                   value1 = tmp;
 716                 }
 717               else
 718                 value1 = gen_lowpart (maxmode, value1);
 719             }
 720           else if (GET_CODE (value) == CONST_INT)
 721             value1 = gen_int_mode (INTVAL (value), maxmode);
 722           else
 723             /* Parse phase is supposed to make VALUE's data type
 724                match that of the component reference, which is a type
 725                at least as wide as the field; so VALUE should have
 726                a mode that corresponds to that type.  */
 727             gcc_assert (CONSTANT_P (value));
 728         }
 729
 730       /* If this machine's insv insists on a register,
 731          get VALUE1 into a register.  */
 732       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 733              (value1, maxmode)))
 734         value1 = force_reg (maxmode, value1);
 735
 736       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 737       if (pat)
 738         emit_insn (pat);
 739       else
 740         {
 741           delete_insns_since (last);
 742           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 743         }
 744     }
 745   else
 746     insv_loses:
 747     /* Insv is not available; store using shifts and boolean ops.  */
 748     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 749   return value;
 750 }
 751 \f
 752 /* Use shifts and boolean operations to store VALUE
 753    into a bit field of width BITSIZE
 754    in a memory location specified by OP0 except offset by OFFSET bytes.
 755      (OFFSET must be 0 if OP0 is a register.)
 756    The field starts at position BITPOS within the byte.
 757     (If OP0 is a register, it may be a full word or a narrower mode,
 758      but BITPOS still counts within a full word,
 759      which is significant on bigendian machines.)  */
 760
 761 static void
 762 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 763                        unsigned HOST_WIDE_INT bitsize,
 764                        unsigned HOST_WIDE_INT bitpos, rtx value)
 765 {
 766   enum machine_mode mode;
 767   unsigned int total_bits = BITS_PER_WORD;
 768   rtx subtarget, temp;
 769   int all_zero = 0;
 770   int all_one = 0;
 771
 772   /* There is a case not handled here:
 773      a structure with a known alignment of just a halfword
 774      and a field split across two aligned halfwords within the structure.
 775      Or likewise a structure with a known alignment of just a byte
 776      and a field split across two bytes.
 777      Such cases are not supposed to be able to occur.  */
 778
 779   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 780     {
 781       gcc_assert (!offset);
 782       /* Special treatment for a bit field split across two registers.  */
 783       if (bitsize + bitpos > BITS_PER_WORD)
 784         {
 785           store_split_bit_field (op0, bitsize, bitpos, value);
 786           return;
 787         }
 788     }
 789   else
 790     {
 791       /* Get the proper mode to use for this field.  We want a mode that
 792          includes the entire field.  If such a mode would be larger than
 793          a word, we won't be doing the extraction the normal way.
 794          We don't want a mode bigger than the destination.  */
 795
 796       mode = GET_MODE (op0);
 797       if (GET_MODE_BITSIZE (mode) == 0
 798           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 799         mode = word_mode;
 800       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 801                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 802
 803       if (mode == VOIDmode)
 804         {
 805           /* The only way this should occur is if the field spans word
 806              boundaries.  */
 807           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 808                                  value);
 809           return;
 810         }
 811
 812       total_bits = GET_MODE_BITSIZE (mode);
 813
 814       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 815          be in the range 0 to total_bits-1, and put any excess bytes in
 816          OFFSET.  */
 817       if (bitpos >= total_bits)
 818         {
 819           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 820           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 821                      * BITS_PER_UNIT);
 822         }
 823
 824       /* Get ref to an aligned byte, halfword, or word containing the field.
 825          Adjust BITPOS to be position within a word,
 826          and OFFSET to be the offset of that word.
 827          Then alter OP0 to refer to that word.  */
 828       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 829       offset -= (offset % (total_bits / BITS_PER_UNIT));
 830       op0 = adjust_address (op0, mode, offset);
 831     }
 832
 833   mode = GET_MODE (op0);
 834
 835   /* Now MODE is either some integral mode for a MEM as OP0,
 836      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 837      The bit field is contained entirely within OP0.
 838      BITPOS is the starting bit number within OP0.
 839      (OP0's mode may actually be narrower than MODE.)  */
 840
 841   if (BYTES_BIG_ENDIAN)
 842       /* BITPOS is the distance between our msb
 843          and that of the containing datum.
 844          Convert it to the distance from the lsb.  */
 845       bitpos = total_bits - bitsize - bitpos;
 846
 847   /* Now BITPOS is always the distance between our lsb
 848      and that of OP0.  */
 849
 850   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 851      we must first convert its mode to MODE.  */
 852
 853   if (GET_CODE (value) == CONST_INT)
 854     {
 855       HOST_WIDE_INT v = INTVAL (value);
 856
 857       if (bitsize < HOST_BITS_PER_WIDE_INT)
 858         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 859
 860       if (v == 0)
 861         all_zero = 1;
 862       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 863                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 864                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 865         all_one = 1;
 866
 867       value = lshift_value (mode, value, bitpos, bitsize);
 868     }
 869   else
 870     {
 871       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 872                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 873
 874       if (GET_MODE (value) != mode)
 875         {
 876           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 877               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 878             value = gen_lowpart (mode, value);
 879           else
 880             value = convert_to_mode (mode, value, 1);
 881         }
 882
 883       if (must_and)
 884         value = expand_binop (mode, and_optab, value,
 885                               mask_rtx (mode, 0, bitsize, 0),
 886                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 887       if (bitpos > 0)
 888         value = expand_shift (LSHIFT_EXPR, mode, value,
 889                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 890     }
 891
 892   /* Now clear the chosen bits in OP0,
 893      except that if VALUE is -1 we need not bother.  */
 894
 895   subtarget = op0;
 896
 897   if (! all_one)
 898     {
 899       temp = expand_binop (mode, and_optab, op0,
 900                            mask_rtx (mode, bitpos, bitsize, 1),
 901                            subtarget, 1, OPTAB_LIB_WIDEN);
 902       subtarget = temp;
 903     }
 904   else
 905     temp = op0;
 906
 907   /* Now logical-or VALUE into OP0, unless it is zero.  */
 908
 909   if (! all_zero)
 910     temp = expand_binop (mode, ior_optab, temp, value,
 911                          subtarget, 1, OPTAB_LIB_WIDEN);
 912   if (op0 != temp)
 913     emit_move_insn (op0, temp);
 914 }
 915 \f
 916 /* Store a bit field that is split across multiple accessible memory objects.
 917
 918    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 919    BITSIZE is the field width; BITPOS the position of its first bit
 920    (within the word).
 921    VALUE is the value to store.
 922
 923    This does not yet handle fields wider than BITS_PER_WORD.  */
 924
 925 static void
 926 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 927                        unsigned HOST_WIDE_INT bitpos, rtx value)
 928 {
 929   unsigned int unit;
 930   unsigned int bitsdone = 0;
 931
 932   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 933      much at a time.  */
 934   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 935     unit = BITS_PER_WORD;
 936   else
 937     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 938
 939   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 940      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 941      that VALUE might be a floating-point constant.  */
 942   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 943     {
 944       rtx word = gen_lowpart_common (word_mode, value);
 945
 946       if (word && (value != word))
 947         value = word;
 948       else
 949         value = gen_lowpart_common (word_mode,
 950                                     force_reg (GET_MODE (value) != VOIDmode
 951                                                ? GET_MODE (value)
 952                                                : word_mode, value));
 953     }
 954
 955   while (bitsdone < bitsize)
 956     {
 957       unsigned HOST_WIDE_INT thissize;
 958       rtx part, word;
 959       unsigned HOST_WIDE_INT thispos;
 960       unsigned HOST_WIDE_INT offset;
 961
 962       offset = (bitpos + bitsdone) / unit;
 963       thispos = (bitpos + bitsdone) % unit;
 964
 965       /* THISSIZE must not overrun a word boundary.  Otherwise,
 966          store_fixed_bit_field will call us again, and we will mutually
 967          recurse forever.  */
 968       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 969       thissize = MIN (thissize, unit - thispos);
 970
 971       if (BYTES_BIG_ENDIAN)
 972         {
 973           int total_bits;
 974
 975           /* We must do an endian conversion exactly the same way as it is
 976              done in extract_bit_field, so that the two calls to
 977              extract_fixed_bit_field will have comparable arguments.  */
 978           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 979             total_bits = BITS_PER_WORD;
 980           else
 981             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 982
 983           /* Fetch successively less significant portions.  */
 984           if (GET_CODE (value) == CONST_INT)
 985             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 986                              >> (bitsize - bitsdone - thissize))
 987                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 988           else
 989             /* The args are chosen so that the last part includes the
 990                lsb.  Give extract_bit_field the value it needs (with
 991                endianness compensation) to fetch the piece we want.  */
 992             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 993                                             total_bits - bitsize + bitsdone,
 994                                             NULL_RTX, 1);
 995         }
 996       else
 997         {
 998           /* Fetch successively more significant portions.  */
 999           if (GET_CODE (value) == CONST_INT)
1000             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1001                              >> bitsdone)
1002                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1003           else
1004             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1005                                             bitsdone, NULL_RTX, 1);
1006         }
1007
1008       /* If OP0 is a register, then handle OFFSET here.
1009
1010          When handling multiword bitfields, extract_bit_field may pass
1011          down a word_mode SUBREG of a larger REG for a bitfield that actually
1012          crosses a word boundary.  Thus, for a SUBREG, we must find
1013          the current word starting from the base register.  */
1014       if (GET_CODE (op0) == SUBREG)
1015         {
1016           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1017           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1018                                         GET_MODE (SUBREG_REG (op0)));
1019           offset = 0;
1020         }
1021       else if (REG_P (op0))
1022         {
1023           word = operand_subword_force (op0, offset, GET_MODE (op0));
1024           offset = 0;
1025         }
1026       else
1027         word = op0;
1028
1029       /* OFFSET is in UNITs, and UNIT is in bits.
1030          store_fixed_bit_field wants offset in bytes.  */
1031       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1032                              thispos, part);
1033       bitsdone += thissize;
1034     }
1035 }
1036 \f
1037 /* Generate code to extract a byte-field from STR_RTX
1038    containing BITSIZE bits, starting at BITNUM,
1039    and put it in TARGET if possible (if TARGET is nonzero).
1040    Regardless of TARGET, we return the rtx for where the value is placed.
1041
1042    STR_RTX is the structure containing the byte (a REG or MEM).
1043    UNSIGNEDP is nonzero if this is an unsigned bit field.
1044    MODE is the natural mode of the field value once extracted.
1045    TMODE is the mode the caller would like the value to have;
1046    but the value may be returned with type MODE instead.
1047
1048    TOTAL_SIZE is the size in bytes of the containing structure,
1049    or -1 if varying.
1050
1051    If a TARGET is specified and we can store in it at no extra cost,
1052    we do so, and return TARGET.
1053    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1054    if they are equally easy.  */
1055
1056 rtx
1057 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1058                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1059                    enum machine_mode mode, enum machine_mode tmode)
1060 {
1061   unsigned int unit
1062     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1063   unsigned HOST_WIDE_INT offset, bitpos;
1064   rtx op0 = str_rtx;
1065   rtx spec_target = target;
1066   rtx spec_target_subreg = 0;
1067   enum machine_mode int_mode;
1068   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1069   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1070   enum machine_mode mode1;
1071   int byte_offset;
1072
1073   if (tmode == VOIDmode)
1074     tmode = mode;
1075
1076   while (GET_CODE (op0) == SUBREG)
1077     {
1078       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1079       op0 = SUBREG_REG (op0);
1080     }
1081
1082   /* If we have an out-of-bounds access to a register, just return an
1083      uninitialized register of the required mode.  This can occur if the
1084      source code contains an out-of-bounds access to a small array.  */
1085   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1086     return gen_reg_rtx (tmode);
1087
1088   if (REG_P (op0)
1089       && mode == GET_MODE (op0)
1090       && bitnum == 0
1091       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1092     {
1093       /* We're trying to extract a full register from itself.  */
1094       return op0;
1095     }
1096
1097   /* Use vec_extract patterns for extracting parts of vectors whenever
1098      available.  */
1099   if (VECTOR_MODE_P (GET_MODE (op0))
1100       && !MEM_P (op0)
1101       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1102           != CODE_FOR_nothing)
1103       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1104           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1105     {
1106       enum machine_mode outermode = GET_MODE (op0);
1107       enum machine_mode innermode = GET_MODE_INNER (outermode);
1108       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1109       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1110       rtx rtxpos = GEN_INT (pos);
1111       rtx src = op0;
1112       rtx dest = NULL, pat, seq;
1113       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1114       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1115       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1116
1117       if (innermode == tmode || innermode == mode)
1118         dest = target;
1119
1120       if (!dest)
1121         dest = gen_reg_rtx (innermode);
1122
1123       start_sequence ();
1124
1125       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1126         dest = copy_to_mode_reg (mode0, dest);
1127
1128       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1129         src = copy_to_mode_reg (mode1, src);
1130
1131       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1132         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1133
1134       /* We could handle this, but we should always be called with a pseudo
1135          for our targets and all insns should take them as outputs.  */
1136       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1137                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1138                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1139
1140       pat = GEN_FCN (icode) (dest, src, rtxpos);
1141       seq = get_insns ();
1142       end_sequence ();
1143       if (pat)
1144         {
1145           emit_insn (seq);
1146           emit_insn (pat);
1147           return dest;
1148         }
1149     }
1150
1151   /* Make sure we are playing with integral modes.  Pun with subregs
1152      if we aren't.  */
1153   {
1154     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1155     if (imode != GET_MODE (op0))
1156       {
1157         if (MEM_P (op0))
1158           op0 = adjust_address (op0, imode, 0);
1159         else
1160           {
1161             gcc_assert (imode != BLKmode);
1162             op0 = gen_lowpart (imode, op0);
1163
1164             /* If we got a SUBREG, force it into a register since we
1165                aren't going to be able to do another SUBREG on it.  */
1166             if (GET_CODE (op0) == SUBREG)
1167               op0 = force_reg (imode, op0);
1168           }
1169       }
1170   }
1171
1172   /* We may be accessing data outside the field, which means
1173      we can alias adjacent data.  */
1174   if (MEM_P (op0))
1175     {
1176       op0 = shallow_copy_rtx (op0);
1177       set_mem_alias_set (op0, 0);
1178       set_mem_expr (op0, 0);
1179     }
1180
1181   /* Extraction of a full-word or multi-word value from a structure
1182      in a register or aligned memory can be done with just a SUBREG.
1183      A subword value in the least significant part of a register
1184      can also be extracted with a SUBREG.  For this, we need the
1185      byte offset of the value in op0.  */
1186
1187   bitpos = bitnum % unit;
1188   offset = bitnum / unit;
1189   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1190
1191   /* If OP0 is a register, BITPOS must count within a word.
1192      But as we have it, it counts within whatever size OP0 now has.
1193      On a bigendian machine, these are not the same, so convert.  */
1194   if (BYTES_BIG_ENDIAN
1195       && !MEM_P (op0)
1196       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1197     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1198
1199   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1200      If that's wrong, the solution is to test for it and set TARGET to 0
1201      if needed.  */
1202
1203   /* Only scalar integer modes can be converted via subregs.  There is an
1204      additional problem for FP modes here in that they can have a precision
1205      which is different from the size.  mode_for_size uses precision, but
1206      we want a mode based on the size, so we must avoid calling it for FP
1207      modes.  */
1208   mode1  = (SCALAR_INT_MODE_P (tmode)
1209             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1210             : mode);
1211
1212   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1213         && bitpos % BITS_PER_WORD == 0)
1214        || (mode1 != BLKmode
1215            /* ??? The big endian test here is wrong.  This is correct
1216               if the value is in a register, and if mode_for_size is not
1217               the same mode as op0.  This causes us to get unnecessarily
1218               inefficient code from the Thumb port when -mbig-endian.  */
1219            && (BYTES_BIG_ENDIAN
1220                ? bitpos + bitsize == BITS_PER_WORD
1221                : bitpos == 0)))
1222       && ((!MEM_P (op0)
1223            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1224                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1225            && GET_MODE_SIZE (mode1) != 0
1226            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1227           || (MEM_P (op0)
1228               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1229                   || (offset * BITS_PER_UNIT % bitsize == 0
1230                       && MEM_ALIGN (op0) % bitsize == 0)))))
1231     {
1232       if (mode1 != GET_MODE (op0))
1233         {
1234           if (MEM_P (op0))
1235             op0 = adjust_address (op0, mode1, offset);
1236           else
1237             {
1238               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1239                                              byte_offset);
1240               if (sub == NULL)
1241                 goto no_subreg_mode_swap;
1242               op0 = sub;
1243             }
1244         }
1245       if (mode1 != mode)
1246         return convert_to_mode (tmode, op0, unsignedp);
1247       return op0;
1248     }
1249  no_subreg_mode_swap:
1250
1251   /* Handle fields bigger than a word.  */
1252
1253   if (bitsize > BITS_PER_WORD)
1254     {
1255       /* Here we transfer the words of the field
1256          in the order least significant first.
1257          This is because the most significant word is the one which may
1258          be less than full.  */
1259
1260       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1261       unsigned int i;
1262
1263       if (target == 0 || !REG_P (target))
1264         target = gen_reg_rtx (mode);
1265
1266       /* Indicate for flow that the entire target reg is being set.  */
1267       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1268
1269       for (i = 0; i < nwords; i++)
1270         {
1271           /* If I is 0, use the low-order word in both field and target;
1272              if I is 1, use the next to lowest word; and so on.  */
1273           /* Word number in TARGET to use.  */
1274           unsigned int wordnum
1275             = (WORDS_BIG_ENDIAN
1276                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1277                : i);
1278           /* Offset from start of field in OP0.  */
1279           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1280                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1281                                                 * (int) BITS_PER_WORD))
1282                                      : (int) i * BITS_PER_WORD);
1283           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1284           rtx result_part
1285             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1286                                            bitsize - i * BITS_PER_WORD),
1287                                  bitnum + bit_offset, 1, target_part, mode,
1288                                  word_mode);
1289
1290           gcc_assert (target_part);
1291
1292           if (result_part != target_part)
1293             emit_move_insn (target_part, result_part);
1294         }
1295
1296       if (unsignedp)
1297         {
1298           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1299              need to be zero'd out.  */
1300           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1301             {
1302               unsigned int i, total_words;
1303
1304               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1305               for (i = nwords; i < total_words; i++)
1306                 emit_move_insn
1307                   (operand_subword (target,
1308                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1309                                     1, VOIDmode),
1310                    const0_rtx);
1311             }
1312           return target;
1313         }
1314
1315       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1316       target = expand_shift (LSHIFT_EXPR, mode, target,
1317                              build_int_cst (NULL_TREE,
1318                                             GET_MODE_BITSIZE (mode) - bitsize),
1319                              NULL_RTX, 0);
1320       return expand_shift (RSHIFT_EXPR, mode, target,
1321                            build_int_cst (NULL_TREE,
1322                                           GET_MODE_BITSIZE (mode) - bitsize),
1323                            NULL_RTX, 0);
1324     }
1325
1326   /* From here on we know the desired field is smaller than a word.  */
1327
1328   /* Check if there is a correspondingly-sized integer field, so we can
1329      safely extract it as one size of integer, if necessary; then
1330      truncate or extend to the size that is wanted; then use SUBREGs or
1331      convert_to_mode to get one of the modes we really wanted.  */
1332
1333   int_mode = int_mode_for_mode (tmode);
1334   if (int_mode == BLKmode)
1335     int_mode = int_mode_for_mode (mode);
1336   /* Should probably push op0 out to memory and then do a load.  */
1337   gcc_assert (int_mode != BLKmode);
1338
1339   /* OFFSET is the number of words or bytes (UNIT says which)
1340      from STR_RTX to the first word or byte containing part of the field.  */
1341   if (!MEM_P (op0))
1342     {
1343       if (offset != 0
1344           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1345         {
1346           if (!REG_P (op0))
1347             op0 = copy_to_reg (op0);
1348           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1349                                 op0, (offset * UNITS_PER_WORD));
1350         }
1351       offset = 0;
1352     }
1353
1354   /* Now OFFSET is nonzero only for memory operands.  */
1355
1356   if (unsignedp)
1357     {
1358       if (HAVE_extzv
1359           && bitsize > 0
1360           && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1361           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1362                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1363         {
1364           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1365           rtx bitsize_rtx, bitpos_rtx;
1366           rtx last = get_last_insn ();
1367           rtx xop0 = op0;
1368           rtx xtarget = target;
1369           rtx xspec_target = spec_target;
1370           rtx xspec_target_subreg = spec_target_subreg;
1371           rtx pat;
1372           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1373
1374           if (MEM_P (xop0))
1375             {
1376               int save_volatile_ok = volatile_ok;
1377               volatile_ok = 1;
1378
1379               /* Is the memory operand acceptable?  */
1380               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1381                      (xop0, GET_MODE (xop0))))
1382                 {
1383                   /* No, load into a reg and extract from there.  */
1384                   enum machine_mode bestmode;
1385
1386                   /* Get the mode to use for inserting into this field.  If
1387                      OP0 is BLKmode, get the smallest mode consistent with the
1388                      alignment. If OP0 is a non-BLKmode object that is no
1389                      wider than MAXMODE, use its mode. Otherwise, use the
1390                      smallest mode containing the field.  */
1391
1392                   if (GET_MODE (xop0) == BLKmode
1393                       || (GET_MODE_SIZE (GET_MODE (op0))
1394                           > GET_MODE_SIZE (maxmode)))
1395                     bestmode = get_best_mode (bitsize, bitnum,
1396                                               MEM_ALIGN (xop0), maxmode,
1397                                               MEM_VOLATILE_P (xop0));
1398                   else
1399                     bestmode = GET_MODE (xop0);
1400
1401                   if (bestmode == VOIDmode
1402                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1403                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1404                     goto extzv_loses;
1405
1406                   /* Compute offset as multiple of this unit,
1407                      counting in bytes.  */
1408                   unit = GET_MODE_BITSIZE (bestmode);
1409                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1410                   xbitpos = bitnum % unit;
1411                   xop0 = adjust_address (xop0, bestmode, xoffset);
1412
1413                   /* Fetch it to a register in that size.  */
1414                   xop0 = force_reg (bestmode, xop0);
1415
1416                   /* XBITPOS counts within UNIT, which is what is expected.  */
1417                 }
1418               else
1419                 /* Get ref to first byte containing part of the field.  */
1420                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1421
1422               volatile_ok = save_volatile_ok;
1423             }
1424
1425           /* If op0 is a register, we need it in MAXMODE (which is usually
1426              SImode) to make it acceptable to the format of extzv.  */
1427           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1428             goto extzv_loses;
1429           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1430             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1431
1432           /* On big-endian machines, we count bits from the most significant.
1433              If the bit field insn does not, we must invert.  */
1434           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1435             xbitpos = unit - bitsize - xbitpos;
1436
1437           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1438           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1439             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1440
1441           unit = GET_MODE_BITSIZE (maxmode);
1442
1443           if (xtarget == 0)
1444             xtarget = xspec_target = gen_reg_rtx (tmode);
1445
1446           if (GET_MODE (xtarget) != maxmode)
1447             {
1448               if (REG_P (xtarget))
1449                 {
1450                   int wider = (GET_MODE_SIZE (maxmode)
1451                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1452                   xtarget = gen_lowpart (maxmode, xtarget);
1453                   if (wider)
1454                     xspec_target_subreg = xtarget;
1455                 }
1456               else
1457                 xtarget = gen_reg_rtx (maxmode);
1458             }
1459
1460           /* If this machine's extzv insists on a register target,
1461              make sure we have one.  */
1462           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1463                  (xtarget, maxmode)))
1464             xtarget = gen_reg_rtx (maxmode);
1465
1466           bitsize_rtx = GEN_INT (bitsize);
1467           bitpos_rtx = GEN_INT (xbitpos);
1468
1469           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1470           if (pat)
1471             {
1472               emit_insn (pat);
1473               target = xtarget;
1474               spec_target = xspec_target;
1475               spec_target_subreg = xspec_target_subreg;
1476             }
1477           else
1478             {
1479               delete_insns_since (last);
1480               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1481                                                 bitpos, target, 1);
1482             }
1483         }
1484       else
1485       extzv_loses:
1486         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1487                                           bitpos, target, 1);
1488     }
1489   else
1490     {
1491       if (HAVE_extv
1492           && bitsize > 0
1493           && GET_MODE_BITSIZE (extv_mode) >= bitsize
1494           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1495                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1496         {
1497           int xbitpos = bitpos, xoffset = offset;
1498           rtx bitsize_rtx, bitpos_rtx;
1499           rtx last = get_last_insn ();
1500           rtx xop0 = op0, xtarget = target;
1501           rtx xspec_target = spec_target;
1502           rtx xspec_target_subreg = spec_target_subreg;
1503           rtx pat;
1504           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1505
1506           if (MEM_P (xop0))
1507             {
1508               /* Is the memory operand acceptable?  */
1509               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1510                      (xop0, GET_MODE (xop0))))
1511                 {
1512                   /* No, load into a reg and extract from there.  */
1513                   enum machine_mode bestmode;
1514
1515                   /* Get the mode to use for inserting into this field.  If
1516                      OP0 is BLKmode, get the smallest mode consistent with the
1517                      alignment. If OP0 is a non-BLKmode object that is no
1518                      wider than MAXMODE, use its mode. Otherwise, use the
1519                      smallest mode containing the field.  */
1520
1521                   if (GET_MODE (xop0) == BLKmode
1522                       || (GET_MODE_SIZE (GET_MODE (op0))
1523                           > GET_MODE_SIZE (maxmode)))
1524                     bestmode = get_best_mode (bitsize, bitnum,
1525                                               MEM_ALIGN (xop0), maxmode,
1526                                               MEM_VOLATILE_P (xop0));
1527                   else
1528                     bestmode = GET_MODE (xop0);
1529
1530                   if (bestmode == VOIDmode
1531                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1532                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1533                     goto extv_loses;
1534
1535                   /* Compute offset as multiple of this unit,
1536                      counting in bytes.  */
1537                   unit = GET_MODE_BITSIZE (bestmode);
1538                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1539                   xbitpos = bitnum % unit;
1540                   xop0 = adjust_address (xop0, bestmode, xoffset);
1541
1542                   /* Fetch it to a register in that size.  */
1543                   xop0 = force_reg (bestmode, xop0);
1544
1545                   /* XBITPOS counts within UNIT, which is what is expected.  */
1546                 }
1547               else
1548                 /* Get ref to first byte containing part of the field.  */
1549                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1550             }
1551
1552           /* If op0 is a register, we need it in MAXMODE (which is usually
1553              SImode) to make it acceptable to the format of extv.  */
1554           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1555             goto extv_loses;
1556           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1557             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1558
1559           /* On big-endian machines, we count bits from the most significant.
1560              If the bit field insn does not, we must invert.  */
1561           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1562             xbitpos = unit - bitsize - xbitpos;
1563
1564           /* XBITPOS counts within a size of UNIT.
1565              Adjust to count within a size of MAXMODE.  */
1566           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1567             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1568
1569           unit = GET_MODE_BITSIZE (maxmode);
1570
1571           if (xtarget == 0)
1572             xtarget = xspec_target = gen_reg_rtx (tmode);
1573
1574           if (GET_MODE (xtarget) != maxmode)
1575             {
1576               if (REG_P (xtarget))
1577                 {
1578                   int wider = (GET_MODE_SIZE (maxmode)
1579                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1580                   xtarget = gen_lowpart (maxmode, xtarget);
1581                   if (wider)
1582                     xspec_target_subreg = xtarget;
1583                 }
1584               else
1585                 xtarget = gen_reg_rtx (maxmode);
1586             }
1587
1588           /* If this machine's extv insists on a register target,
1589              make sure we have one.  */
1590           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1591                  (xtarget, maxmode)))
1592             xtarget = gen_reg_rtx (maxmode);
1593
1594           bitsize_rtx = GEN_INT (bitsize);
1595           bitpos_rtx = GEN_INT (xbitpos);
1596
1597           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1598           if (pat)
1599             {
1600               emit_insn (pat);
1601               target = xtarget;
1602               spec_target = xspec_target;
1603               spec_target_subreg = xspec_target_subreg;
1604             }
1605           else
1606             {
1607               delete_insns_since (last);
1608               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1609                                                 bitpos, target, 0);
1610             }
1611         }
1612       else
1613       extv_loses:
1614         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1615                                           bitpos, target, 0);
1616     }
1617   if (target == spec_target)
1618     return target;
1619   if (target == spec_target_subreg)
1620     return spec_target;
1621   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1622     {
1623       /* If the target mode is not a scalar integral, first convert to the
1624          integer mode of that size and then access it as a floating-point
1625          value via a SUBREG.  */
1626       if (!SCALAR_INT_MODE_P (tmode))
1627         {
1628           enum machine_mode smode
1629             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1630           target = convert_to_mode (smode, target, unsignedp);
1631           target = force_reg (smode, target);
1632           return gen_lowpart (tmode, target);
1633         }
1634
1635       return convert_to_mode (tmode, target, unsignedp);
1636     }
1637   return target;
1638 }
1639 \f
1640 /* Extract a bit field using shifts and boolean operations
1641    Returns an rtx to represent the value.
1642    OP0 addresses a register (word) or memory (byte).
1643    BITPOS says which bit within the word or byte the bit field starts in.
1644    OFFSET says how many bytes farther the bit field starts;
1645     it is 0 if OP0 is a register.
1646    BITSIZE says how many bits long the bit field is.
1647     (If OP0 is a register, it may be narrower than a full word,
1648      but BITPOS still counts within a full word,
1649      which is significant on bigendian machines.)
1650
1651    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1652    If TARGET is nonzero, attempts to store the value there
1653    and return TARGET, but this is not guaranteed.
1654    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1655
1656 static rtx
1657 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1658                          unsigned HOST_WIDE_INT offset,
1659                          unsigned HOST_WIDE_INT bitsize,
1660                          unsigned HOST_WIDE_INT bitpos, rtx target,
1661                          int unsignedp)
1662 {
1663   unsigned int total_bits = BITS_PER_WORD;
1664   enum machine_mode mode;
1665
1666   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1667     {
1668       /* Special treatment for a bit field split across two registers.  */
1669       if (bitsize + bitpos > BITS_PER_WORD)
1670         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1671     }
1672   else
1673     {
1674       /* Get the proper mode to use for this field.  We want a mode that
1675          includes the entire field.  If such a mode would be larger than
1676          a word, we won't be doing the extraction the normal way.  */
1677
1678       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1679                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1680
1681       if (mode == VOIDmode)
1682         /* The only way this should occur is if the field spans word
1683            boundaries.  */
1684         return extract_split_bit_field (op0, bitsize,
1685                                         bitpos + offset * BITS_PER_UNIT,
1686                                         unsignedp);
1687
1688       total_bits = GET_MODE_BITSIZE (mode);
1689
1690       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1691          be in the range 0 to total_bits-1, and put any excess bytes in
1692          OFFSET.  */
1693       if (bitpos >= total_bits)
1694         {
1695           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1696           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1697                      * BITS_PER_UNIT);
1698         }
1699
1700       /* Get ref to an aligned byte, halfword, or word containing the field.
1701          Adjust BITPOS to be position within a word,
1702          and OFFSET to be the offset of that word.
1703          Then alter OP0 to refer to that word.  */
1704       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1705       offset -= (offset % (total_bits / BITS_PER_UNIT));
1706       op0 = adjust_address (op0, mode, offset);
1707     }
1708
1709   mode = GET_MODE (op0);
1710
1711   if (BYTES_BIG_ENDIAN)
1712     /* BITPOS is the distance between our msb and that of OP0.
1713        Convert it to the distance from the lsb.  */
1714     bitpos = total_bits - bitsize - bitpos;
1715
1716   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1717      We have reduced the big-endian case to the little-endian case.  */
1718
1719   if (unsignedp)
1720     {
1721       if (bitpos)
1722         {
1723           /* If the field does not already start at the lsb,
1724              shift it so it does.  */
1725           tree amount = build_int_cst (NULL_TREE, bitpos);
1726           /* Maybe propagate the target for the shift.  */
1727           /* But not if we will return it--could confuse integrate.c.  */
1728           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1729           if (tmode != mode) subtarget = 0;
1730           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1731         }
1732       /* Convert the value to the desired mode.  */
1733       if (mode != tmode)
1734         op0 = convert_to_mode (tmode, op0, 1);
1735
1736       /* Unless the msb of the field used to be the msb when we shifted,
1737          mask out the upper bits.  */
1738
1739       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1740         return expand_binop (GET_MODE (op0), and_optab, op0,
1741                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1742                              target, 1, OPTAB_LIB_WIDEN);
1743       return op0;
1744     }
1745
1746   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1747      then arithmetic-shift its lsb to the lsb of the word.  */
1748   op0 = force_reg (mode, op0);
1749   if (mode != tmode)
1750     target = 0;
1751
1752   /* Find the narrowest integer mode that contains the field.  */
1753
1754   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1755        mode = GET_MODE_WIDER_MODE (mode))
1756     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1757       {
1758         op0 = convert_to_mode (mode, op0, 0);
1759         break;
1760       }
1761
1762   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1763     {
1764       tree amount
1765         = build_int_cst (NULL_TREE,
1766                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1767       /* Maybe propagate the target for the shift.  */
1768       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1769       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1770     }
1771
1772   return expand_shift (RSHIFT_EXPR, mode, op0,
1773                        build_int_cst (NULL_TREE,
1774                                       GET_MODE_BITSIZE (mode) - bitsize),
1775                        target, 0);
1776 }
1777 \f
1778 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1779    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1780    complement of that if COMPLEMENT.  The mask is truncated if
1781    necessary to the width of mode MODE.  The mask is zero-extended if
1782    BITSIZE+BITPOS is too small for MODE.  */
1783
1784 static rtx
1785 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1786 {
1787   HOST_WIDE_INT masklow, maskhigh;
1788
1789   if (bitsize == 0)
1790     masklow = 0;
1791   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1792     masklow = (HOST_WIDE_INT) -1 << bitpos;
1793   else
1794     masklow = 0;
1795
1796   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1797     masklow &= ((unsigned HOST_WIDE_INT) -1
1798                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1799
1800   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1801     maskhigh = -1;
1802   else
1803     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1804
1805   if (bitsize == 0)
1806     maskhigh = 0;
1807   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1808     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1809                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1810   else
1811     maskhigh = 0;
1812
1813   if (complement)
1814     {
1815       maskhigh = ~maskhigh;
1816       masklow = ~masklow;
1817     }
1818
1819   return immed_double_const (masklow, maskhigh, mode);
1820 }
1821
1822 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1823    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1824
1825 static rtx
1826 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1827 {
1828   unsigned HOST_WIDE_INT v = INTVAL (value);
1829   HOST_WIDE_INT low, high;
1830
1831   if (bitsize < HOST_BITS_PER_WIDE_INT)
1832     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1833
1834   if (bitpos < HOST_BITS_PER_WIDE_INT)
1835     {
1836       low = v << bitpos;
1837       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1838     }
1839   else
1840     {
1841       low = 0;
1842       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1843     }
1844
1845   return immed_double_const (low, high, mode);
1846 }
1847 \f
1848 /* Extract a bit field from a memory by forcing the alignment of the
1849    memory.  This efficient only if the field spans at least 4 boundaries.
1850
1851    OP0 is the MEM.
1852    BITSIZE is the field width; BITPOS is the position of the first bit.
1853    UNSIGNEDP is true if the result should be zero-extended.  */
1854
1855 static rtx
1856 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1857                                    unsigned HOST_WIDE_INT bitpos,
1858                                    int unsignedp)
1859 {
1860   enum machine_mode mode, dmode;
1861   unsigned int m_bitsize, m_size;
1862   unsigned int sign_shift_up, sign_shift_dn;
1863   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1864
1865   /* Choose a mode that will fit BITSIZE.  */
1866   mode = smallest_mode_for_size (bitsize, MODE_INT);
1867   m_size = GET_MODE_SIZE (mode);
1868   m_bitsize = GET_MODE_BITSIZE (mode);
1869
1870   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1871   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1872   if (dmode == BLKmode)
1873     return NULL;
1874
1875   do_pending_stack_adjust ();
1876   start = get_last_insn ();
1877
1878   /* At the end, we'll need an additional shift to deal with sign/zero
1879      extension.  By default this will be a left+right shift of the
1880      appropriate size.  But we may be able to eliminate one of them.  */
1881   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1882
1883   if (STRICT_ALIGNMENT)
1884     {
1885       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1886       bitpos %= BITS_PER_UNIT;
1887
1888       /* We load two values to be concatenate.  There's an edge condition
1889          that bears notice -- an aligned value at the end of a page can
1890          only load one value lest we segfault.  So the two values we load
1891          are at "base & -size" and "(base + size - 1) & -size".  If base
1892          is unaligned, the addresses will be aligned and sequential; if
1893          base is aligned, the addresses will both be equal to base.  */
1894
1895       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1896                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1897                                 NULL, true, OPTAB_LIB_WIDEN);
1898       mark_reg_pointer (a1, m_bitsize);
1899       v1 = gen_rtx_MEM (mode, a1);
1900       set_mem_align (v1, m_bitsize);
1901       v1 = force_reg (mode, validize_mem (v1));
1902
1903       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1904       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1905                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1906                                 NULL, true, OPTAB_LIB_WIDEN);
1907       v2 = gen_rtx_MEM (mode, a2);
1908       set_mem_align (v2, m_bitsize);
1909       v2 = force_reg (mode, validize_mem (v2));
1910
1911       /* Combine these two values into a double-word value.  */
1912       if (m_bitsize == BITS_PER_WORD)
1913         {
1914           comb = gen_reg_rtx (dmode);
1915           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1916           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1917           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1918         }
1919       else
1920         {
1921           if (BYTES_BIG_ENDIAN)
1922             comb = v1, v1 = v2, v2 = comb;
1923           v1 = convert_modes (dmode, mode, v1, true);
1924           if (v1 == NULL)
1925             goto fail;
1926           v2 = convert_modes (dmode, mode, v2, true);
1927           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1928                                     NULL, true, OPTAB_LIB_WIDEN);
1929           if (v2 == NULL)
1930             goto fail;
1931           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1932                                       true, OPTAB_LIB_WIDEN);
1933           if (comb == NULL)
1934             goto fail;
1935         }
1936
1937       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1938                                    NULL, true, OPTAB_LIB_WIDEN);
1939       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1940
1941       if (bitpos != 0)
1942         {
1943           if (sign_shift_up <= bitpos)
1944             bitpos -= sign_shift_up, sign_shift_up = 0;
1945           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1946                                        NULL, true, OPTAB_LIB_WIDEN);
1947         }
1948     }
1949   else
1950     {
1951       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1952       bitpos %= BITS_PER_UNIT;
1953
1954       /* When strict alignment is not required, we can just load directly
1955          from memory without masking.  If the remaining BITPOS offset is
1956          small enough, we may be able to do all operations in MODE as
1957          opposed to DMODE.  */
1958       if (bitpos + bitsize <= m_bitsize)
1959         dmode = mode;
1960       comb = adjust_address (op0, dmode, offset);
1961
1962       if (sign_shift_up <= bitpos)
1963         bitpos -= sign_shift_up, sign_shift_up = 0;
1964       shift = GEN_INT (bitpos);
1965     }
1966
1967   /* Shift down the double-word such that the requested value is at bit 0.  */
1968   if (shift != const0_rtx)
1969     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1970                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1971   if (comb == NULL)
1972     goto fail;
1973
1974   /* If the field exactly matches MODE, then all we need to do is return the
1975      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1976   result = force_reg (mode, gen_lowpart (mode, comb));
1977
1978   if (sign_shift_up)
1979     result = expand_simple_binop (mode, ASHIFT, result,
1980                                   GEN_INT (sign_shift_up),
1981                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1982   if (sign_shift_dn)
1983     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1984                                   result, GEN_INT (sign_shift_dn),
1985                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1986
1987   return result;
1988
1989  fail:
1990   delete_insns_since (start);
1991   return NULL;
1992 }
1993
1994 /* Extract a bit field that is split across two words
1995    and return an RTX for the result.
1996
1997    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1998    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1999    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2000
2001 static rtx
2002 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2003                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2004 {
2005   unsigned int unit;
2006   unsigned int bitsdone = 0;
2007   rtx result = NULL_RTX;
2008   int first = 1;
2009
2010   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2011      much at a time.  */
2012   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2013     unit = BITS_PER_WORD;
2014   else
2015     {
2016       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2017       if (0 && bitsize / unit > 2)
2018         {
2019           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2020                                                        unsignedp);
2021           if (tmp)
2022             return tmp;
2023         }
2024     }
2025
2026   while (bitsdone < bitsize)
2027     {
2028       unsigned HOST_WIDE_INT thissize;
2029       rtx part, word;
2030       unsigned HOST_WIDE_INT thispos;
2031       unsigned HOST_WIDE_INT offset;
2032
2033       offset = (bitpos + bitsdone) / unit;
2034       thispos = (bitpos + bitsdone) % unit;
2035
2036       /* THISSIZE must not overrun a word boundary.  Otherwise,
2037          extract_fixed_bit_field will call us again, and we will mutually
2038          recurse forever.  */
2039       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2040       thissize = MIN (thissize, unit - thispos);
2041
2042       /* If OP0 is a register, then handle OFFSET here.
2043
2044          When handling multiword bitfields, extract_bit_field may pass
2045          down a word_mode SUBREG of a larger REG for a bitfield that actually
2046          crosses a word boundary.  Thus, for a SUBREG, we must find
2047          the current word starting from the base register.  */
2048       if (GET_CODE (op0) == SUBREG)
2049         {
2050           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2051           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2052                                         GET_MODE (SUBREG_REG (op0)));
2053           offset = 0;
2054         }
2055       else if (REG_P (op0))
2056         {
2057           word = operand_subword_force (op0, offset, GET_MODE (op0));
2058           offset = 0;
2059         }
2060       else
2061         word = op0;
2062
2063       /* Extract the parts in bit-counting order,
2064          whose meaning is determined by BYTES_PER_UNIT.
2065          OFFSET is in UNITs, and UNIT is in bits.
2066          extract_fixed_bit_field wants offset in bytes.  */
2067       part = extract_fixed_bit_field (word_mode, word,
2068                                       offset * unit / BITS_PER_UNIT,
2069                                       thissize, thispos, 0, 1);
2070       bitsdone += thissize;
2071
2072       /* Shift this part into place for the result.  */
2073       if (BYTES_BIG_ENDIAN)
2074         {
2075           if (bitsize != bitsdone)
2076             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2077                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2078                                  0, 1);
2079         }
2080       else
2081         {
2082           if (bitsdone != thissize)
2083             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2084                                  build_int_cst (NULL_TREE,
2085                                                 bitsdone - thissize), 0, 1);
2086         }
2087
2088       if (first)
2089         result = part;
2090       else
2091         /* Combine the parts with bitwise or.  This works
2092            because we extracted each part as an unsigned bit field.  */
2093         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2094                                OPTAB_LIB_WIDEN);
2095
2096       first = 0;
2097     }
2098
2099   /* Unsigned bit field: we are done.  */
2100   if (unsignedp)
2101     return result;
2102   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2103   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2104                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2105                          NULL_RTX, 0);
2106   return expand_shift (RSHIFT_EXPR, word_mode, result,
2107                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2108                        NULL_RTX, 0);
2109 }
2110 \f
2111 /* Add INC into TARGET.  */
2112
2113 void
2114 expand_inc (rtx target, rtx inc)
2115 {
2116   rtx value = expand_binop (GET_MODE (target), add_optab,
2117                             target, inc,
2118                             target, 0, OPTAB_LIB_WIDEN);
2119   if (value != target)
2120     emit_move_insn (target, value);
2121 }
2122
2123 /* Subtract DEC from TARGET.  */
2124
2125 void
2126 expand_dec (rtx target, rtx dec)
2127 {
2128   rtx value = expand_binop (GET_MODE (target), sub_optab,
2129                             target, dec,
2130                             target, 0, OPTAB_LIB_WIDEN);
2131   if (value != target)
2132     emit_move_insn (target, value);
2133 }
2134 \f
2135 /* Output a shift instruction for expression code CODE,
2136    with SHIFTED being the rtx for the value to shift,
2137    and AMOUNT the tree for the amount to shift by.
2138    Store the result in the rtx TARGET, if that is convenient.
2139    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2140    Return the rtx for where the value is.  */
2141
2142 rtx
2143 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2144               tree amount, rtx target, int unsignedp)
2145 {
2146   rtx op1, temp = 0;
2147   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2148   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2149   int try;
2150
2151   /* Previously detected shift-counts computed by NEGATE_EXPR
2152      and shifted in the other direction; but that does not work
2153      on all machines.  */
2154
2155   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2156
2157   if (SHIFT_COUNT_TRUNCATED)
2158     {
2159       if (GET_CODE (op1) == CONST_INT
2160           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2161               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2162         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2163                        % GET_MODE_BITSIZE (mode));
2164       else if (GET_CODE (op1) == SUBREG
2165                && subreg_lowpart_p (op1))
2166         op1 = SUBREG_REG (op1);
2167     }
2168
2169   if (op1 == const0_rtx)
2170     return shifted;
2171
2172   /* Check whether its cheaper to implement a left shift by a constant
2173      bit count by a sequence of additions.  */
2174   if (code == LSHIFT_EXPR
2175       && GET_CODE (op1) == CONST_INT
2176       && INTVAL (op1) > 0
2177       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2178       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2179     {
2180       int i;
2181       for (i = 0; i < INTVAL (op1); i++)
2182         {
2183           temp = force_reg (mode, shifted);
2184           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2185                                   unsignedp, OPTAB_LIB_WIDEN);
2186         }
2187       return shifted;
2188     }
2189
2190   for (try = 0; temp == 0 && try < 3; try++)
2191     {
2192       enum optab_methods methods;
2193
2194       if (try == 0)
2195         methods = OPTAB_DIRECT;
2196       else if (try == 1)
2197         methods = OPTAB_WIDEN;
2198       else
2199         methods = OPTAB_LIB_WIDEN;
2200
2201       if (rotate)
2202         {
2203           /* Widening does not work for rotation.  */
2204           if (methods == OPTAB_WIDEN)
2205             continue;
2206           else if (methods == OPTAB_LIB_WIDEN)
2207             {
2208               /* If we have been unable to open-code this by a rotation,
2209                  do it as the IOR of two shifts.  I.e., to rotate A
2210                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2211                  where C is the bitsize of A.
2212
2213                  It is theoretically possible that the target machine might
2214                  not be able to perform either shift and hence we would
2215                  be making two libcalls rather than just the one for the
2216                  shift (similarly if IOR could not be done).  We will allow
2217                  this extremely unlikely lossage to avoid complicating the
2218                  code below.  */
2219
2220               rtx subtarget = target == shifted ? 0 : target;
2221               rtx temp1;
2222               tree type = TREE_TYPE (amount);
2223               tree new_amount = make_tree (type, op1);
2224               tree other_amount
2225                 = fold_build2 (MINUS_EXPR, type,
2226                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2227                                amount);
2228
2229               shifted = force_reg (mode, shifted);
2230
2231               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2232                                    mode, shifted, new_amount, 0, 1);
2233               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2234                                     mode, shifted, other_amount, subtarget, 1);
2235               return expand_binop (mode, ior_optab, temp, temp1, target,
2236                                    unsignedp, methods);
2237             }
2238
2239           temp = expand_binop (mode,
2240                                left ? rotl_optab : rotr_optab,
2241                                shifted, op1, target, unsignedp, methods);
2242         }
2243       else if (unsignedp)
2244         temp = expand_binop (mode,
2245                              left ? ashl_optab : lshr_optab,
2246                              shifted, op1, target, unsignedp, methods);
2247
2248       /* Do arithmetic shifts.
2249          Also, if we are going to widen the operand, we can just as well
2250          use an arithmetic right-shift instead of a logical one.  */
2251       if (temp == 0 && ! rotate
2252           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2253         {
2254           enum optab_methods methods1 = methods;
2255
2256           /* If trying to widen a log shift to an arithmetic shift,
2257              don't accept an arithmetic shift of the same size.  */
2258           if (unsignedp)
2259             methods1 = OPTAB_MUST_WIDEN;
2260
2261           /* Arithmetic shift */
2262
2263           temp = expand_binop (mode,
2264                                left ? ashl_optab : ashr_optab,
2265                                shifted, op1, target, unsignedp, methods1);
2266         }
2267
2268       /* We used to try extzv here for logical right shifts, but that was
2269          only useful for one machine, the VAX, and caused poor code
2270          generation there for lshrdi3, so the code was deleted and a
2271          define_expand for lshrsi3 was added to vax.md.  */
2272     }
2273
2274   gcc_assert (temp);
2275   return temp;
2276 }
2277 \f
/* Codes for the individual steps of a synthetic multiplication
   sequence.  The order of the enumerators fixes their numeric values
   and must not be changed.  */
enum alg_code {
  alg_unknown,
  alg_zero,             /* total := 0 */
  alg_m,                /* total := multiplicand */
  alg_shift,
  alg_add_t_m2,
  alg_sub_t_m2,
  alg_add_factor,
  alg_sub_factor,
  alg_add_t2_m,
  alg_sub_t2_m,
  alg_impossible
};
2290
/* The "cost" of a synthetic multiply sequence, measured two ways.

   COST is the total rtx_cost of every operator in the sequence:
   cost(a op b) = rtx_cost(op) + cost(a) + cost(b), with cost(leaf) = 0.

   LATENCY is the minimum possible latency of the sequence on a
   hypothetical infinitely parallel CPU, i.e. the critical path: the
   sum of rtx_costs along the most expensive path from any leaf to the
   root of the expression tree.  latency(leaf) = 0, and
   latency(a op b) = rtx_cost(op) + max(latency(a), latency(b)).  */

struct mult_cost {
  /* Total rtx_cost of the multiplication sequence.  */
  short cost;
  /* The latency of the multiplication sequence.  */
  short latency;
};
2306
/* Compare the mult_cost that X points to against a single integer
   "rtx_cost" value Y.  This is equivalent to CHEAPER_MULT_COST (X, Z)
   with Z = {Y, Y}: the "cost" fields decide unless they are equal, in
   which case the "latency" decides.  X and Y are evaluated more than
   once, so they must be free of side effects.  */
#define MULT_COST_LESS(X,Y)                     \
  ((X)->cost != (Y)                             \
   ? (X)->cost < (Y)                            \
   : (X)->latency < (Y))
2312
/* Compare the two mult_costs that X and Y point to and yield nonzero
   if X is cheaper than Y.  The one with the lower "cost" is the
   cheaper; when the "cost"s are tied, the lower "latency" is the
   cheaper.  X and Y are evaluated more than once, so they must be
   free of side effects.  */
#define CHEAPER_MULT_COST(X,Y)                  \
  ((X)->cost != (Y)->cost                       \
   ? (X)->cost < (Y)->cost                      \
   : (X)->latency < (Y)->latency)
2320
/* This structure records a sequence of operations.
   `ops' is the number of operations recorded.
   `cost' is their total cost.
   The operations are stored in `op' and the corresponding
   logarithms of the integer coefficients in `log'.

   These are the operations:
   alg_zero             total := 0;
   alg_m                total := multiplicand;
   alg_shift            total := total * coeff
   alg_add_t_m2         total := total + multiplicand * coeff;
   alg_sub_t_m2         total := total - multiplicand * coeff;
   alg_add_factor       total := total * coeff + total;
   alg_sub_factor       total := total * coeff - total;
   alg_add_t2_m         total := total * coeff + multiplicand;
   alg_sub_t2_m         total := total * coeff - multiplicand;

   The first operation (op[0]) must be either alg_zero or alg_m; it
   only initializes the total, and expand_mult_const reads log[] from
   index 1 onwards.  */

struct algorithm
{
  struct mult_cost cost;
  short ops;
  /* The size of the OP and LOG fields are not directly related to the
     word size, but the worst-case algorithms will be if we have few
     consecutive ones or zeros, i.e., a multiplier like 10101010101...
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
     in total wordsize operations.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
2352
2353 /* The entry for our multiplication cache/hash table.  */
2354 struct alg_hash_entry {
2355   /* The number we are multiplying by.  */
2356   unsigned int t;
2357
2358   /* The mode in which we are multiplying something by T.  */
2359   enum machine_mode mode;
2360
2361   /* The best multiplication algorithm for t.  */
2362   enum alg_code alg;
2363
2364   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2365      Otherwise, the cost within which multiplication by T is
2366      impossible.  */
2367   struct mult_cost cost;
2368 };
2369
/* The number of cache/hash entries.  */
#define NUM_ALG_HASH_ENTRIES 307

/* Each entry of ALG_HASH caches alg_code for some integer.  This is
   actually a hash table.  If we have a collision, the older entry is
   kicked out: synth_mult simply overwrites the slot selected by its
   hash index.  Being static, the table starts out zeroed, so every
   slot initially holds alg_unknown.  */
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2377
/* Indicates the type of fixup needed after a constant multiplication:
   basic_variant means no fixup is needed, negate_variant means that
   the result should be negated, and add_variant means that the
   multiplicand should be added to the result.  */
enum mult_variant
{
  basic_variant,
  negate_variant,
  add_variant
};
2383
/* Forward declarations of file-local (static) helpers.  synth_mult,
   choose_mult_variant and expand_mult_const are defined directly
   below.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
                                                 int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
                                       int, int);
/* Compute the best algorithm for multiplying by T and store it in
   *ALG_OUT.

   ALG_OUT     receives the algorithm; nothing is returned.
   T           is the constant multiplier; it is masked to MODE first.
   COST_LIMIT  is the budget.  The search only accepts a sequence
               that CHEAPER_MULT_COST ranks below COST_LIMIT.
   MODE        is the machine mode of the multiplication.

   On entry ALG_OUT->cost is set to COST_LIMIT plus one in both
   fields.  If no algorithm is found that value is left in place to
   indicate failure, and all other fields of *ALG_OUT are undefined.

   The function recurses on smaller multipliers, each time with the
   budget reduced by the cost of the step being tried.  Results
   (including failures, as alg_impossible) are memoized in ALG_HASH,
   keyed on <T, MODE>.  On a cache hit with a usable algorithm only
   the cached strategy is re-run, which is what the do_alg_* labels
   and the `if (cache_hit) goto done' tests are for.  */

static void
synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
            const struct mult_cost *cost_limit, enum machine_mode mode)
{
  int m;
  /* ALG_IN is scratch space filled in by each recursive call;
     BEST_ALG holds the best candidate so far.  When a candidate wins,
     the two pointers are swapped instead of copying the structure.  */
  struct algorithm *alg_in, *best_alg;
  struct mult_cost best_cost;
  struct mult_cost new_limit;
  int op_cost, op_latency;
  unsigned HOST_WIDE_INT q;
  /* Shift counts are only considered when they are below MAXM.  */
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int hash_index;
  bool cache_hit = false;
  enum alg_code cache_alg = alg_zero;

  /* Indicate that no algorithm is yet found.  If no algorithm
     is found, this value will be returned and indicate failure.  */
  alg_out->cost.cost = cost_limit->cost + 1;
  alg_out->cost.latency = cost_limit->latency + 1;

  /* Nothing can be synthesized within an exhausted budget.  */
  if (cost_limit->cost < 0
      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
    return;

  /* Restrict the bits of "t" to the multiplication's mode.  */
  t &= GET_MODE_MASK (mode);

  /* t == 1 can be done in zero cost.  */
  if (t == 1)
    {
      alg_out->ops = 1;
      alg_out->cost.cost = 0;
      alg_out->cost.latency = 0;
      alg_out->op[0] = alg_m;
      return;
    }

  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
     fail now.  */
  if (t == 0)
    {
      if (MULT_COST_LESS (cost_limit, zero_cost))
        return;
      else
        {
          alg_out->ops = 1;
          alg_out->cost.cost = zero_cost;
          alg_out->cost.latency = zero_cost;
          alg_out->op[0] = alg_zero;
          return;
        }
    }

  /* We'll be needing a couple extra algorithm structures now.  */

  alg_in = alloca (sizeof (struct algorithm));
  best_alg = alloca (sizeof (struct algorithm));
  best_cost = *cost_limit;

  /* Compute the hash index.  */
  hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;

  /* See if we already know what to do for T.  */
  if (alg_hash[hash_index].t == t
      && alg_hash[hash_index].mode == mode
      && alg_hash[hash_index].alg != alg_unknown)
    {
      cache_alg = alg_hash[hash_index].alg;

      if (cache_alg == alg_impossible)
        {
          /* The cache tells us that it's impossible to synthesize
             multiplication by T within alg_hash[hash_index].cost.  */
          if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
            /* COST_LIMIT is at least as restrictive as the one
               recorded in the hash table, in which case we have no
               hope of synthesizing a multiplication.  Just
               return.  */
            return;

          /* If we get here, COST_LIMIT is less restrictive than the
             one recorded in the hash table, so we may be able to
             synthesize a multiplication.  Proceed as if we didn't
             have the cache entry.  */
        }
      else
        {
          if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
            /* The cached algorithm shows that this multiplication
               requires more cost than COST_LIMIT.  Just return.  This
               way, we don't clobber this cache entry with
               alg_impossible but retain useful information.  */
            return;

          cache_hit = true;

          /* Jump straight to the strategy that produced the cached
             final step; that section recomputes the sequence and
             then jumps to `done'.  */
          switch (cache_alg)
            {
            case alg_shift:
              goto do_alg_shift;

            case alg_add_t_m2:
            case alg_sub_t_m2:
              goto do_alg_addsub_t_m2;

            case alg_add_factor:
            case alg_sub_factor:
              goto do_alg_addsub_factor;

            case alg_add_t2_m:
              goto do_alg_add_t2_m;

            case alg_sub_t2_m:
              goto do_alg_sub_t2_m;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* If we have a group of zero bits at the low-order part of T, try
     multiplying by the remaining bits and then doing a shift.  */

  if ((t & 1) == 0)
    {
    do_alg_shift:
      m = floor_log2 (t & -t);  /* m = number of low zero bits */
      if (m < maxm)
        {
          q = t >> m;
          /* The function expand_shift will choose between a shift and
             a sequence of additions, so the observed cost is given as
             MIN (m * add_cost[mode], shift_cost[mode][m]).  */
          op_cost = m * add_cost[mode];
          if (shift_cost[mode][m] < op_cost)
            op_cost = shift_cost[mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, q, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              /* New best: swap the buffers and append this step at
                 index OPS, just past the steps the recursive call
                 recorded.  OPS itself is only bumped when the result
                 is copied to ALG_OUT at the end.  */
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_shift;
            }
        }
      if (cache_hit)
        goto done;
    }

  /* If we have an odd number, add or subtract one.  */
  if ((t & 1) != 0)
    {
      unsigned HOST_WIDE_INT w;

    do_alg_addsub_t_m2:
      /* Find the lowest zero bit of T; W ends up as that bit, or as
         zero if every bit of T is set.  */
      for (w = 1; (w & t) != 0; w <<= 1)
        ;
      /* If T was -1, then W will be zero after the loop.  This is another
         case where T ends with ...111.  Handling this with (T + 1) and
         subtract 1 produces slightly better code and results in algorithm
         selection much faster than treating it like the ...0111 case
         below.  */
      if (w == 0
          || (w > 2
              /* Reject the case where t is 3.
                 Thus we prefer addition in that case.  */
              && t != 3))
        {
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */

          op_cost = add_cost[mode];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t + 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
            }
        }
      else
        {
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */

          op_cost = add_cost[mode];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, t - 1, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_add_t_m2;
            }
        }
      if (cache_hit)
        goto done;
    }

  /* Look for factors of t of the form
     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
     If we find such a factor, we can multiply by t using an algorithm that
     multiplies by q, shift the result by m and add/subtract it to itself.

     We search for large factors first and loop down, even if large factors
     are less probable than small; if we find a large factor we will find a
     good sequence quickly, and therefore be able to prune (by decreasing
     COST_LIMIT) the search.  */

 do_alg_addsub_factor:
  for (m = floor_log2 (t - 1); m >= 2; m--)
    {
      unsigned HOST_WIDE_INT d;

      /* On a cache hit only the cached kind of factor (2**m + 1 for
         alg_add_factor, 2**m - 1 for alg_sub_factor) is accepted.  */
      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_add_factor))
        {
          /* If the target has a cheap shift-and-add instruction use
             that in preference to a shift insn followed by an add insn.
             Assume that the shift-and-add is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[mode] + shift_cost[mode][m];
          if (shiftadd_cost[mode][m] < op_cost)
            {
              op_cost = shiftadd_cost[mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          /* The latency of the whole sequence is never taken to be
             less than the cost of this final step.  */
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_factor;
            }
          /* Other factors will have been taken care of in the recursion.  */
          break;
        }

      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
      if (t % d == 0 && t > d && m < maxm
          && (!cache_hit || cache_alg == alg_sub_factor))
        {
          /* If the target has a cheap shift-and-subtract insn use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          op_cost = add_cost[mode] + shift_cost[mode][m];
          if (shiftsub_cost[mode][m] < op_cost)
            {
              op_cost = shiftsub_cost[mode][m];
              op_latency = op_cost;
            }
          else
            op_latency = add_cost[mode];

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_latency;
          if (alg_in->cost.latency < op_cost)
            alg_in->cost.latency = op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_factor;
            }
          /* As above, stop at the first factor found.  */
          break;
        }
    }
  if (cache_hit)
    goto done;

  /* Try shift-and-add (load effective address) instructions,
     i.e. do a*3, a*5, a*9.  */
  if ((t & 1) != 0)
    {
    do_alg_add_t2_m:
      /* Q becomes the lowest set bit of T - 1 and M its position, so
         that T == ((T - 1) >> M) * 2**M + 1.  */
      q = t - 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftadd_cost[mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_t2_m;
            }
        }
      if (cache_hit)
        goto done;

    do_alg_sub_t2_m:
      /* Likewise with T + 1: T == ((T + 1) >> M) * 2**M - 1.  */
      q = t + 1;
      q = q & -q;
      m = exact_log2 (q);
      if (m >= 0 && m < maxm)
        {
          op_cost = shiftsub_cost[mode][m];
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);

          alg_in->cost.cost += op_cost;
          alg_in->cost.latency += op_cost;
          if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
              best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
            }
        }
      if (cache_hit)
        goto done;
    }

 done:
  /* If best_cost has not decreased, we have not found any algorithm.  */
  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
    {
      /* We failed to find an algorithm.  Record alg_impossible for
         this case (that is, <T, MODE, COST_LIMIT>) so that next time
         we are asked to find an algorithm for T within the same or
         lower COST_LIMIT, we can immediately return to the
         caller.  */
      alg_hash[hash_index].t = t;
      alg_hash[hash_index].mode = mode;
      alg_hash[hash_index].alg = alg_impossible;
      alg_hash[hash_index].cost = *cost_limit;
      return;
    }

  /* Cache the result.  The final step of the winning sequence sits
     at index OPS (it has not been counted in OPS yet).  */
  if (!cache_hit)
    {
      alg_hash[hash_index].t = t;
      alg_hash[hash_index].mode = mode;
      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
      alg_hash[hash_index].cost.cost = best_cost.cost;
      alg_hash[hash_index].cost.latency = best_cost.latency;
    }

  /* If we are getting a too long sequence for `struct algorithm'
     to record, make this search fail.  */
  if (best_alg->ops == MAX_BITS_PER_WORD)
    return;

  /* Copy the algorithm from temporary space to the space at alg_out.
     We avoid using structure assignment because the majority of
     best_alg is normally undefined, and this is a critical function.  */
  alg_out->ops = best_alg->ops + 1;
  alg_out->cost = best_cost;
  memcpy (alg_out->op, best_alg->op,
          alg_out->ops * sizeof *alg_out->op);
  memcpy (alg_out->log, best_alg->log,
          alg_out->ops * sizeof *alg_out->log);
}
2811 \f
2812 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2813    Try three variations:
2814
2815        - a shift/add sequence based on VAL itself
2816        - a shift/add sequence based on -VAL, followed by a negation
2817        - a shift/add sequence based on VAL - 1, followed by an addition.
2818
2819    Return true if the cheapest of these cost less than MULT_COST,
2820    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2821
2822 static bool
2823 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2824                      struct algorithm *alg, enum mult_variant *variant,
2825                      int mult_cost)
2826 {
2827   struct algorithm alg2;
2828   struct mult_cost limit;
2829   int op_cost;
2830
2831   *variant = basic_variant;
2832   limit.cost = mult_cost;
2833   limit.latency = mult_cost;
2834   synth_mult (alg, val, &limit, mode);
2835
2836   /* This works only if the inverted value actually fits in an
2837      `unsigned int' */
2838   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2839     {
2840       op_cost = neg_cost[mode];
2841       if (MULT_COST_LESS (&alg->cost, mult_cost))
2842         {
2843           limit.cost = alg->cost.cost - op_cost;
2844           limit.latency = alg->cost.latency - op_cost;
2845         }
2846       else
2847         {
2848           limit.cost = mult_cost - op_cost;
2849           limit.latency = mult_cost - op_cost;
2850         }
2851
2852       synth_mult (&alg2, -val, &limit, mode);
2853       alg2.cost.cost += op_cost;
2854       alg2.cost.latency += op_cost;
2855       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2856         *alg = alg2, *variant = negate_variant;
2857     }
2858
2859   /* This proves very useful for division-by-constant.  */
2860   op_cost = add_cost[mode];
2861   if (MULT_COST_LESS (&alg->cost, mult_cost))
2862     {
2863       limit.cost = alg->cost.cost - op_cost;
2864       limit.latency = alg->cost.latency - op_cost;
2865     }
2866   else
2867     {
2868       limit.cost = mult_cost - op_cost;
2869       limit.latency = mult_cost - op_cost;
2870     }
2871
2872   synth_mult (&alg2, val - 1, &limit, mode);
2873   alg2.cost.cost += op_cost;
2874   alg2.cost.latency += op_cost;
2875   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2876     *alg = alg2, *variant = add_variant;
2877
2878   return MULT_COST_LESS (&alg->cost, mult_cost);
2879 }
2880
2881 /* A subroutine of expand_mult, used for constant multiplications.
2882    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2883    convenient.  Use the shift/add sequence described by ALG and apply
2884    the final fixup specified by VARIANT.  */
2885
2886 static rtx
2887 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2888                    rtx target, const struct algorithm *alg,
2889                    enum mult_variant variant)
2890 {
2891   HOST_WIDE_INT val_so_far;
2892   rtx insn, accum, tem;
2893   int opno;
2894   enum machine_mode nmode;
2895
2896   /* Avoid referencing memory over and over.
2897      For speed, but also for correctness when mem is volatile.  */
2898   if (MEM_P (op0))
2899     op0 = force_reg (mode, op0);
2900
2901   /* ACCUM starts out either as OP0 or as a zero, depending on
2902      the first operation.  */
2903
2904   if (alg->op[0] == alg_zero)
2905     {
2906       accum = copy_to_mode_reg (mode, const0_rtx);
2907       val_so_far = 0;
2908     }
2909   else if (alg->op[0] == alg_m)
2910     {
2911       accum = copy_to_mode_reg (mode, op0);
2912       val_so_far = 1;
2913     }
2914   else
2915     gcc_unreachable ();
2916
2917   for (opno = 1; opno < alg->ops; opno++)
2918     {
2919       int log = alg->log[opno];
2920       rtx shift_subtarget = optimize ? 0 : accum;
2921       rtx add_target
2922         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2923            && !optimize)
2924           ? target : 0;
2925       rtx accum_target = optimize ? 0 : accum;
2926
2927       switch (alg->op[opno])
2928         {
2929         case alg_shift:
2930           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2931                                 build_int_cst (NULL_TREE, log),
2932                                 NULL_RTX, 0);
2933           val_so_far <<= log;
2934           break;
2935
2936         case alg_add_t_m2:
2937           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2938                               build_int_cst (NULL_TREE, log),
2939                               NULL_RTX, 0);
2940           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2941                                  add_target ? add_target : accum_target);
2942           val_so_far += (HOST_WIDE_INT) 1 << log;
2943           break;
2944
2945         case alg_sub_t_m2:
2946           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2947                               build_int_cst (NULL_TREE, log),
2948                               NULL_RTX, 0);
2949           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2950                                  add_target ? add_target : accum_target);
2951           val_so_far -= (HOST_WIDE_INT) 1 << log;
2952           break;
2953
2954         case alg_add_t2_m:
2955           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2956                                 build_int_cst (NULL_TREE, log),
2957                                 shift_subtarget,
2958                                 0);
2959           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2960                                  add_target ? add_target : accum_target);
2961           val_so_far = (val_so_far << log) + 1;
2962           break;
2963
2964         case alg_sub_t2_m:
2965           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2966                                 build_int_cst (NULL_TREE, log),
2967                                 shift_subtarget, 0);
2968           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2969                                  add_target ? add_target : accum_target);
2970           val_so_far = (val_so_far << log) - 1;
2971           break;
2972
2973         case alg_add_factor:
2974           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2975                               build_int_cst (NULL_TREE, log),
2976                               NULL_RTX, 0);
2977           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2978                                  add_target ? add_target : accum_target);
2979           val_so_far += val_so_far << log;
2980           break;
2981
2982         case alg_sub_factor:
2983           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2984                               build_int_cst (NULL_TREE, log),
2985                               NULL_RTX, 0);
2986           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2987                                  (add_target
2988                                   ? add_target : (optimize ? 0 : tem)));
2989           val_so_far = (val_so_far << log) - val_so_far;
2990           break;
2991
2992         default:
2993           gcc_unreachable ();
2994         }
2995
2996       /* Write a REG_EQUAL note on the last insn so that we can cse
2997          multiplication sequences.  Note that if ACCUM is a SUBREG,
2998          we've set the inner register and must properly indicate
2999          that.  */
3000
3001       tem = op0, nmode = mode;
3002       if (GET_CODE (accum) == SUBREG)
3003         {
3004           nmode = GET_MODE (SUBREG_REG (accum));
3005           tem = gen_lowpart (nmode, op0);
3006         }
3007
3008       insn = get_last_insn ();
3009       set_unique_reg_note (insn, REG_EQUAL,
3010                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3011     }
3012
3013   if (variant == negate_variant)
3014     {
3015       val_so_far = -val_so_far;
3016       accum = expand_unop (mode, neg_optab, accum, target, 0);
3017     }
3018   else if (variant == add_variant)
3019     {
3020       val_so_far = val_so_far + 1;
3021       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3022     }
3023
3024   /* Compare only the bits of val and val_so_far that are significant
3025      in the result mode, to avoid sign-/zero-extension confusion.  */
3026   val &= GET_MODE_MASK (mode);
3027   val_so_far &= GET_MODE_MASK (mode);
3028   gcc_assert (val == val_so_far);
3029
3030   return accum;
3031 }
3032
3033 /* Perform a multiplication and return an rtx for the result.
3034    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3035    TARGET is a suggestion for where to store the result (an rtx).
3036    negv_optab/smulv_optab are chosen only when UNSIGNEDP is zero, flag_trapv
3037    is set and MODE is of class MODE_INT.  We check specially for a constant
3038    integer as OP1.  If you want this check for OP0 as well, then before
3039    calling you should swap the two operands if OP0 would be constant.  */
3040
3041 rtx
3042 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3043              int unsignedp)
3044 {
3045   enum mult_variant variant;
3046   struct algorithm algorithm;
3047   int max_cost;
3048
3049   /* Handling const0_rtx here allows us to use zero as a rogue value for
3050      coeff below.  */
3051   if (op1 == const0_rtx)
3052     return const0_rtx;
3053   if (op1 == const1_rtx)
3054     return op0;
3055   if (op1 == constm1_rtx)
3056     return expand_unop (mode,
3057                         GET_MODE_CLASS (mode) == MODE_INT
3058                         && !unsignedp && flag_trapv
3059                         ? negv_optab : neg_optab,
3060                         op0, target, 0);
3061
3062   /* These are the operations that are potentially turned into a sequence
3063      of shifts and additions.  */
3064   if (SCALAR_INT_MODE_P (mode)
3065       && (unsignedp || !flag_trapv))
3066     {
3067       HOST_WIDE_INT coeff = 0;
3068       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3069       /* FAKE_REG only serves to build the MULT rtxes handed to rtx_cost below.  */
3070       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3071          less than or equal in size to `unsigned int' this doesn't matter.
3072          If the mode is larger than `unsigned int', then synth_mult works
3073          only if the constant value exactly fits in an `unsigned int' without
3074          any truncation.  This means that multiplying by negative values does
3075          not work; results are off by 2^32 on a 32 bit machine.  */
3076
3077       if (GET_CODE (op1) == CONST_INT)
3078         {
3079           /* Attempt to handle multiplication of DImode values by negative
3080              coefficients, by performing the multiplication by a positive
3081              multiplier and then inverting the result.  */
3082           if (INTVAL (op1) < 0
3083               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3084             {
3085               /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3086                  result is interpreted as an unsigned coefficient.
3087                  Exclude cost of op0 from max_cost to match the cost
3088                  calculation of the synth_mult.  */
3089               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3090                          - neg_cost[mode];
3091               if (max_cost > 0
3092                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3093                                           &variant, max_cost))
3094                 {
3095                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3096                                                 NULL_RTX, &algorithm,
3097                                                 variant);
3098                   return expand_unop (mode, neg_optab, temp, target, 0);
3099                 }
3100             }
3101           else coeff = INTVAL (op1);
3102         }
3103       else if (GET_CODE (op1) == CONST_DOUBLE)
3104         {
3105           /* If we are multiplying in DImode, it may still be a win
3106              to try to work with shifts and adds.  */
3107           if (CONST_DOUBLE_HIGH (op1) == 0)
3108             coeff = CONST_DOUBLE_LOW (op1);
3109           else if (CONST_DOUBLE_LOW (op1) == 0
3110                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3111             {
3112               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3113                           + HOST_BITS_PER_WIDE_INT;
3114               return expand_shift (LSHIFT_EXPR, mode, op0,
3115                                    build_int_cst (NULL_TREE, shift),
3116                                    target, unsignedp);
3117             }
3118         }
3119
3120       /* We used to test optimize here, on the grounds that it's better to
3121          produce a smaller program when -O is not used.  But this causes
3122          such a terrible slowdown sometimes that it seems better to always
3123          use synth_mult.  */
3124       if (coeff != 0)
3125         {
3126           /* Special case powers of two.  */
3127           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3128             return expand_shift (LSHIFT_EXPR, mode, op0,
3129                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3130                                  target, unsignedp);
3131
3132           /* Exclude cost of op0 from max_cost to match the cost
3133              calculation of the synth_mult.  */
3134           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3135           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3136                                    max_cost))
3137             return expand_mult_const (mode, op0, coeff, target,
3138                                       &algorithm, variant);
3139         }
3140     }
3141
3142   if (GET_CODE (op0) == CONST_DOUBLE)
3143     {
3144       rtx temp = op0;
3145       op0 = op1;
3146       op1 = temp;
3147     }
3148
3149   /* Expand x*2.0 as x+x.  */
3150   if (GET_CODE (op1) == CONST_DOUBLE
3151       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3152     {
3153       REAL_VALUE_TYPE d;
3154       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3155
3156       if (REAL_VALUES_EQUAL (d, dconst2))
3157         {
3158           op0 = force_reg (GET_MODE (op0), op0);
3159           return expand_binop (mode, add_optab, op0, op0,
3160                                target, unsignedp, OPTAB_LIB_WIDEN);
3161         }
3162     }
3163
3164   /* This used to use umul_optab if unsigned, but for non-widening multiply
3165      there is no difference between signed and unsigned.  */
3166   op0 = expand_binop (mode,
3167                       ! unsignedp
3168                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3169                       ? smulv_optab : smul_optab,
3170                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3171   gcc_assert (op0);
3172   return op0;
3173 }
3174 \f
3175 /* Return the smallest n such that 2**n >= X, as floor_log2 (X - 1) + 1.  */
3176
3177 int
3178 ceil_log2 (unsigned HOST_WIDE_INT x)
3179 {
3180   return floor_log2 (x - 1) + 1;  /* X is unsigned: for X == 0, X - 1 wraps.  */
3181 }
3182
3183 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3184    replace division by D, and put the least significant N bits of the result
3185    in *MULTIPLIER_PTR and return the most significant bit.
3186
3187    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3188    needed precision is in PRECISION (should be <= N).
3189
3190    PRECISION should be as small as possible so this function can choose
3191    multiplier more freely.
3192
3193    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3194    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3195
3196    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3197    where m is the full N + 1 bit multiplier.  */
3198
3199 static
3200 unsigned HOST_WIDE_INT
3201 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3202                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3203 {
3204   HOST_WIDE_INT mhigh_hi, mlow_hi;
3205   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3206   int lgup, post_shift;
3207   int pow, pow2;
3208   unsigned HOST_WIDE_INT nl, dummy1;  /* NL/NH: low/high word of the dividend.  */
3209   HOST_WIDE_INT nh, dummy2;  /* DUMMY1/2: div_and_round_double outputs, never read.  */
3210
3211   /* lgup = ceil(log2(divisor)); */
3212   lgup = ceil_log2 (d);
3213
3214   gcc_assert (lgup <= n);
3215
3216   pow = n + lgup;
3217   pow2 = n + lgup - precision;
3218
3219   /* We could handle this with some effort, but this case is much
3220      better handled directly with a scc insn, so rely on caller using
3221      that.  */
3222   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3223
3224   /* mlow = 2^(N + lgup)/d */
3225  if (pow >= HOST_BITS_PER_WIDE_INT)
3226     {
3227       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3228       nl = 0;
3229     }
3230   else
3231     {
3232       nh = 0;
3233       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3234     }
3235   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3236                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3237
3238   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3239   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3240     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3241   else
3242     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3243   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3244                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3245
3246   gcc_assert (!mhigh_hi || nh - d < d);
3247   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3248   /* Assert that mlow < mhigh.  */
3249   gcc_assert (mlow_hi < mhigh_hi
3250               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3251
3252   /* If precision == N, then mlow, mhigh exceed 2^N
3253      (but they do not exceed 2^(N+1)).  */
3254   /* Reduce to lowest terms: halve MLOW and MHIGH, decrementing POST_SHIFT,
3255      for as long as the halved MLOW is still below the halved MHIGH.  */
3256   for (post_shift = lgup; post_shift > 0; post_shift--)
3257     {
3258       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3259       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3260       if (ml_lo >= mh_lo)
3261         break;
3262
3263       mlow_hi = 0;
3264       mlow_lo = ml_lo;
3265       mhigh_hi = 0;
3266       mhigh_lo = mh_lo;
3267     }
3268
3269   *post_shift_ptr = post_shift;
3270   *lgup_ptr = lgup;
3271   if (n < HOST_BITS_PER_WIDE_INT)
3272     {
3273       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3274       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3275       return mhigh_lo >= mask;  /* NOTE(review): also 1 when mhigh_lo == mask; confirm intended.  */
3276     }
3277   else
3278     {
3279       *multiplier_ptr = GEN_INT (mhigh_lo);
3280       return mhigh_hi;
3281     }
3282 }
3283
3284 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3285    congruent to 1 (mod 2**N).  X is assumed odd; this is not checked.  */
3286
3287 static unsigned HOST_WIDE_INT
3288 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3289 {
3290   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3291
3292   /* The algorithm notes that the choice y = x satisfies
3293      x*y == 1 mod 2^3, since x is assumed odd.
3294      Each iteration doubles the number of bits of significance in y.  */
3295
3296   unsigned HOST_WIDE_INT mask;
3297   unsigned HOST_WIDE_INT y = x;
3298   int nbit = 3;
3299   /* MASK = low N bits, without shifting by the full width when N is maximal.  */
3300   mask = (n == HOST_BITS_PER_WIDE_INT
3301           ? ~(unsigned HOST_WIDE_INT) 0
3302           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3303   /* NOTE: for N <= 3 the loop never runs and X is returned unmasked.  */
3304   while (nbit < n)
3305     {
3306       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3307       nbit *= 2;
3308     }
3309   return y;
3310 }
3311
3312 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3313    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3314    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3315    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3316    become signed.
3317
3318    The result is put in TARGET if that is convenient.
3319
3320    MODE is the mode of operation.  */
3321
3322 rtx
3323 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3324                              rtx op1, rtx target, int unsignedp)
3325 {
3326   rtx tem;
3327   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3328   /* ADJ_OPERAND (+ or -)= (OP0 >> (bitsize - 1)) & OP1, per ADJ_CODE.  */
3329   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3330                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3331                       NULL_RTX, 0);
3332   tem = expand_and (mode, tem, op1, NULL_RTX);
3333   adj_operand
3334     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3335                      adj_operand);
3336   /* Same correction with OP0 and OP1 exchanged, producing the final value.  */
3337   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3338                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3339                       NULL_RTX, 0);
3340   tem = expand_and (mode, tem, op0, NULL_RTX);
3341   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3342                           target);
3343
3344   return target;
3345 }
3346
3347 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3348
3349 static rtx
3350 extract_high_half (enum machine_mode mode, rtx op)
3351 {
3352   enum machine_mode wider_mode;
3353   /* For word_mode, take the high part directly with gen_highpart.  */
3354   if (mode == word_mode)
3355     return gen_highpart (mode, op);
3356   /* Otherwise shift the high half down in the wider mode and narrow to MODE.  */
3357   wider_mode = GET_MODE_WIDER_MODE (mode);
3358   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3359                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3360   return convert_modes (mode, wider_mode, op, 0);
3361 }
3362
3363 /* Like expand_mult_highpart, but only consider using a multiplication optab.
3364    OP1 is an rtx for the constant operand.  Return 0 if no strategy applies.  */
3365
3366 static rtx
3367 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3368                             rtx target, int unsignedp, int max_cost)
3369 {
3370   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);  /* OP1 rebuilt for MODE.  */
3371   enum machine_mode wider_mode;
3372   optab moptab;
3373   rtx tem;
3374   int size;
3375
3376   wider_mode = GET_MODE_WIDER_MODE (mode);
3377   size = GET_MODE_BITSIZE (mode);
3378
3379   /* Firstly, try using a multiplication insn that only generates the needed
3380      high part of the product, and in the sign flavor of unsignedp.  */
3381   if (mul_highpart_cost[mode] < max_cost)
3382     {
3383       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3384       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3385                           unsignedp, OPTAB_DIRECT);
3386       if (tem)
3387         return tem;
3388     }
3389
3390   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3391      Need to adjust the result after the multiplication.  */
3392   if (size - 1 < BITS_PER_WORD
3393       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3394           + 4 * add_cost[mode] < max_cost))
3395     {
3396       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3397       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3398                           unsignedp, OPTAB_DIRECT);
3399       if (tem)
3400         /* We used the wrong signedness.  Adjust the result.  */
3401         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3402                                             tem, unsignedp);
3403     }
3404
3405   /* Try widening multiplication.  */
3406   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3407   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3408       && mul_widen_cost[wider_mode] < max_cost)
3409     {
3410       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3411                           unsignedp, OPTAB_WIDEN);
3412       if (tem)
3413         return extract_high_half (mode, tem);
3414     }
3415
3416   /* Try widening the mode and perform a non-widening multiplication.  */
3417   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3418       && size - 1 < BITS_PER_WORD
3419       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3420     {
3421       rtx insns, wop0, wop1;
3422
3423       /* We need to widen the operands, for example to ensure the
3424          constant multiplier is correctly sign or zero extended.
3425          Use a sequence to clean-up any instructions emitted by
3426          the conversions if things don't work out.  */
3427       start_sequence ();
3428       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3429       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3430       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3431                           unsignedp, OPTAB_WIDEN);
3432       insns = get_insns ();
3433       end_sequence ();
3434
3435       if (tem)
3436         {
3437           emit_insn (insns);  /* Only now are the saved insns actually emitted.  */
3438           return extract_high_half (mode, tem);
3439         }
3440     }
3441
3442   /* Try widening multiplication of opposite signedness, and adjust.  */
3443   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3444   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3445       && size - 1 < BITS_PER_WORD
3446       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3447           + 4 * add_cost[mode] < max_cost))
3448     {
3449       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3450                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3451       if (tem != 0)
3452         {
3453           tem = extract_high_half (mode, tem);
3454           /* We used the wrong signedness.  Adjust the result.  */
3455           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3456                                               target, unsignedp);
3457         }
3458     }
3459
3460   return 0;  /* None of the strategies above produced a result.  */
3461 }
3462
3463 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3464    putting the high half of the result in TARGET if that is convenient,
3465    and return where the result is.  If the operation cannot be performed,
3466    0 is returned.
3467
3468    MODE is the mode of operation and result.
3469
3470    UNSIGNEDP nonzero means unsigned multiply.
3471
3472    MAX_COST is the total allowed cost for the expanded RTL.  */
3473
3474 static rtx
3475 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3476                       rtx target, int unsignedp, int max_cost)
3477 {
3478   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3479   unsigned HOST_WIDE_INT cnst1;
3480   int extra_cost;
3481   bool sign_adjust = false;
3482   enum mult_variant variant;
3483   struct algorithm alg;
3484   rtx tem;
3485
3486   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3487   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3488
3489   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);  /* OP1 masked to MODE, held unsigned.  */
3490
3491   /* We can't optimize modes wider than BITS_PER_WORD.
3492      ??? We might be able to perform double-word arithmetic if
3493      mode == word_mode, however all the cost calculations in
3494      synth_mult etc. assume single-word operations.  */
3495   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3496     return expand_mult_highpart_optab (mode, op0, op1, target,
3497                                        unsignedp, max_cost);
3498
3499   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3500
3501   /* Check whether we try to multiply by a negative constant.  */
3502   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3503     {
3504       sign_adjust = true;
3505       extra_cost += add_cost[mode];
3506     }
3507
3508   /* See whether shift/add multiplication is cheap enough.  */
3509   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3510                            max_cost - extra_cost))
3511     {
3512       /* See whether the specialized multiplication optabs are
3513          cheaper than the shift/add version.  */
3514       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3515                                         alg.cost.cost + extra_cost);
3516       if (tem)
3517         return tem;
3518
3519       tem = convert_to_mode (wider_mode, op0, unsignedp);
3520       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3521       tem = extract_high_half (mode, tem);
3522       /* Adjust result for signedness: a negative constant was multiplied as
3523          CNST1, its unsigned image, so the high half is too large by OP0.  */
3524       if (sign_adjust)
3525         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3526
3527       return tem;
3528     }
3529   return expand_mult_highpart_optab (mode, op0, op1, target,
3530                                      unsignedp, max_cost);
3531 }
3532
3533
3534 /* Expand signed modulus of OP0 by a power of two D in mode MODE; return the result rtx.  */
3535
3536 static rtx
3537 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3538 {
3539   unsigned HOST_WIDE_INT masklow, maskhigh;
3540   rtx result, temp, shift, label;
3541   int logd;
3542
3543   logd = floor_log2 (d);
3544   result = gen_reg_rtx (mode);
3545
3546   /* Avoid conditional branches when they're expensive.  */
3547   if (BRANCH_COST >= 2
3548       && !optimize_size)
3549     {
3550       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3551                                       mode, 0, -1);
3552       if (signmask)  /* emit_store_flag may fail and return 0.  */
3553         {
3554           signmask = force_reg (mode, signmask);
3555           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3556           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3557
3558           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3559              which instruction sequence to use.  If logical right shifts
3560              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3561              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3562
3563           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3564           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3565               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3566             {
3567               temp = expand_binop (mode, xor_optab, op0, signmask,
3568                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3569               temp = expand_binop (mode, sub_optab, temp, signmask,
3570                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3571               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3572                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3573               temp = expand_binop (mode, xor_optab, temp, signmask,
3574                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3575               temp = expand_binop (mode, sub_optab, temp, signmask,
3576                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3577             }
3578           else
3579             {
3580               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3581                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3582               signmask = force_reg (mode, signmask);
3583
3584               temp = expand_binop (mode, add_optab, op0, signmask,
3585                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3586               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3587                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3588               temp = expand_binop (mode, sub_optab, temp, signmask,
3589                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3590             }
3591           return temp;
3592         }
3593     }
3594
3595   /* Mask contains the mode's signbit and the significant bits of the
3596      modulus.  By including the signbit in the operation, many targets
3597      can avoid an explicit compare operation in the following comparison
3598      against zero.  */
3599
3600   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3601   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3602     {
3603       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3604       maskhigh = -1;
3605     }
3606   else
3607     maskhigh = (HOST_WIDE_INT) -1
3608                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3609
3610   temp = expand_binop (mode, and_optab, op0,
3611                        immed_double_const (masklow, maskhigh, mode),
3612                        result, 1, OPTAB_LIB_WIDEN);
3613   if (temp != result)
3614     emit_move_insn (result, temp);
3615   /* If RESULT >= 0 it is already the remainder; jump past the fixup below.  */
3616   label = gen_label_rtx ();
3617   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3618   /* Negative: ((RESULT - 1) | (-1 << LOGD)) + 1, i.e. 0 or low bits minus D.  */
3619   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3620                        0, OPTAB_LIB_WIDEN);
3621   masklow = (HOST_WIDE_INT) -1 << logd;
3622   maskhigh = -1;
3623   temp = expand_binop (mode, ior_optab, temp,
3624                        immed_double_const (masklow, maskhigh, mode),
3625                        result, 1, OPTAB_LIB_WIDEN);
3626   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3627                        0, OPTAB_LIB_WIDEN);
3628   if (temp != result)
3629     emit_move_insn (result, temp);
3630   emit_label (label);
3631   return result;
3632 }
3633
3634 /* Expand signed division of OP0 by a power of two D in mode MODE.
3635    This routine is only called for positive values of D.  */
3636
3637 static rtx
3638 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3639 {
3640   rtx temp, label;
3641   tree shift;
3642   int logd;
3643
3644   logd = floor_log2 (d);
3645   shift = build_int_cst (NULL_TREE, logd);
3646   /* D == 2: add emit_store_flag's OP0 < 0 result (last argument 1) to OP0, then shift.  */
3647   if (d == 2 && BRANCH_COST >= 1)
3648     {
3649       temp = gen_reg_rtx (mode);
3650       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3651       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3652                            0, OPTAB_LIB_WIDEN);
3653       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3654     }
3655
3656 #ifdef HAVE_conditional_move
3657   if (BRANCH_COST >= 2)
3658     {
3659       rtx temp2;
3660
3661       /* ??? emit_conditional_move forces a stack adjustment via
3662          compare_from_rtx so, if the sequence is discarded, it will
3663          be lost.  Do it now instead.  */
3664       do_pending_stack_adjust ();
3665
3666       start_sequence ();
3667       temp2 = copy_to_mode_reg (mode, op0);
3668       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3669                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3670       temp = force_reg (mode, temp);
3671
3672       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3673       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3674                                      mode, temp, temp2, mode, 0);
3675       if (temp2)
3676         {
3677           rtx seq = get_insns ();
3678           end_sequence ();
3679           emit_insn (seq);
3680           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3681         }
3682       end_sequence ();  /* Conditional move failed: the sequence is not emitted.  */
3683     }
3684 #endif
3685   /* Otherwise build the addend from an OP0 < 0 flag, by AND or by right shift.  */
3686   if (BRANCH_COST >= 2)
3687     {
3688       int ushift = GET_MODE_BITSIZE (mode) - logd;
3689
3690       temp = gen_reg_rtx (mode);
3691       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3692       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3693         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3694                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3695       else
3696         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3697                              build_int_cst (NULL_TREE, ushift),
3698                              NULL_RTX, 1);
3699       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3700                            0, OPTAB_LIB_WIDEN);
3701       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3702     }
3703   /* Fallback: jump over the addition of D - 1 when OP0 >= 0, then shift.  */
3704   label = gen_label_rtx ();
3705   temp = copy_to_mode_reg (mode, op0);
3706   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3707   expand_inc (temp, GEN_INT (d - 1));
3708   emit_label (label);
3709   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3710 }
3711 \f
3712 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3713    if that is convenient, and returning where the result is.
3714    You may request either the quotient or the remainder as the result;
3715    specify REM_FLAG nonzero to get the remainder.
3716
3717    CODE is the expression code for which kind of division this is;
3718    it controls how rounding is done.  MODE is the machine mode to use.
3719    UNSIGNEDP nonzero means do unsigned division.  */
3720
3721 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3722    and then correct it by or'ing in missing high bits
3723    if result of ANDI is nonzero.
3724    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3725    This could optimize to a bfexts instruction.
3726    But C doesn't use these operations, so their optimizations are
3727    left for later.  */
3728 /* ??? For modulo, we don't actually need the highpart of the first product,
3729    the low part will do nicely.  And for small divisors, the second multiply
3730    can also be a low-part only multiply or even be completely left out.
3731    E.g. to calculate the remainder of a division by 3 with a 32 bit
3732    multiply, multiply with 0x55555556 and extract the upper two bits;
3733    the result is exact for inputs up to 0x1fffffff.
3734    The input range can be reduced by using cross-sum rules.
3735    For odd divisors >= 3, the following table gives right shift counts
3736    so that if a number is shifted by an integer multiple of the given
3737    amount, the remainder stays the same:
3738    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3739    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3740    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3741    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3742    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3743
3744    Cross-sum rules for even numbers can be derived by leaving as many bits
3745    to the right alone as the divisor has zeros to the right.
3746    E.g. if x is an unsigned 32 bit number:
3747    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3748    */
3749
3750 rtx
3751 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3752                rtx op0, rtx op1, rtx target, int unsignedp)
3753 {
3754   enum machine_mode compute_mode;
3755   rtx tquotient;
3756   rtx quotient = 0, remainder = 0;
3757   rtx last;
3758   int size;
3759   rtx insn, set;
3760   optab optab1, optab2;
3761   int op1_is_constant, op1_is_pow2 = 0;
3762   int max_cost, extra_cost;
3763   static HOST_WIDE_INT last_div_const = 0;
3764   static HOST_WIDE_INT ext_op1;
3765
3766   op1_is_constant = GET_CODE (op1) == CONST_INT;
3767   if (op1_is_constant)
3768     {
3769       ext_op1 = INTVAL (op1);
3770       if (unsignedp)
3771         ext_op1 &= GET_MODE_MASK (mode);
3772       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3773                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3774     }
3775
3776   /*
3777      This is the structure of expand_divmod:
3778
3779      First comes code to fix up the operands so we can perform the operations
3780      correctly and efficiently.
3781
3782      Second comes a switch statement with code specific for each rounding mode.
3783      For some special operands this code emits all RTL for the desired
3784      operation, for other cases, it generates only a quotient and stores it in
3785      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3786      to indicate that it has not done anything.
3787
3788      Last comes code that finishes the operation.  If QUOTIENT is set and
3789      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3790      QUOTIENT is not set, it is computed using trunc rounding.
3791
3792      We try to generate special code for division and remainder when OP1 is a
3793      constant.  If |OP1| = 2**n we can use shifts and some other fast
3794      operations.  For other values of OP1, we compute a carefully selected
3795      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3796      by m.
3797
3798      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3799      half of the product.  Different strategies for generating the product are
3800      implemented in expand_mult_highpart.
3801
3802      If what we actually want is the remainder, we generate that by another
3803      by-constant multiplication and a subtraction.  */
3804
3805   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3806      code below will malfunction if we are, so check here and handle
3807      the special case if so.  */
3808   if (op1 == const1_rtx)
3809     return rem_flag ? const0_rtx : op0;
3810
3811     /* When dividing by -1, we could get an overflow.
3812      negv_optab can handle overflows.  */
3813   if (! unsignedp && op1 == constm1_rtx)
3814     {
3815       if (rem_flag)
3816         return const0_rtx;
3817       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3818                           ? negv_optab : neg_optab, op0, target, 0);
3819     }
3820
3821   if (target
3822       /* Don't use the function value register as a target
3823          since we have to read it as well as write it,
3824          and function-inlining gets confused by this.  */
3825       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3826           /* Don't clobber an operand while doing a multi-step calculation.  */
3827           || ((rem_flag || op1_is_constant)
3828               && (reg_mentioned_p (target, op0)
3829                   || (MEM_P (op0) && MEM_P (target))))
3830           || reg_mentioned_p (target, op1)
3831           || (MEM_P (op1) && MEM_P (target))))
3832     target = 0;
3833
3834   /* Get the mode in which to perform this computation.  Normally it will
3835      be MODE, but sometimes we can't do the desired operation in MODE.
3836      If so, pick a wider mode in which we can do the operation.  Convert
3837      to that mode at the start to avoid repeated conversions.
3838
3839      First see what operations we need.  These depend on the expression
3840      we are evaluating.  (We assume that divxx3 insns exist under the
3841      same conditions as modxx3 insns and that these insns don't normally
3842      fail.  If these assumptions are not correct, we may generate less
3843      efficient code in some cases.)
3844
3845      Then see if we find a mode in which we can open-code that operation
3846      (either a division, modulus, or shift).  Finally, check for the smallest
3847      mode for which we can do the operation with a library call.  */
3848
3849   /* We might want to refine this now that we have division-by-constant
3850      optimization.  Since expand_mult_highpart tries so many variants, it is
3851      not straightforward to generalize this.  Maybe we should make an array
3852      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3853
3854   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3855             ? (unsignedp ? lshr_optab : ashr_optab)
3856             : (unsignedp ? udiv_optab : sdiv_optab));
3857   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3858             ? optab1
3859             : (unsignedp ? udivmod_optab : sdivmod_optab));
3860
3861   for (compute_mode = mode; compute_mode != VOIDmode;
3862        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3863     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3864         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3865       break;
3866
3867   if (compute_mode == VOIDmode)
3868     for (compute_mode = mode; compute_mode != VOIDmode;
3869          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3870       if (optab1->handlers[compute_mode].libfunc
3871           || optab2->handlers[compute_mode].libfunc)
3872         break;
3873
3874   /* If we still couldn't find a mode, use MODE, but expand_binop will
3875      probably die.  */
3876   if (compute_mode == VOIDmode)
3877     compute_mode = mode;
3878
3879   if (target && GET_MODE (target) == compute_mode)
3880     tquotient = target;
3881   else
3882     tquotient = gen_reg_rtx (compute_mode);
3883
3884   size = GET_MODE_BITSIZE (compute_mode);
3885 #if 0
3886   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3887      (mode), and thereby get better code when OP1 is a constant.  Do that
3888      later.  It will require going over all usages of SIZE below.  */
3889   size = GET_MODE_BITSIZE (mode);
3890 #endif
3891
3892   /* Only deduct something for a REM if the last divide done was
3893      for a different constant.   Then set the constant of the last
3894      divide.  */
3895   max_cost = div_cost[compute_mode]
3896     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3897                       && INTVAL (op1) == last_div_const)
3898        ? mul_cost[compute_mode] + add_cost[compute_mode]
3899        : 0);
3900
3901   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3902
3903   /* Now convert to the best mode to use.  */
3904   if (compute_mode != mode)
3905     {
3906       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3907       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3908
3909       /* convert_modes may have placed op1 into a register, so we
3910          must recompute the following.  */
3911       op1_is_constant = GET_CODE (op1) == CONST_INT;
3912       op1_is_pow2 = (op1_is_constant
3913                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3914                           || (! unsignedp
3915                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3916     }
3917
3918   /* If one of the operands is a volatile MEM, copy it into a register.  */
3919
3920   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3921     op0 = force_reg (compute_mode, op0);
3922   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3923     op1 = force_reg (compute_mode, op1);
3924
3925   /* If we need the remainder or if OP1 is constant, we need to
3926      put OP0 in a register in case it has any queued subexpressions.  */
3927   if (rem_flag || op1_is_constant)
3928     op0 = force_reg (compute_mode, op0);
3929
3930   last = get_last_insn ();
3931
3932   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3933   if (unsignedp)
3934     {
3935       if (code == FLOOR_DIV_EXPR)
3936         code = TRUNC_DIV_EXPR;
3937       if (code == FLOOR_MOD_EXPR)
3938         code = TRUNC_MOD_EXPR;
3939       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3940         code = TRUNC_DIV_EXPR;
3941     }
3942
3943   if (op1 != const0_rtx)
3944     switch (code)
3945       {
3946       case TRUNC_MOD_EXPR:
3947       case TRUNC_DIV_EXPR:
3948         if (op1_is_constant)
3949           {
3950             if (unsignedp)
3951               {
3952                 unsigned HOST_WIDE_INT mh;
3953                 int pre_shift, post_shift;
3954                 int dummy;
3955                 rtx ml;
3956                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3957                                             & GET_MODE_MASK (compute_mode));
3958
3959                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3960                   {
3961                     pre_shift = floor_log2 (d);
3962                     if (rem_flag)
3963                       {
3964                         remainder
3965                           = expand_binop (compute_mode, and_optab, op0,
3966                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3967                                           remainder, 1,
3968                                           OPTAB_LIB_WIDEN);
3969                         if (remainder)
3970                           return gen_lowpart (mode, remainder);
3971                       }
3972                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3973                                              build_int_cst (NULL_TREE,
3974                                                             pre_shift),
3975                                              tquotient, 1);
3976                   }
3977                 else if (size <= HOST_BITS_PER_WIDE_INT)
3978                   {
3979                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3980                       {
3981                         /* Most significant bit of divisor is set; emit an scc
3982                            insn.  */
3983                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3984                                                     compute_mode, 1, 1);
3985                         if (quotient == 0)
3986                           goto fail1;
3987                       }
3988                     else
3989                       {
3990                         /* Find a suitable multiplier and right shift count
3991                            instead of dividing by D.  */
3992
3993                         mh = choose_multiplier (d, size, size,
3994                                                 &ml, &post_shift, &dummy);
3995
3996                         /* If the suggested multiplier is more than SIZE bits,
3997                            we can do better for even divisors, using an
3998                            initial right shift.  */
3999                         if (mh != 0 && (d & 1) == 0)
4000                           {
4001                             pre_shift = floor_log2 (d & -d);
4002                             mh = choose_multiplier (d >> pre_shift, size,
4003                                                     size - pre_shift,
4004                                                     &ml, &post_shift, &dummy);
4005                             gcc_assert (!mh);
4006                           }
4007                         else
4008                           pre_shift = 0;
4009
4010                         if (mh != 0)
4011                           {
4012                             rtx t1, t2, t3, t4;
4013
4014                             if (post_shift - 1 >= BITS_PER_WORD)
4015                               goto fail1;
4016
4017                             extra_cost
4018                               = (shift_cost[compute_mode][post_shift - 1]
4019                                  + shift_cost[compute_mode][1]
4020                                  + 2 * add_cost[compute_mode]);
4021                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4022                                                        NULL_RTX, 1,
4023                                                        max_cost - extra_cost);
4024                             if (t1 == 0)
4025                               goto fail1;
4026                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4027                                                                op0, t1),
4028                                                 NULL_RTX);
4029                             t3 = expand_shift
4030                               (RSHIFT_EXPR, compute_mode, t2,
4031                                build_int_cst (NULL_TREE, 1),
4032                                NULL_RTX,1);
4033                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4034                                                               t1, t3),
4035                                                 NULL_RTX);
4036                             quotient = expand_shift
4037                               (RSHIFT_EXPR, compute_mode, t4,
4038                                build_int_cst (NULL_TREE, post_shift - 1),
4039                                tquotient, 1);
4040                           }
4041                         else
4042                           {
4043                             rtx t1, t2;
4044
4045                             if (pre_shift >= BITS_PER_WORD
4046                                 || post_shift >= BITS_PER_WORD)
4047                               goto fail1;
4048
4049                             t1 = expand_shift
4050                               (RSHIFT_EXPR, compute_mode, op0,
4051                                build_int_cst (NULL_TREE, pre_shift),
4052                                NULL_RTX, 1);
4053                             extra_cost
4054                               = (shift_cost[compute_mode][pre_shift]
4055                                  + shift_cost[compute_mode][post_shift]);
4056                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4057                                                        NULL_RTX, 1,
4058                                                        max_cost - extra_cost);
4059                             if (t2 == 0)
4060                               goto fail1;
4061                             quotient = expand_shift
4062                               (RSHIFT_EXPR, compute_mode, t2,
4063                                build_int_cst (NULL_TREE, post_shift),
4064                                tquotient, 1);
4065                           }
4066                       }
4067                   }
4068                 else            /* Too wide mode to use tricky code */
4069                   break;
4070
4071                 insn = get_last_insn ();
4072                 if (insn != last
4073                     && (set = single_set (insn)) != 0
4074                     && SET_DEST (set) == quotient)
4075                   set_unique_reg_note (insn,
4076                                        REG_EQUAL,
4077                                        gen_rtx_UDIV (compute_mode, op0, op1));
4078               }
4079             else                /* TRUNC_DIV, signed */
4080               {
4081                 unsigned HOST_WIDE_INT ml;
4082                 int lgup, post_shift;
4083                 rtx mlr;
4084                 HOST_WIDE_INT d = INTVAL (op1);
4085                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4086
4087                 /* n rem d = n rem -d */
4088                 if (rem_flag && d < 0)
4089                   {
4090                     d = abs_d;
4091                     op1 = gen_int_mode (abs_d, compute_mode);
4092                   }
4093
4094                 if (d == 1)
4095                   quotient = op0;
4096                 else if (d == -1)
4097                   quotient = expand_unop (compute_mode, neg_optab, op0,
4098                                           tquotient, 0);
4099                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4100                   {
4101                     /* This case is not handled correctly below.  */
4102                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4103                                                 compute_mode, 1, 1);
4104                     if (quotient == 0)
4105                       goto fail1;
4106                   }
4107                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4108                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4109                                       : sdiv_pow2_cheap[compute_mode])
4110                          /* We assume that the cheap metric is true if the
4111                             optab has an expander for this mode.  */
4112                          && (((rem_flag ? smod_optab : sdiv_optab)
4113                               ->handlers[compute_mode].insn_code
4114                               != CODE_FOR_nothing)
4115                              || (sdivmod_optab->handlers[compute_mode]
4116                                  .insn_code != CODE_FOR_nothing)))
4117                   ;
4118                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4119                   {
4120                     if (rem_flag)
4121                       {
4122                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4123                         if (remainder)
4124                           return gen_lowpart (mode, remainder);
4125                       }
4126
4127                     if (sdiv_pow2_cheap[compute_mode]
4128                         && ((sdiv_optab->handlers[compute_mode].insn_code
4129                              != CODE_FOR_nothing)
4130                             || (sdivmod_optab->handlers[compute_mode].insn_code
4131                                 != CODE_FOR_nothing)))
4132                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4133                                                 compute_mode, op0,
4134                                                 gen_int_mode (abs_d,
4135                                                               compute_mode),
4136                                                 NULL_RTX, 0);
4137                     else
4138                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4139
4140                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4141                        negate the quotient.  */
4142                     if (d < 0)
4143                       {
4144                         insn = get_last_insn ();
4145                         if (insn != last
4146                             && (set = single_set (insn)) != 0
4147                             && SET_DEST (set) == quotient
4148                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4149                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4150                           set_unique_reg_note (insn,
4151                                                REG_EQUAL,
4152                                                gen_rtx_DIV (compute_mode,
4153                                                             op0,
4154                                                             GEN_INT
4155                                                             (trunc_int_for_mode
4156                                                              (abs_d,
4157                                                               compute_mode))));
4158
4159                         quotient = expand_unop (compute_mode, neg_optab,
4160                                                 quotient, quotient, 0);
4161                       }
4162                   }
4163                 else if (size <= HOST_BITS_PER_WIDE_INT)
4164                   {
4165                     choose_multiplier (abs_d, size, size - 1,
4166                                        &mlr, &post_shift, &lgup);
4167                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4168                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4169                       {
4170                         rtx t1, t2, t3;
4171
4172                         if (post_shift >= BITS_PER_WORD
4173                             || size - 1 >= BITS_PER_WORD)
4174                           goto fail1;
4175
4176                         extra_cost = (shift_cost[compute_mode][post_shift]
4177                                       + shift_cost[compute_mode][size - 1]
4178                                       + add_cost[compute_mode]);
4179                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4180                                                    NULL_RTX, 0,
4181                                                    max_cost - extra_cost);
4182                         if (t1 == 0)
4183                           goto fail1;
4184                         t2 = expand_shift
4185                           (RSHIFT_EXPR, compute_mode, t1,
4186                            build_int_cst (NULL_TREE, post_shift),
4187                            NULL_RTX, 0);
4188                         t3 = expand_shift
4189                           (RSHIFT_EXPR, compute_mode, op0,
4190                            build_int_cst (NULL_TREE, size - 1),
4191                            NULL_RTX, 0);
4192                         if (d < 0)
4193                           quotient
4194                             = force_operand (gen_rtx_MINUS (compute_mode,
4195                                                             t3, t2),
4196                                              tquotient);
4197                         else
4198                           quotient
4199                             = force_operand (gen_rtx_MINUS (compute_mode,
4200                                                             t2, t3),
4201                                              tquotient);
4202                       }
4203                     else
4204                       {
4205                         rtx t1, t2, t3, t4;
4206
4207                         if (post_shift >= BITS_PER_WORD
4208                             || size - 1 >= BITS_PER_WORD)
4209                           goto fail1;
4210
4211                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4212                         mlr = gen_int_mode (ml, compute_mode);
4213                         extra_cost = (shift_cost[compute_mode][post_shift]
4214                                       + shift_cost[compute_mode][size - 1]
4215                                       + 2 * add_cost[compute_mode]);
4216                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4217                                                    NULL_RTX, 0,
4218                                                    max_cost - extra_cost);
4219                         if (t1 == 0)
4220                           goto fail1;
4221                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4222                                                           t1, op0),
4223                                             NULL_RTX);
4224                         t3 = expand_shift
4225                           (RSHIFT_EXPR, compute_mode, t2,
4226                            build_int_cst (NULL_TREE, post_shift),
4227                            NULL_RTX, 0);
4228                         t4 = expand_shift
4229                           (RSHIFT_EXPR, compute_mode, op0,
4230                            build_int_cst (NULL_TREE, size - 1),
4231                            NULL_RTX, 0);
4232                         if (d < 0)
4233                           quotient
4234                             = force_operand (gen_rtx_MINUS (compute_mode,
4235                                                             t4, t3),
4236                                              tquotient);
4237                         else
4238                           quotient
4239                             = force_operand (gen_rtx_MINUS (compute_mode,
4240                                                             t3, t4),
4241                                              tquotient);
4242                       }
4243                   }
4244                 else            /* Too wide mode to use tricky code */
4245                   break;
4246
4247                 insn = get_last_insn ();
4248                 if (insn != last
4249                     && (set = single_set (insn)) != 0
4250                     && SET_DEST (set) == quotient)
4251                   set_unique_reg_note (insn,
4252                                        REG_EQUAL,
4253                                        gen_rtx_DIV (compute_mode, op0, op1));
4254               }
4255             break;
4256           }
4257       fail1:
4258         delete_insns_since (last);
4259         break;
4260
4261       case FLOOR_DIV_EXPR:
4262       case FLOOR_MOD_EXPR:
4263       /* We will come here only for signed operations.  */
4264         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4265           {
4266             unsigned HOST_WIDE_INT mh;
4267             int pre_shift, lgup, post_shift;
4268             HOST_WIDE_INT d = INTVAL (op1);
4269             rtx ml;
4270
4271             if (d > 0)
4272               {
4273                 /* We could just as easily deal with negative constants here,
4274                    but it does not seem worth the trouble for GCC 2.6.  */
4275                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4276                   {
4277                     pre_shift = floor_log2 (d);
4278                     if (rem_flag)
4279                       {
4280                         remainder = expand_binop (compute_mode, and_optab, op0,
4281                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4282                                                   remainder, 0, OPTAB_LIB_WIDEN);
4283                         if (remainder)
4284                           return gen_lowpart (mode, remainder);
4285                       }
4286                     quotient = expand_shift
4287                       (RSHIFT_EXPR, compute_mode, op0,
4288                        build_int_cst (NULL_TREE, pre_shift),
4289                        tquotient, 0);
4290                   }
4291                 else
4292                   {
4293                     rtx t1, t2, t3, t4;
4294
4295                     mh = choose_multiplier (d, size, size - 1,
4296                                             &ml, &post_shift, &lgup);
4297                     gcc_assert (!mh);
4298
4299                     if (post_shift < BITS_PER_WORD
4300                         && size - 1 < BITS_PER_WORD)
4301                       {
4302                         t1 = expand_shift
4303                           (RSHIFT_EXPR, compute_mode, op0,
4304                            build_int_cst (NULL_TREE, size - 1),
4305                            NULL_RTX, 0);
4306                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4307                                            NULL_RTX, 0, OPTAB_WIDEN);
4308                         extra_cost = (shift_cost[compute_mode][post_shift]
4309                                       + shift_cost[compute_mode][size - 1]
4310                                       + 2 * add_cost[compute_mode]);
4311                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4312                                                    NULL_RTX, 1,
4313                                                    max_cost - extra_cost);
4314                         if (t3 != 0)
4315                           {
4316                             t4 = expand_shift
4317                               (RSHIFT_EXPR, compute_mode, t3,
4318                                build_int_cst (NULL_TREE, post_shift),
4319                                NULL_RTX, 1);
4320                             quotient = expand_binop (compute_mode, xor_optab,
4321                                                      t4, t1, tquotient, 0,
4322                                                      OPTAB_WIDEN);
4323                           }
4324                       }
4325                   }
4326               }
4327             else
4328               {
4329                 rtx nsign, t1, t2, t3, t4;
4330                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4331                                                   op0, constm1_rtx), NULL_RTX);
4332                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4333                                    0, OPTAB_WIDEN);
4334                 nsign = expand_shift
4335                   (RSHIFT_EXPR, compute_mode, t2,
4336                    build_int_cst (NULL_TREE, size - 1),
4337                    NULL_RTX, 0);
4338                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4339                                     NULL_RTX);
4340                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4341                                     NULL_RTX, 0);
4342                 if (t4)
4343                   {
4344                     rtx t5;
4345                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4346                                       NULL_RTX, 0);
4347                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4348                                                             t4, t5),
4349                                               tquotient);
4350                   }
4351               }
4352           }
4353
4354         if (quotient != 0)
4355           break;
4356         delete_insns_since (last);
4357
4358         /* Try using an instruction that produces both the quotient and
4359            remainder, using truncation.  We can easily compensate the quotient
4360            or remainder to get floor rounding, once we have the remainder.
4361            Notice that we also compute the final remainder value here,
4362            and return the result right away.  */
4363         if (target == 0 || GET_MODE (target) != compute_mode)
4364           target = gen_reg_rtx (compute_mode);
4365
4366         if (rem_flag)
4367           {
4368             remainder
4369               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4370             quotient = gen_reg_rtx (compute_mode);
4371           }
4372         else
4373           {
4374             quotient
4375               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4376             remainder = gen_reg_rtx (compute_mode);
4377           }
4378
4379         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4380                                  quotient, remainder, 0))
4381           {
4382             /* This could be computed with a branch-less sequence.
4383                Save that for later.  */
4384             rtx tem;
4385             rtx label = gen_label_rtx ();
4386             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4387             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4388                                 NULL_RTX, 0, OPTAB_WIDEN);
4389             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4390             expand_dec (quotient, const1_rtx);
4391             expand_inc (remainder, op1);
4392             emit_label (label);
4393             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4394           }
4395
4396         /* No luck with division elimination or divmod.  Have to do it
4397            by conditionally adjusting op0 *and* the result.  */
4398         {
4399           rtx label1, label2, label3, label4, label5;
4400           rtx adjusted_op0;
4401           rtx tem;
4402
4403           quotient = gen_reg_rtx (compute_mode);
4404           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4405           label1 = gen_label_rtx ();
4406           label2 = gen_label_rtx ();
4407           label3 = gen_label_rtx ();
4408           label4 = gen_label_rtx ();
4409           label5 = gen_label_rtx ();
4410           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4411           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4412           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4413                               quotient, 0, OPTAB_LIB_WIDEN);
4414           if (tem != quotient)
4415             emit_move_insn (quotient, tem);
4416           emit_jump_insn (gen_jump (label5));
4417           emit_barrier ();
4418           emit_label (label1);
4419           expand_inc (adjusted_op0, const1_rtx);
4420           emit_jump_insn (gen_jump (label4));
4421           emit_barrier ();
4422           emit_label (label2);
4423           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4424           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4425                               quotient, 0, OPTAB_LIB_WIDEN);
4426           if (tem != quotient)
4427             emit_move_insn (quotient, tem);
4428           emit_jump_insn (gen_jump (label5));
4429           emit_barrier ();
4430           emit_label (label3);
4431           expand_dec (adjusted_op0, const1_rtx);
4432           emit_label (label4);
4433           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4434                               quotient, 0, OPTAB_LIB_WIDEN);
4435           if (tem != quotient)
4436             emit_move_insn (quotient, tem);
4437           expand_dec (quotient, const1_rtx);
4438           emit_label (label5);
4439         }
4440         break;
4441
4442       case CEIL_DIV_EXPR:
4443       case CEIL_MOD_EXPR:
4444         if (unsignedp)
4445           {
4446             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4447               {
4448                 rtx t1, t2, t3;
4449                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4450                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4451                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4452                                    tquotient, 1);
4453                 t2 = expand_binop (compute_mode, and_optab, op0,
4454                                    GEN_INT (d - 1),
4455                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4456                 t3 = gen_reg_rtx (compute_mode);
4457                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4458                                       compute_mode, 1, 1);
4459                 if (t3 == 0)
4460                   {
4461                     rtx lab;
4462                     lab = gen_label_rtx ();
4463                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4464                     expand_inc (t1, const1_rtx);
4465                     emit_label (lab);
4466                     quotient = t1;
4467                   }
4468                 else
4469                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4470                                                           t1, t3),
4471                                             tquotient);
4472                 break;
4473               }
4474
4475             /* Try using an instruction that produces both the quotient and
4476                remainder, using truncation.  We can easily compensate the
4477                quotient or remainder to get ceiling rounding, once we have the
4478                remainder.  Notice that we compute also the final remainder
4479                value here, and return the result right away.  */
4480             if (target == 0 || GET_MODE (target) != compute_mode)
4481               target = gen_reg_rtx (compute_mode);
4482
4483             if (rem_flag)
4484               {
4485                 remainder = (REG_P (target)
4486                              ? target : gen_reg_rtx (compute_mode));
4487                 quotient = gen_reg_rtx (compute_mode);
4488               }
4489             else
4490               {
4491                 quotient = (REG_P (target)
4492                             ? target : gen_reg_rtx (compute_mode));
4493                 remainder = gen_reg_rtx (compute_mode);
4494               }
4495
4496             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4497                                      remainder, 1))
4498               {
4499                 /* This could be computed with a branch-less sequence.
4500                    Save that for later.  */
4501                 rtx label = gen_label_rtx ();
4502                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4503                                  compute_mode, label);
4504                 expand_inc (quotient, const1_rtx);
4505                 expand_dec (remainder, op1);
4506                 emit_label (label);
4507                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4508               }
4509
4510             /* No luck with division elimination or divmod.  Have to do it
4511                by conditionally adjusting op0 *and* the result.  */
4512             {
4513               rtx label1, label2;
4514               rtx adjusted_op0, tem;
4515
4516               quotient = gen_reg_rtx (compute_mode);
4517               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4518               label1 = gen_label_rtx ();
4519               label2 = gen_label_rtx ();
4520               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4521                                compute_mode, label1);
4522               emit_move_insn  (quotient, const0_rtx);
4523               emit_jump_insn (gen_jump (label2));
4524               emit_barrier ();
4525               emit_label (label1);
4526               expand_dec (adjusted_op0, const1_rtx);
4527               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4528                                   quotient, 1, OPTAB_LIB_WIDEN);
4529               if (tem != quotient)
4530                 emit_move_insn (quotient, tem);
4531               expand_inc (quotient, const1_rtx);
4532               emit_label (label2);
4533             }
4534           }
4535         else /* signed */
4536           {
4537             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4538                 && INTVAL (op1) >= 0)
4539               {
4540                 /* This is extremely similar to the code for the unsigned case
4541                    above.  For 2.7 we should merge these variants, but for
4542                    2.6.1 I don't want to touch the code for unsigned since that
4543                    get used in C.  The signed case will only be used by other
4544                    languages (Ada).  */
4545
4546                 rtx t1, t2, t3;
4547                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4548                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4549                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4550                                    tquotient, 0);
4551                 t2 = expand_binop (compute_mode, and_optab, op0,
4552                                    GEN_INT (d - 1),
4553                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4554                 t3 = gen_reg_rtx (compute_mode);
4555                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4556                                       compute_mode, 1, 1);
4557                 if (t3 == 0)
4558                   {
4559                     rtx lab;
4560                     lab = gen_label_rtx ();
4561                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4562                     expand_inc (t1, const1_rtx);
4563                     emit_label (lab);
4564                     quotient = t1;
4565                   }
4566                 else
4567                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4568                                                           t1, t3),
4569                                             tquotient);
4570                 break;
4571               }
4572
4573             /* Try using an instruction that produces both the quotient and
4574                remainder, using truncation.  We can easily compensate the
4575                quotient or remainder to get ceiling rounding, once we have the
4576                remainder.  Notice that we compute also the final remainder
4577                value here, and return the result right away.  */
4578             if (target == 0 || GET_MODE (target) != compute_mode)
4579               target = gen_reg_rtx (compute_mode);
4580             if (rem_flag)
4581               {
4582                 remainder= (REG_P (target)
4583                             ? target : gen_reg_rtx (compute_mode));
4584                 quotient = gen_reg_rtx (compute_mode);
4585               }
4586             else
4587               {
4588                 quotient = (REG_P (target)
4589                             ? target : gen_reg_rtx (compute_mode));
4590                 remainder = gen_reg_rtx (compute_mode);
4591               }
4592
4593             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4594                                      remainder, 0))
4595               {
4596                 /* This could be computed with a branch-less sequence.
4597                    Save that for later.  */
4598                 rtx tem;
4599                 rtx label = gen_label_rtx ();
4600                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4601                                  compute_mode, label);
4602                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4603                                     NULL_RTX, 0, OPTAB_WIDEN);
4604                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4605                 expand_inc (quotient, const1_rtx);
4606                 expand_dec (remainder, op1);
4607                 emit_label (label);
4608                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4609               }
4610
4611             /* No luck with division elimination or divmod.  Have to do it
4612                by conditionally adjusting op0 *and* the result.  */
4613             {
4614               rtx label1, label2, label3, label4, label5;
4615               rtx adjusted_op0;
4616               rtx tem;
4617
4618               quotient = gen_reg_rtx (compute_mode);
4619               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4620               label1 = gen_label_rtx ();
4621               label2 = gen_label_rtx ();
4622               label3 = gen_label_rtx ();
4623               label4 = gen_label_rtx ();
4624               label5 = gen_label_rtx ();
4625               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4626               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4627                                compute_mode, label1);
4628               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4629                                   quotient, 0, OPTAB_LIB_WIDEN);
4630               if (tem != quotient)
4631                 emit_move_insn (quotient, tem);
4632               emit_jump_insn (gen_jump (label5));
4633               emit_barrier ();
4634               emit_label (label1);
4635               expand_dec (adjusted_op0, const1_rtx);
4636               emit_jump_insn (gen_jump (label4));
4637               emit_barrier ();
4638               emit_label (label2);
4639               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4640                                compute_mode, label3);
4641               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4642                                   quotient, 0, OPTAB_LIB_WIDEN);
4643               if (tem != quotient)
4644                 emit_move_insn (quotient, tem);
4645               emit_jump_insn (gen_jump (label5));
4646               emit_barrier ();
4647               emit_label (label3);
4648               expand_inc (adjusted_op0, const1_rtx);
4649               emit_label (label4);
4650               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4651                                   quotient, 0, OPTAB_LIB_WIDEN);
4652               if (tem != quotient)
4653                 emit_move_insn (quotient, tem);
4654               expand_inc (quotient, const1_rtx);
4655               emit_label (label5);
4656             }
4657           }
4658         break;
4659
4660       case EXACT_DIV_EXPR:
4661         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4662           {
4663             HOST_WIDE_INT d = INTVAL (op1);
4664             unsigned HOST_WIDE_INT ml;
4665             int pre_shift;
4666             rtx t1;
4667
4668             pre_shift = floor_log2 (d & -d);
4669             ml = invert_mod2n (d >> pre_shift, size);
4670             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4671                                build_int_cst (NULL_TREE, pre_shift),
4672                                NULL_RTX, unsignedp);
4673             quotient = expand_mult (compute_mode, t1,
4674                                     gen_int_mode (ml, compute_mode),
4675                                     NULL_RTX, 1);
4676
4677             insn = get_last_insn ();
4678             set_unique_reg_note (insn,
4679                                  REG_EQUAL,
4680                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4681                                                  compute_mode,
4682                                                  op0, op1));
4683           }
4684         break;
4685
4686       case ROUND_DIV_EXPR:
4687       case ROUND_MOD_EXPR:
4688         if (unsignedp)
4689           {
4690             rtx tem;
4691             rtx label;
4692             label = gen_label_rtx ();
4693             quotient = gen_reg_rtx (compute_mode);
4694             remainder = gen_reg_rtx (compute_mode);
4695             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4696               {
4697                 rtx tem;
4698                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4699                                          quotient, 1, OPTAB_LIB_WIDEN);
4700                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4701                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4702                                           remainder, 1, OPTAB_LIB_WIDEN);
4703               }
4704             tem = plus_constant (op1, -1);
4705             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4706                                 build_int_cst (NULL_TREE, 1),
4707                                 NULL_RTX, 1);
4708             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4709             expand_inc (quotient, const1_rtx);
4710             expand_dec (remainder, op1);
4711             emit_label (label);
4712           }
4713         else
4714           {
4715             rtx abs_rem, abs_op1, tem, mask;
4716             rtx label;
4717             label = gen_label_rtx ();
4718             quotient = gen_reg_rtx (compute_mode);
4719             remainder = gen_reg_rtx (compute_mode);
4720             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4721               {
4722                 rtx tem;
4723                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4724                                          quotient, 0, OPTAB_LIB_WIDEN);
4725                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4726                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4727                                           remainder, 0, OPTAB_LIB_WIDEN);
4728               }
4729             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4730             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4731             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4732                                 build_int_cst (NULL_TREE, 1),
4733                                 NULL_RTX, 1);
4734             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4735             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4736                                 NULL_RTX, 0, OPTAB_WIDEN);
4737             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4738                                  build_int_cst (NULL_TREE, size - 1),
4739                                  NULL_RTX, 0);
4740             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4741                                 NULL_RTX, 0, OPTAB_WIDEN);
4742             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4743                                 NULL_RTX, 0, OPTAB_WIDEN);
4744             expand_inc (quotient, tem);
4745             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4746                                 NULL_RTX, 0, OPTAB_WIDEN);
4747             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4748                                 NULL_RTX, 0, OPTAB_WIDEN);
4749             expand_dec (remainder, tem);
4750             emit_label (label);
4751           }
4752         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4753
4754       default:
4755         gcc_unreachable ();
4756       }
4757
4758   if (quotient == 0)
4759     {
4760       if (target && GET_MODE (target) != compute_mode)
4761         target = 0;
4762
4763       if (rem_flag)
4764         {
4765           /* Try to produce the remainder without producing the quotient.
4766              If we seem to have a divmod pattern that does not require widening,
4767              don't try widening here.  We should really have a WIDEN argument
4768              to expand_twoval_binop, since what we'd really like to do here is
4769              1) try a mod insn in compute_mode
4770              2) try a divmod insn in compute_mode
4771              3) try a div insn in compute_mode and multiply-subtract to get
4772                 remainder
4773              4) try the same things with widening allowed.  */
4774           remainder
4775             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4776                                  op0, op1, target,
4777                                  unsignedp,
4778                                  ((optab2->handlers[compute_mode].insn_code
4779                                    != CODE_FOR_nothing)
4780                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4781           if (remainder == 0)
4782             {
4783               /* No luck there.  Can we do remainder and divide at once
4784                  without a library call?  */
4785               remainder = gen_reg_rtx (compute_mode);
4786               if (! expand_twoval_binop ((unsignedp
4787                                           ? udivmod_optab
4788                                           : sdivmod_optab),
4789                                          op0, op1,
4790                                          NULL_RTX, remainder, unsignedp))
4791                 remainder = 0;
4792             }
4793
4794           if (remainder)
4795             return gen_lowpart (mode, remainder);
4796         }
4797
4798       /* Produce the quotient.  Try a quotient insn, but not a library call.
4799          If we have a divmod in this mode, use it in preference to widening
4800          the div (for this test we assume it will not fail). Note that optab2
4801          is set to the one of the two optabs that the call below will use.  */
4802       quotient
4803         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4804                              op0, op1, rem_flag ? NULL_RTX : target,
4805                              unsignedp,
4806                              ((optab2->handlers[compute_mode].insn_code
4807                                != CODE_FOR_nothing)
4808                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4809
4810       if (quotient == 0)
4811         {
4812           /* No luck there.  Try a quotient-and-remainder insn,
4813              keeping the quotient alone.  */
4814           quotient = gen_reg_rtx (compute_mode);
4815           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4816                                      op0, op1,
4817                                      quotient, NULL_RTX, unsignedp))
4818             {
4819               quotient = 0;
4820               if (! rem_flag)
4821                 /* Still no luck.  If we are not computing the remainder,
4822                    use a library call for the quotient.  */
4823                 quotient = sign_expand_binop (compute_mode,
4824                                               udiv_optab, sdiv_optab,
4825                                               op0, op1, target,
4826                                               unsignedp, OPTAB_LIB_WIDEN);
4827             }
4828         }
4829     }
4830
4831   if (rem_flag)
4832     {
4833       if (target && GET_MODE (target) != compute_mode)
4834         target = 0;
4835
4836       if (quotient == 0)
4837         {
4838           /* No divide instruction either.  Use library for remainder.  */
4839           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4840                                          op0, op1, target,
4841                                          unsignedp, OPTAB_LIB_WIDEN);
4842           /* No remainder function.  Try a quotient-and-remainder
4843              function, keeping the remainder.  */
4844           if (!remainder)
4845             {
4846               remainder = gen_reg_rtx (compute_mode);
4847               if (!expand_twoval_binop_libfunc
4848                   (unsignedp ? udivmod_optab : sdivmod_optab,
4849                    op0, op1,
4850                    NULL_RTX, remainder,
4851                    unsignedp ? UMOD : MOD))
4852                 remainder = NULL_RTX;
4853             }
4854         }
4855       else
4856         {
4857           /* We divided.  Now finish doing X - Y * (X / Y).  */
4858           remainder = expand_mult (compute_mode, quotient, op1,
4859                                    NULL_RTX, unsignedp);
4860           remainder = expand_binop (compute_mode, sub_optab, op0,
4861                                     remainder, target, unsignedp,
4862                                     OPTAB_LIB_WIDEN);
4863         }
4864     }
4865
4866   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4867 }
4868 \f
4869 /* Return a tree node with data type TYPE, describing the value of X.
4870    Usually this is an VAR_DECL, if there is no obvious better choice.
4871    X may be an expression, however we only support those expressions
4872    generated by loop.c.  */
4873
4874 tree
4875 make_tree (tree type, rtx x)
4876 {
4877   tree t;
4878
4879   switch (GET_CODE (x))
4880     {
4881     case CONST_INT:
4882       {
4883         HOST_WIDE_INT hi = 0;
4884
4885         if (INTVAL (x) < 0
4886             && !(TYPE_UNSIGNED (type)
4887                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4888                      < HOST_BITS_PER_WIDE_INT)))
4889           hi = -1;
4890
4891         t = build_int_cst_wide (type, INTVAL (x), hi);
4892
4893         return t;
4894       }
4895
4896     case CONST_DOUBLE:
4897       if (GET_MODE (x) == VOIDmode)
4898         t = build_int_cst_wide (type,
4899                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4900       else
4901         {
4902           REAL_VALUE_TYPE d;
4903
4904           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4905           t = build_real (type, d);
4906         }
4907
4908       return t;
4909
4910     case CONST_VECTOR:
4911       {
4912         int i, units;
4913         rtx elt;
4914         tree t = NULL_TREE;
4915
4916         units = CONST_VECTOR_NUNITS (x);
4917
4918         /* Build a tree with vector elements.  */
4919         for (i = units - 1; i >= 0; --i)
4920           {
4921             elt = CONST_VECTOR_ELT (x, i);
4922             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4923           }
4924
4925         return build_vector (type, t);
4926       }
4927
4928     case PLUS:
4929       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4930                           make_tree (type, XEXP (x, 1)));
4931
4932     case MINUS:
4933       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4934                           make_tree (type, XEXP (x, 1)));
4935
4936     case NEG:
4937       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4938
4939     case MULT:
4940       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4941                           make_tree (type, XEXP (x, 1)));
4942
4943     case ASHIFT:
4944       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4945                           make_tree (type, XEXP (x, 1)));
4946
4947     case LSHIFTRT:
4948       t = lang_hooks.types.unsigned_type (type);
4949       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4950                                          make_tree (t, XEXP (x, 0)),
4951                                          make_tree (type, XEXP (x, 1))));
4952
4953     case ASHIFTRT:
4954       t = lang_hooks.types.signed_type (type);
4955       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4956                                          make_tree (t, XEXP (x, 0)),
4957                                          make_tree (type, XEXP (x, 1))));
4958
4959     case DIV:
4960       if (TREE_CODE (type) != REAL_TYPE)
4961         t = lang_hooks.types.signed_type (type);
4962       else
4963         t = type;
4964
4965       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4966                                          make_tree (t, XEXP (x, 0)),
4967                                          make_tree (t, XEXP (x, 1))));
4968     case UDIV:
4969       t = lang_hooks.types.unsigned_type (type);
4970       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4971                                          make_tree (t, XEXP (x, 0)),
4972                                          make_tree (t, XEXP (x, 1))));
4973
4974     case SIGN_EXTEND:
4975     case ZERO_EXTEND:
4976       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4977                                           GET_CODE (x) == ZERO_EXTEND);
4978       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4979
4980     default:
4981       t = build_decl (VAR_DECL, NULL_TREE, type);
4982
4983       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4984          ptr_mode.  So convert.  */
4985       if (POINTER_TYPE_P (type))
4986         x = convert_memory_address (TYPE_MODE (type), x);
4987
4988       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4989          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4990       t->decl_with_rtl.rtl = x;
4991
4992       return t;
4993     }
4994 }
4995
4996 /* Check whether the multiplication X * MULT + ADD overflows.
4997    X, MULT and ADD must be CONST_*.
4998    MODE is the machine mode for the computation.
4999    X and MULT must have mode MODE.  ADD may have a different mode.
5000    So can X (defaults to same as MODE).
5001    UNSIGNEDP is nonzero to do unsigned multiplication.  */
5002
5003 bool
5004 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
5005                            enum machine_mode mode, int unsignedp)
5006 {
5007   tree type, mult_type, add_type, result;
5008
5009   type = lang_hooks.types.type_for_mode (mode, unsignedp);
5010
5011   /* In order to get a proper overflow indication from an unsigned
5012      type, we have to pretend that it's a sizetype.  */
5013   mult_type = type;
5014   if (unsignedp)
5015     {
5016       /* FIXME:It would be nice if we could step directly from this
5017          type to its sizetype equivalent.  */
5018       mult_type = build_distinct_type_copy (type);
5019       TYPE_IS_SIZETYPE (mult_type) = 1;
5020     }
5021
5022   add_type = (GET_MODE (add) == VOIDmode ? mult_type
5023               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
5024
5025   result = fold_build2 (PLUS_EXPR, mult_type,
5026                         fold_build2 (MULT_EXPR, mult_type,
5027                                      make_tree (mult_type, x),
5028                                      make_tree (mult_type, mult)),
5029                         make_tree (add_type, add));
5030
5031   return TREE_CONSTANT_OVERFLOW (result);
5032 }
5033
5034 /* Return an rtx representing the value of X * MULT + ADD.
5035    TARGET is a suggestion for where to store the result (an rtx).
5036    MODE is the machine mode for the computation.
5037    X and MULT must have mode MODE.  ADD may have a different mode.
5038    So can X (defaults to same as MODE).
5039    UNSIGNEDP is nonzero to do unsigned multiplication.
5040    This may emit insns.  */
5041
5042 rtx
5043 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5044                  int unsignedp)
5045 {
5046   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5047   tree add_type = (GET_MODE (add) == VOIDmode
5048                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5049                                                            unsignedp));
5050   tree result = fold_build2 (PLUS_EXPR, type,
5051                              fold_build2 (MULT_EXPR, type,
5052                                           make_tree (type, x),
5053                                           make_tree (type, mult)),
5054                              make_tree (add_type, add));
5055
5056   return expand_expr (result, target, VOIDmode, 0);
5057 }
5058 \f
5059 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5060    and returning TARGET.
5061
5062    If TARGET is 0, a pseudo-register or constant is returned.  */
5063
5064 rtx
5065 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5066 {
5067   rtx tem = 0;
5068
5069   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5070     tem = simplify_binary_operation (AND, mode, op0, op1);
5071   if (tem == 0)
5072     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5073
5074   if (target == 0)
5075     target = tem;
5076   else if (tem != target)
5077     emit_move_insn (target, tem);
5078   return target;
5079 }
5080 \f
5081 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5082    and storing in TARGET.  Normally return TARGET.
5083    Return 0 if that cannot be done.
5084
5085    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5086    it is VOIDmode, they cannot both be CONST_INT.
5087
5088    UNSIGNEDP is for the case where we have to widen the operands
5089    to perform the operation.  It says to use zero-extension.
5090
5091    NORMALIZEP is 1 if we should convert the result to be either zero
5092    or one.  NORMALIZEP is -1 if we should convert the result to be
5093    either zero or -1.  If NORMALIZEP is zero, the result will be left
5094    "raw" out of the scc insn.  */
5095
5096 rtx
5097 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5098                  enum machine_mode mode, int unsignedp, int normalizep)
5099 {
5100   rtx subtarget;
5101   enum insn_code icode;
5102   enum machine_mode compare_mode;
5103   enum machine_mode target_mode = GET_MODE (target);
5104   rtx tem;
5105   rtx last = get_last_insn ();
5106   rtx pattern, comparison;
5107
5108   if (unsignedp)
5109     code = unsigned_condition (code);
5110
5111   /* If one operand is constant, make it the second one.  Only do this
5112      if the other operand is not constant as well.  */
5113
5114   if (swap_commutative_operands_p (op0, op1))
5115     {
5116       tem = op0;
5117       op0 = op1;
5118       op1 = tem;
5119       code = swap_condition (code);
5120     }
5121
5122   if (mode == VOIDmode)
5123     mode = GET_MODE (op0);
5124
5125   /* For some comparisons with 1 and -1, we can convert this to
5126      comparisons with zero.  This will often produce more opportunities for
5127      store-flag insns.  */
5128
5129   switch (code)
5130     {
5131     case LT:
5132       if (op1 == const1_rtx)
5133         op1 = const0_rtx, code = LE;
5134       break;
5135     case LE:
5136       if (op1 == constm1_rtx)
5137         op1 = const0_rtx, code = LT;
5138       break;
5139     case GE:
5140       if (op1 == const1_rtx)
5141         op1 = const0_rtx, code = GT;
5142       break;
5143     case GT:
5144       if (op1 == constm1_rtx)
5145         op1 = const0_rtx, code = GE;
5146       break;
5147     case GEU:
5148       if (op1 == const1_rtx)
5149         op1 = const0_rtx, code = NE;
5150       break;
5151     case LTU:
5152       if (op1 == const1_rtx)
5153         op1 = const0_rtx, code = EQ;
5154       break;
5155     default:
5156       break;
5157     }
5158
5159   /* If we are comparing a double-word integer with zero or -1, we can
5160      convert the comparison into one involving a single word.  */
5161   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5162       && GET_MODE_CLASS (mode) == MODE_INT
5163       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5164     {
5165       if ((code == EQ || code == NE)
5166           && (op1 == const0_rtx || op1 == constm1_rtx))
5167         {
5168           rtx op00, op01, op0both;
5169
5170           /* Do a logical OR or AND of the two words and compare the result.  */
5171           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5172           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5173           op0both = expand_binop (word_mode,
5174                                   op1 == const0_rtx ? ior_optab : and_optab,
5175                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5176
5177           if (op0both != 0)
5178             return emit_store_flag (target, code, op0both, op1, word_mode,
5179                                     unsignedp, normalizep);
5180         }
5181       else if ((code == LT || code == GE) && op1 == const0_rtx)
5182         {
5183           rtx op0h;
5184
5185           /* If testing the sign bit, can just test on high word.  */
5186           op0h = simplify_gen_subreg (word_mode, op0, mode,
5187                                       subreg_highpart_offset (word_mode, mode));
5188           return emit_store_flag (target, code, op0h, op1, word_mode,
5189                                   unsignedp, normalizep);
5190         }
5191     }
5192
5193   /* From now on, we won't change CODE, so set ICODE now.  */
5194   icode = setcc_gen_code[(int) code];
5195
5196   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5197      complement of A (for GE) and shifting the sign bit to the low bit.  */
5198   if (op1 == const0_rtx && (code == LT || code == GE)
5199       && GET_MODE_CLASS (mode) == MODE_INT
5200       && (normalizep || STORE_FLAG_VALUE == 1
5201           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5202               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5203                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5204     {
5205       subtarget = target;
5206
5207       /* If the result is to be wider than OP0, it is best to convert it
5208          first.  If it is to be narrower, it is *incorrect* to convert it
5209          first.  */
5210       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5211         {
5212           op0 = convert_modes (target_mode, mode, op0, 0);
5213           mode = target_mode;
5214         }
5215
5216       if (target_mode != mode)
5217         subtarget = 0;
5218
5219       if (code == GE)
5220         op0 = expand_unop (mode, one_cmpl_optab, op0,
5221                            ((STORE_FLAG_VALUE == 1 || normalizep)
5222                             ? 0 : subtarget), 0);
5223
5224       if (STORE_FLAG_VALUE == 1 || normalizep)
5225         /* If we are supposed to produce a 0/1 value, we want to do
5226            a logical shift from the sign bit to the low-order bit; for
5227            a -1/0 value, we do an arithmetic shift.  */
5228         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5229                             size_int (GET_MODE_BITSIZE (mode) - 1),
5230                             subtarget, normalizep != -1);
5231
5232       if (mode != target_mode)
5233         op0 = convert_modes (target_mode, mode, op0, 0);
5234
5235       return op0;
5236     }
5237
5238   if (icode != CODE_FOR_nothing)
5239     {
5240       insn_operand_predicate_fn pred;
5241
5242       /* We think we may be able to do this with a scc insn.  Emit the
5243          comparison and then the scc insn.  */
5244
5245       do_pending_stack_adjust ();
5246       last = get_last_insn ();
5247
5248       comparison
5249         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5250       if (CONSTANT_P (comparison))
5251         {
5252           switch (GET_CODE (comparison))
5253             {
5254             case CONST_INT:
5255               if (comparison == const0_rtx)
5256                 return const0_rtx;
5257               break;
5258
5259 #ifdef FLOAT_STORE_FLAG_VALUE
5260             case CONST_DOUBLE:
5261               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5262                 return const0_rtx;
5263               break;
5264 #endif
5265             default:
5266               gcc_unreachable ();
5267             }
5268
5269           if (normalizep == 1)
5270             return const1_rtx;
5271           if (normalizep == -1)
5272             return constm1_rtx;
5273           return const_true_rtx;
5274         }
5275
5276       /* The code of COMPARISON may not match CODE if compare_from_rtx
5277          decided to swap its operands and reverse the original code.
5278
5279          We know that compare_from_rtx returns either a CONST_INT or
5280          a new comparison code, so it is safe to just extract the
5281          code from COMPARISON.  */
5282       code = GET_CODE (comparison);
5283
5284       /* Get a reference to the target in the proper mode for this insn.  */
5285       compare_mode = insn_data[(int) icode].operand[0].mode;
5286       subtarget = target;
5287       pred = insn_data[(int) icode].operand[0].predicate;
5288       if (optimize || ! (*pred) (subtarget, compare_mode))
5289         subtarget = gen_reg_rtx (compare_mode);
5290
5291       pattern = GEN_FCN (icode) (subtarget);
5292       if (pattern)
5293         {
5294           emit_insn (pattern);
5295
5296           /* If we are converting to a wider mode, first convert to
5297              TARGET_MODE, then normalize.  This produces better combining
5298              opportunities on machines that have a SIGN_EXTRACT when we are
5299              testing a single bit.  This mostly benefits the 68k.
5300
5301              If STORE_FLAG_VALUE does not have the sign bit set when
5302              interpreted in COMPARE_MODE, we can do this conversion as
5303              unsigned, which is usually more efficient.  */
5304           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5305             {
5306               convert_move (target, subtarget,
5307                             (GET_MODE_BITSIZE (compare_mode)
5308                              <= HOST_BITS_PER_WIDE_INT)
5309                             && 0 == (STORE_FLAG_VALUE
5310                                      & ((HOST_WIDE_INT) 1
5311                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5312               op0 = target;
5313               compare_mode = target_mode;
5314             }
5315           else
5316             op0 = subtarget;
5317
5318           /* If we want to keep subexpressions around, don't reuse our
5319              last target.  */
5320
5321           if (optimize)
5322             subtarget = 0;
5323
5324           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5325              we don't have to do anything.  */
5326           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5327             ;
5328           /* STORE_FLAG_VALUE might be the most negative number, so write
5329              the comparison this way to avoid a compiler-time warning.  */
5330           else if (- normalizep == STORE_FLAG_VALUE)
5331             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5332
5333           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5334              makes it hard to use a value of just the sign bit due to
5335              ANSI integer constant typing rules.  */
5336           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5337                    && (STORE_FLAG_VALUE
5338                        & ((HOST_WIDE_INT) 1
5339                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5340             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5341                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5342                                 subtarget, normalizep == 1);
5343           else
5344             {
5345               gcc_assert (STORE_FLAG_VALUE & 1);
5346
5347               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5348               if (normalizep == -1)
5349                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5350             }
5351
5352           /* If we were converting to a smaller mode, do the
5353              conversion now.  */
5354           if (target_mode != compare_mode)
5355             {
5356               convert_move (target, op0, 0);
5357               return target;
5358             }
5359           else
5360             return op0;
5361         }
5362     }
5363
5364   delete_insns_since (last);
5365
5366   /* If optimizing, use different pseudo registers for each insn, instead
5367      of reusing the same pseudo.  This leads to better CSE, but slows
5368      down the compiler, since there are more pseudos */
5369   subtarget = (!optimize
5370                && (target_mode == mode)) ? target : NULL_RTX;
5371
5372   /* If we reached here, we can't do this with a scc insn.  However, there
5373      are some comparisons that can be done directly.  For example, if
5374      this is an equality comparison of integers, we can try to exclusive-or
5375      (or subtract) the two operands and use a recursive call to try the
5376      comparison with zero.  Don't do any of these cases if branches are
5377      very cheap.  */
5378
5379   if (BRANCH_COST > 0
5380       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5381       && op1 != const0_rtx)
5382     {
5383       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5384                           OPTAB_WIDEN);
5385
5386       if (tem == 0)
5387         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5388                             OPTAB_WIDEN);
5389       if (tem != 0)
5390         tem = emit_store_flag (target, code, tem, const0_rtx,
5391                                mode, unsignedp, normalizep);
5392       if (tem == 0)
5393         delete_insns_since (last);
5394       return tem;
5395     }
5396
5397   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5398      the constant zero.  Reject all other comparisons at this point.  Only
5399      do LE and GT if branches are expensive since they are expensive on
5400      2-operand machines.  */
5401
5402   if (BRANCH_COST == 0
5403       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5404       || (code != EQ && code != NE
5405           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5406     return 0;
5407
5408   /* See what we need to return.  We can only return a 1, -1, or the
5409      sign bit.  */
5410
5411   if (normalizep == 0)
5412     {
5413       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5414         normalizep = STORE_FLAG_VALUE;
5415
5416       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5417                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5418                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5419         ;
5420       else
5421         return 0;
5422     }
5423
5424   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5425      do the necessary operation below.  */
5426
5427   tem = 0;
5428
5429   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5430      the sign bit set.  */
5431
5432   if (code == LE)
5433     {
5434       /* This is destructive, so SUBTARGET can't be OP0.  */
5435       if (rtx_equal_p (subtarget, op0))
5436         subtarget = 0;
5437
5438       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5439                           OPTAB_WIDEN);
5440       if (tem)
5441         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5442                             OPTAB_WIDEN);
5443     }
5444
5445   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5446      number of bits in the mode of OP0, minus one.  */
5447
5448   if (code == GT)
5449     {
5450       if (rtx_equal_p (subtarget, op0))
5451         subtarget = 0;
5452
5453       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5454                           size_int (GET_MODE_BITSIZE (mode) - 1),
5455                           subtarget, 0);
5456       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5457                           OPTAB_WIDEN);
5458     }
5459
5460   if (code == EQ || code == NE)
5461     {
5462       /* For EQ or NE, one way to do the comparison is to apply an operation
5463          that converts the operand into a positive number if it is nonzero
5464          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5465          for NE we negate.  This puts the result in the sign bit.  Then we
5466          normalize with a shift, if needed.
5467
5468          Two operations that can do the above actions are ABS and FFS, so try
5469          them.  If that doesn't work, and MODE is smaller than a full word,
5470          we can use zero-extension to the wider mode (an unsigned conversion)
5471          as the operation.  */
5472
5473       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5474          that is compensated by the subsequent overflow when subtracting
5475          one / negating.  */
5476
5477       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5478         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5479       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5480         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5481       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5482         {
5483           tem = convert_modes (word_mode, mode, op0, 1);
5484           mode = word_mode;
5485         }
5486
5487       if (tem != 0)
5488         {
5489           if (code == EQ)
5490             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5491                                 0, OPTAB_WIDEN);
5492           else
5493             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5494         }
5495
5496       /* If we couldn't do it that way, for NE we can "or" the two's complement
5497          of the value with itself.  For EQ, we take the one's complement of
5498          that "or", which is an extra insn, so we only handle EQ if branches
5499          are expensive.  */
5500
5501       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5502         {
5503           if (rtx_equal_p (subtarget, op0))
5504             subtarget = 0;
5505
5506           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5507           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5508                               OPTAB_WIDEN);
5509
5510           if (tem && code == EQ)
5511             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5512         }
5513     }
5514
5515   if (tem && normalizep)
5516     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5517                         size_int (GET_MODE_BITSIZE (mode) - 1),
5518                         subtarget, normalizep == 1);
5519
5520   if (tem)
5521     {
5522       if (GET_MODE (tem) != target_mode)
5523         {
5524           convert_move (target, tem, 0);
5525           tem = target;
5526         }
5527       else if (!subtarget)
5528         {
5529           emit_move_insn (target, tem);
5530           tem = target;
5531         }
5532     }
5533   else
5534     delete_insns_since (last);
5535
5536   return tem;
5537 }
5538
5539 /* Like emit_store_flag, but always succeeds.  */
5540
5541 rtx
5542 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5543                        enum machine_mode mode, int unsignedp, int normalizep)
5544 {
5545   rtx tem, label;
5546
5547   /* First see if emit_store_flag can do the job.  */
5548   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5549   if (tem != 0)
5550     return tem;
5551
5552   if (normalizep == 0)
5553     normalizep = 1;
5554
5555   /* If this failed, we have to do this with set/compare/jump/set code.  */
5556
5557   if (!REG_P (target)
5558       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5559     target = gen_reg_rtx (GET_MODE (target));
5560
5561   emit_move_insn (target, const1_rtx);
5562   label = gen_label_rtx ();
5563   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5564                            NULL_RTX, label);
5565
5566   emit_move_insn (target, const0_rtx);
5567   emit_label (label);
5568
5569   return target;
5570 }
5571 \f
5572 /* Perform possibly multi-word comparison and conditional jump to LABEL
5573    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5574
5575    The algorithm is based on the code in expr.c:do_jump.
5576
5577    Note that this does not perform a general comparison.  Only
5578    variants generated within expmed.c are correctly handled, others
5579    could be handled if needed.  */
5580
5581 static void
5582 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5583                  rtx label)
5584 {
5585   /* If this mode is an integer too wide to compare properly,
5586      compare word by word.  Rely on cse to optimize constant cases.  */
5587
5588   if (GET_MODE_CLASS (mode) == MODE_INT
5589       && ! can_compare_p (op, mode, ccp_jump))
5590     {
5591       rtx label2 = gen_label_rtx ();
5592
5593       switch (op)
5594         {
5595         case LTU:
5596           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5597           break;
5598
5599         case LEU:
5600           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5601           break;
5602
5603         case LT:
5604           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5605           break;
5606
5607         case GT:
5608           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5609           break;
5610
5611         case GE:
5612           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5613           break;
5614
5615           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5616              that's the only equality operations we do */
5617         case EQ:
5618           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5619           do_jump_by_parts_equality_rtx (arg1, label2, label);
5620           break;
5621
5622         case NE:
5623           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5624           do_jump_by_parts_equality_rtx (arg1, label, label2);
5625           break;
5626
5627         default:
5628           gcc_unreachable ();
5629         }
5630
5631       emit_label (label2);
5632     }
5633   else
5634     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5635 }