gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 2, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING.  If not, write to the Free
  21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  22 02110-1301, USA.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "toplev.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "real.h"
  38 #include "recog.h"
  39 #include "langhooks.h"
  40
  41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT, rtx);
  44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  45                                    unsigned HOST_WIDE_INT, rtx);
  46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT, rtx, int);
  50 static rtx mask_rtx (enum machine_mode, int, int, int);
  51 static rtx lshift_value (enum machine_mode, rtx, int, int);
  52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  53                                     unsigned HOST_WIDE_INT, int);
  54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57
  58 /* Test whether a value is zero of a power of two.  */
  59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  60
  61 /* Nonzero means divides or modulus operations are relatively cheap for
  62    powers of two, so don't use branches; emit the operation instead.
  63    Usually, this will mean that the MD file will emit non-branch
  64    sequences.  */
  65
  66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  68
  69 #ifndef SLOW_UNALIGNED_ACCESS
  70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  71 #endif
  72
  73 /* For compilers that support multiple targets with different word sizes,
  74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  75    is the H8/300(H) compiler.  */
  76
  77 #ifndef MAX_BITS_PER_WORD
  78 #define MAX_BITS_PER_WORD BITS_PER_WORD
  79 #endif
  80
  81 /* Reduce conditional compilation elsewhere.  */
  82 #ifndef HAVE_insv
  83 #define HAVE_insv       0
  84 #define CODE_FOR_insv   CODE_FOR_nothing
  85 #define gen_insv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extv
  88 #define HAVE_extv       0
  89 #define CODE_FOR_extv   CODE_FOR_nothing
  90 #define gen_extv(a,b,c,d) NULL_RTX
  91 #endif
  92 #ifndef HAVE_extzv
  93 #define HAVE_extzv      0
  94 #define CODE_FOR_extzv  CODE_FOR_nothing
  95 #define gen_extzv(a,b,c,d) NULL_RTX
  96 #endif
  97
  98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  99    shift count and some by mode.  */
 100 static int zero_cost;
 101 static int add_cost[NUM_MACHINE_MODES];
 102 static int neg_cost[NUM_MACHINE_MODES];
 103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int mul_cost[NUM_MACHINE_MODES];
 107 static int sdiv_cost[NUM_MACHINE_MODES];
 108 static int udiv_cost[NUM_MACHINE_MODES];
 109 static int mul_widen_cost[NUM_MACHINE_MODES];
 110 static int mul_highpart_cost[NUM_MACHINE_MODES];
 111
 112 void
 113 init_expmed (void)
 114 {
 115   struct
 116   {
 117     struct rtx_def reg;         rtunion reg_fld[2];
 118     struct rtx_def plus;        rtunion plus_fld1;
 119     struct rtx_def neg;
 120     struct rtx_def mult;        rtunion mult_fld1;
 121     struct rtx_def sdiv;        rtunion sdiv_fld1;
 122     struct rtx_def udiv;        rtunion udiv_fld1;
 123     struct rtx_def zext;
 124     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 125     struct rtx_def smod_32;     rtunion smod_32_fld1;
 126     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 127     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 128     struct rtx_def wide_trunc;
 129     struct rtx_def shift;       rtunion shift_fld1;
 130     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 131     struct rtx_def shift_add;   rtunion shift_add_fld1;
 132     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 133   } all;
 134
 135   rtx pow2[MAX_BITS_PER_WORD];
 136   rtx cint[MAX_BITS_PER_WORD];
 137   int m, n;
 138   enum machine_mode mode, wider_mode;
 139
 140   zero_cost = rtx_cost (const0_rtx, 0);
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       cint[m] = GEN_INT (m);
 146     }
 147
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 209        mode != VOIDmode;
 210        mode = GET_MODE_WIDER_MODE (mode))
 211     {
 212       PUT_MODE (&all.reg, mode);
 213       PUT_MODE (&all.plus, mode);
 214       PUT_MODE (&all.neg, mode);
 215       PUT_MODE (&all.mult, mode);
 216       PUT_MODE (&all.sdiv, mode);
 217       PUT_MODE (&all.udiv, mode);
 218       PUT_MODE (&all.sdiv_32, mode);
 219       PUT_MODE (&all.smod_32, mode);
 220       PUT_MODE (&all.wide_trunc, mode);
 221       PUT_MODE (&all.shift, mode);
 222       PUT_MODE (&all.shift_mult, mode);
 223       PUT_MODE (&all.shift_add, mode);
 224       PUT_MODE (&all.shift_sub, mode);
 225
 226       add_cost[mode] = rtx_cost (&all.plus, SET);
 227       neg_cost[mode] = rtx_cost (&all.neg, SET);
 228       mul_cost[mode] = rtx_cost (&all.mult, SET);
 229       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
 230       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
 231
 232       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
 233                                <= 2 * add_cost[mode]);
 234       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
 235                                <= 4 * add_cost[mode]);
 236
 237       wider_mode = GET_MODE_WIDER_MODE (mode);
 238       if (wider_mode != VOIDmode)
 239         {
 240           PUT_MODE (&all.zext, wider_mode);
 241           PUT_MODE (&all.wide_mult, wider_mode);
 242           PUT_MODE (&all.wide_lshr, wider_mode);
 243           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 244
 245           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 246           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 247         }
 248
 249       shift_cost[mode][0] = 0;
 250       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 251
 252       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 253       for (m = 1; m < n; m++)
 254         {
 255           XEXP (&all.shift, 1) = cint[m];
 256           XEXP (&all.shift_mult, 1) = pow2[m];
 257
 258           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 259           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 260           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 261         }
 262     }
 263 }
 264
 265 /* Return an rtx representing minus the value of X.
 266    MODE is the intended mode of the result,
 267    useful if X is a CONST_INT.  */
 268
 269 rtx
 270 negate_rtx (enum machine_mode mode, rtx x)
 271 {
 272   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 273
 274   if (result == 0)
 275     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 276
 277   return result;
 278 }
 279
 280 /* Report on the availability of insv/extv/extzv and the desired mode
 281    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 282    is false; else the mode of the specified operand.  If OPNO is -1,
 283    all the caller cares about is whether the insn is available.  */
 284 enum machine_mode
 285 mode_for_extraction (enum extraction_pattern pattern, int opno)
 286 {
 287   const struct insn_data *data;
 288
 289   switch (pattern)
 290     {
 291     case EP_insv:
 292       if (HAVE_insv)
 293         {
 294           data = &insn_data[CODE_FOR_insv];
 295           break;
 296         }
 297       return MAX_MACHINE_MODE;
 298
 299     case EP_extv:
 300       if (HAVE_extv)
 301         {
 302           data = &insn_data[CODE_FOR_extv];
 303           break;
 304         }
 305       return MAX_MACHINE_MODE;
 306
 307     case EP_extzv:
 308       if (HAVE_extzv)
 309         {
 310           data = &insn_data[CODE_FOR_extzv];
 311           break;
 312         }
 313       return MAX_MACHINE_MODE;
 314
 315     default:
 316       gcc_unreachable ();
 317     }
 318
 319   if (opno == -1)
 320     return VOIDmode;
 321
 322   /* Everyone who uses this function used to follow it with
 323      if (result == VOIDmode) result = word_mode; */
 324   if (data->operand[opno].mode == VOIDmode)
 325     return word_mode;
 326   return data->operand[opno].mode;
 327 }
 328
 329 \f
 330 /* Generate code to store value from rtx VALUE
 331    into a bit-field within structure STR_RTX
 332    containing BITSIZE bits starting at bit BITNUM.
 333    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 334    ALIGN is the alignment that STR_RTX is known to have.
 335    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 336
 337 /* ??? Note that there are two different ideas here for how
 338    to determine the size to count bits within, for a register.
 339    One is BITS_PER_WORD, and the other is the size of operand 3
 340    of the insv pattern.
 341
 342    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 343    else, we use the mode of operand 3.  */
 344
 345 rtx
 346 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 347                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 348                  rtx value)
 349 {
 350   unsigned int unit
 351     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 352   unsigned HOST_WIDE_INT offset, bitpos;
 353   rtx op0 = str_rtx;
 354   int byte_offset;
 355   rtx orig_value;
 356
 357   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 358
 359   while (GET_CODE (op0) == SUBREG)
 360     {
 361       /* The following line once was done only if WORDS_BIG_ENDIAN,
 362          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 363          meaningful at a much higher level; when structures are copied
 364          between memory and regs, the higher-numbered regs
 365          always get higher addresses.  */
 366       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 367       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 368
 369       byte_offset = 0;
 370
 371       /* Paradoxical subregs need special handling on big endian machines.  */
 372       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 373         {
 374           int difference = inner_mode_size - outer_mode_size;
 375
 376           if (WORDS_BIG_ENDIAN)
 377             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 378           if (BYTES_BIG_ENDIAN)
 379             byte_offset += difference % UNITS_PER_WORD;
 380         }
 381       else
 382         byte_offset = SUBREG_BYTE (op0);
 383
 384       bitnum += byte_offset * BITS_PER_UNIT;
 385       op0 = SUBREG_REG (op0);
 386     }
 387
 388   /* No action is needed if the target is a register and if the field
 389      lies completely outside that register.  This can occur if the source
 390      code contains an out-of-bounds access to a small array.  */
 391   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 392     return value;
 393
 394   /* Use vec_set patterns for inserting parts of vectors whenever
 395      available.  */
 396   if (VECTOR_MODE_P (GET_MODE (op0))
 397       && !MEM_P (op0)
 398       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 399           != CODE_FOR_nothing)
 400       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 401       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 402       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 403     {
 404       enum machine_mode outermode = GET_MODE (op0);
 405       enum machine_mode innermode = GET_MODE_INNER (outermode);
 406       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 407       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 408       rtx rtxpos = GEN_INT (pos);
 409       rtx src = value;
 410       rtx dest = op0;
 411       rtx pat, seq;
 412       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 413       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 414       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 415
 416       start_sequence ();
 417
 418       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 419         src = copy_to_mode_reg (mode1, src);
 420
 421       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 422         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 423
 424       /* We could handle this, but we should always be called with a pseudo
 425          for our targets and all insns should take them as outputs.  */
 426       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 427                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 428                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 429       pat = GEN_FCN (icode) (dest, src, rtxpos);
 430       seq = get_insns ();
 431       end_sequence ();
 432       if (pat)
 433         {
 434           emit_insn (seq);
 435           emit_insn (pat);
 436           return dest;
 437         }
 438     }
 439
 440   /* If the target is a register, overwriting the entire object, or storing
 441      a full-word or multi-word field can be done with just a SUBREG.
 442
 443      If the target is memory, storing any naturally aligned field can be
 444      done with a simple store.  For targets that support fast unaligned
 445      memory, any naturally sized, unit aligned field can be done directly.  */
 446
 447   offset = bitnum / unit;
 448   bitpos = bitnum % unit;
 449   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 450                 + (offset * UNITS_PER_WORD);
 451
 452   if (bitpos == 0
 453       && bitsize == GET_MODE_BITSIZE (fieldmode)
 454       && (!MEM_P (op0)
 455           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 456              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 457              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 458           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 459              || (offset * BITS_PER_UNIT % bitsize == 0
 460                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 461     {
 462       if (MEM_P (op0))
 463         op0 = adjust_address (op0, fieldmode, offset);
 464       else if (GET_MODE (op0) != fieldmode)
 465         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 466                                    byte_offset);
 467       emit_move_insn (op0, value);
 468       return value;
 469     }
 470
 471   /* Make sure we are playing with integral modes.  Pun with subregs
 472      if we aren't.  This must come after the entire register case above,
 473      since that case is valid for any mode.  The following cases are only
 474      valid for integral modes.  */
 475   {
 476     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 477     if (imode != GET_MODE (op0))
 478       {
 479         if (MEM_P (op0))
 480           op0 = adjust_address (op0, imode, 0);
 481         else
 482           {
 483             gcc_assert (imode != BLKmode);
 484             op0 = gen_lowpart (imode, op0);
 485           }
 486       }
 487   }
 488
 489   /* We may be accessing data outside the field, which means
 490      we can alias adjacent data.  */
 491   if (MEM_P (op0))
 492     {
 493       op0 = shallow_copy_rtx (op0);
 494       set_mem_alias_set (op0, 0);
 495       set_mem_expr (op0, 0);
 496     }
 497
 498   /* If OP0 is a register, BITPOS must count within a word.
 499      But as we have it, it counts within whatever size OP0 now has.
 500      On a bigendian machine, these are not the same, so convert.  */
 501   if (BYTES_BIG_ENDIAN
 502       && !MEM_P (op0)
 503       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 504     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 505
 506   /* Storing an lsb-aligned field in a register
 507      can be done with a movestrict instruction.  */
 508
 509   if (!MEM_P (op0)
 510       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 511       && bitsize == GET_MODE_BITSIZE (fieldmode)
 512       && (movstrict_optab->handlers[fieldmode].insn_code
 513           != CODE_FOR_nothing))
 514     {
 515       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 516
 517       /* Get appropriate low part of the value being stored.  */
 518       if (GET_CODE (value) == CONST_INT || REG_P (value))
 519         value = gen_lowpart (fieldmode, value);
 520       else if (!(GET_CODE (value) == SYMBOL_REF
 521                  || GET_CODE (value) == LABEL_REF
 522                  || GET_CODE (value) == CONST))
 523         value = convert_to_mode (fieldmode, value, 0);
 524
 525       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 526         value = copy_to_mode_reg (fieldmode, value);
 527
 528       if (GET_CODE (op0) == SUBREG)
 529         {
 530           /* Else we've got some float mode source being extracted into
 531              a different float mode destination -- this combination of
 532              subregs results in Severe Tire Damage.  */
 533           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 534                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 535                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 536           op0 = SUBREG_REG (op0);
 537         }
 538
 539       emit_insn (GEN_FCN (icode)
 540                  (gen_rtx_SUBREG (fieldmode, op0,
 541                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 542                                   + (offset * UNITS_PER_WORD)),
 543                                   value));
 544
 545       return value;
 546     }
 547
 548   /* Handle fields bigger than a word.  */
 549
 550   if (bitsize > BITS_PER_WORD)
 551     {
 552       /* Here we transfer the words of the field
 553          in the order least significant first.
 554          This is because the most significant word is the one which may
 555          be less than full.
 556          However, only do that if the value is not BLKmode.  */
 557
 558       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 559       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 560       unsigned int i;
 561
 562       /* This is the mode we must force value to, so that there will be enough
 563          subwords to extract.  Note that fieldmode will often (always?) be
 564          VOIDmode, because that is what store_field uses to indicate that this
 565          is a bit field, but passing VOIDmode to operand_subword_force
 566          is not allowed.  */
 567       fieldmode = GET_MODE (value);
 568       if (fieldmode == VOIDmode)
 569         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 570
 571       for (i = 0; i < nwords; i++)
 572         {
 573           /* If I is 0, use the low-order word in both field and target;
 574              if I is 1, use the next to lowest word; and so on.  */
 575           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 576           unsigned int bit_offset = (backwards
 577                                      ? MAX ((int) bitsize - ((int) i + 1)
 578                                             * BITS_PER_WORD,
 579                                             0)
 580                                      : (int) i * BITS_PER_WORD);
 581
 582           store_bit_field (op0, MIN (BITS_PER_WORD,
 583                                      bitsize - i * BITS_PER_WORD),
 584                            bitnum + bit_offset, word_mode,
 585                            operand_subword_force (value, wordnum, fieldmode));
 586         }
 587       return value;
 588     }
 589
 590   /* From here on we can assume that the field to be stored in is
 591      a full-word (whatever type that is), since it is shorter than a word.  */
 592
 593   /* OFFSET is the number of words or bytes (UNIT says which)
 594      from STR_RTX to the first word or byte containing part of the field.  */
 595
 596   if (!MEM_P (op0))
 597     {
 598       if (offset != 0
 599           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 600         {
 601           if (!REG_P (op0))
 602             {
 603               /* Since this is a destination (lvalue), we can't copy
 604                  it to a pseudo.  We can remove a SUBREG that does not
 605                  change the size of the operand.  Such a SUBREG may
 606                  have been added above.  */
 607               gcc_assert (GET_CODE (op0) == SUBREG
 608                           && (GET_MODE_SIZE (GET_MODE (op0))
 609                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 610               op0 = SUBREG_REG (op0);
 611             }
 612           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 613                                 op0, (offset * UNITS_PER_WORD));
 614         }
 615       offset = 0;
 616     }
 617
 618   /* If VALUE has a floating-point or complex mode, access it as an
 619      integer of the corresponding size.  This can occur on a machine
 620      with 64 bit registers that uses SFmode for float.  It can also
 621      occur for unaligned float or complex fields.  */
 622   orig_value = value;
 623   if (GET_MODE (value) != VOIDmode
 624       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 625       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 626     {
 627       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 628       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 629     }
 630
 631   /* Now OFFSET is nonzero only if OP0 is memory
 632      and is therefore always measured in bytes.  */
 633
 634   if (HAVE_insv
 635       && GET_MODE (value) != BLKmode
 636       && bitsize > 0
 637       && GET_MODE_BITSIZE (op_mode) >= bitsize
 638       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 639             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 640       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 641                                                         VOIDmode))
 642     {
 643       int xbitpos = bitpos;
 644       rtx value1;
 645       rtx xop0 = op0;
 646       rtx last = get_last_insn ();
 647       rtx pat;
 648       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 649       int save_volatile_ok = volatile_ok;
 650
 651       volatile_ok = 1;
 652
 653       /* If this machine's insv can only insert into a register, copy OP0
 654          into a register and save it back later.  */
 655       if (MEM_P (op0)
 656           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 657                 (op0, VOIDmode)))
 658         {
 659           rtx tempreg;
 660           enum machine_mode bestmode;
 661
 662           /* Get the mode to use for inserting into this field.  If OP0 is
 663              BLKmode, get the smallest mode consistent with the alignment. If
 664              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 665              mode. Otherwise, use the smallest mode containing the field.  */
 666
 667           if (GET_MODE (op0) == BLKmode
 668               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 669             bestmode
 670               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 671                                MEM_VOLATILE_P (op0));
 672           else
 673             bestmode = GET_MODE (op0);
 674
 675           if (bestmode == VOIDmode
 676               || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
 677               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 678                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 679             goto insv_loses;
 680
 681           /* Adjust address to point to the containing unit of that mode.
 682              Compute offset as multiple of this unit, counting in bytes.  */
 683           unit = GET_MODE_BITSIZE (bestmode);
 684           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 685           bitpos = bitnum % unit;
 686           op0 = adjust_address (op0, bestmode,  offset);
 687
 688           /* Fetch that unit, store the bitfield in it, then store
 689              the unit.  */
 690           tempreg = copy_to_reg (op0);
 691           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 692           emit_move_insn (op0, tempreg);
 693           return value;
 694         }
 695       volatile_ok = save_volatile_ok;
 696
 697       /* Add OFFSET into OP0's address.  */
 698       if (MEM_P (xop0))
 699         xop0 = adjust_address (xop0, byte_mode, offset);
 700
 701       /* If xop0 is a register, we need it in MAXMODE
 702          to make it acceptable to the format of insv.  */
 703       if (GET_CODE (xop0) == SUBREG)
 704         /* We can't just change the mode, because this might clobber op0,
 705            and we will need the original value of op0 if insv fails.  */
 706         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 707       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 708         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 709
 710       /* On big-endian machines, we count bits from the most significant.
 711          If the bit field insn does not, we must invert.  */
 712
 713       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 714         xbitpos = unit - bitsize - xbitpos;
 715
 716       /* We have been counting XBITPOS within UNIT.
 717          Count instead within the size of the register.  */
 718       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 719         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 720
 721       unit = GET_MODE_BITSIZE (maxmode);
 722
 723       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 724       value1 = value;
 725       if (GET_MODE (value) != maxmode)
 726         {
 727           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 728             {
 729               /* Optimization: Don't bother really extending VALUE
 730                  if it has all the bits we will actually use.  However,
 731                  if we must narrow it, be sure we do it correctly.  */
 732
 733               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 734                 {
 735                   rtx tmp;
 736
 737                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 738                   if (! tmp)
 739                     tmp = simplify_gen_subreg (maxmode,
 740                                                force_reg (GET_MODE (value),
 741                                                           value1),
 742                                                GET_MODE (value), 0);
 743                   value1 = tmp;
 744                 }
 745               else
 746                 value1 = gen_lowpart (maxmode, value1);
 747             }
 748           else if (GET_CODE (value) == CONST_INT)
 749             value1 = gen_int_mode (INTVAL (value), maxmode);
 750           else
 751             /* Parse phase is supposed to make VALUE's data type
 752                match that of the component reference, which is a type
 753                at least as wide as the field; so VALUE should have
 754                a mode that corresponds to that type.  */
 755             gcc_assert (CONSTANT_P (value));
 756         }
 757
 758       /* If this machine's insv insists on a register,
 759          get VALUE1 into a register.  */
 760       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 761              (value1, maxmode)))
 762         value1 = force_reg (maxmode, value1);
 763
 764       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 765       if (pat)
 766         emit_insn (pat);
 767       else
 768         {
 769           delete_insns_since (last);
 770           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 771         }
 772     }
 773   else
 774     insv_loses:
 775     /* Insv is not available; store using shifts and boolean ops.  */
 776     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 777   return value;
 778 }
 779 \f
 780 /* Use shifts and boolean operations to store VALUE
 781    into a bit field of width BITSIZE
 782    in a memory location specified by OP0 except offset by OFFSET bytes.
 783      (OFFSET must be 0 if OP0 is a register.)
 784    The field starts at position BITPOS within the byte.
 785     (If OP0 is a register, it may be a full word or a narrower mode,
 786      but BITPOS still counts within a full word,
 787      which is significant on bigendian machines.)  */
 788
 789 static void
 790 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 791                        unsigned HOST_WIDE_INT bitsize,
 792                        unsigned HOST_WIDE_INT bitpos, rtx value)
 793 {
 794   enum machine_mode mode;
 795   unsigned int total_bits = BITS_PER_WORD;
 796   rtx subtarget, temp;
 797   int all_zero = 0;
 798   int all_one = 0;
 799
 800   /* There is a case not handled here:
 801      a structure with a known alignment of just a halfword
 802      and a field split across two aligned halfwords within the structure.
 803      Or likewise a structure with a known alignment of just a byte
 804      and a field split across two bytes.
 805      Such cases are not supposed to be able to occur.  */
 806
 807   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 808     {
 809       gcc_assert (!offset);
 810       /* Special treatment for a bit field split across two registers.  */
 811       if (bitsize + bitpos > BITS_PER_WORD)
 812         {
 813           store_split_bit_field (op0, bitsize, bitpos, value);
 814           return;
 815         }
 816     }
 817   else
 818     {
 819       /* Get the proper mode to use for this field.  We want a mode that
 820          includes the entire field.  If such a mode would be larger than
 821          a word, we won't be doing the extraction the normal way.
 822          We don't want a mode bigger than the destination.  */
 823
 824       mode = GET_MODE (op0);
 825       if (GET_MODE_BITSIZE (mode) == 0
 826           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 827         mode = word_mode;
 828       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 829                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 830
 831       if (mode == VOIDmode)
 832         {
 833           /* The only way this should occur is if the field spans word
 834              boundaries.  */
 835           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 836                                  value);
 837           return;
 838         }
 839
 840       total_bits = GET_MODE_BITSIZE (mode);
 841
 842       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 843          be in the range 0 to total_bits-1, and put any excess bytes in
 844          OFFSET.  */
 845       if (bitpos >= total_bits)
 846         {
 847           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 848           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 849                      * BITS_PER_UNIT);
 850         }
 851
 852       /* Get ref to an aligned byte, halfword, or word containing the field.
 853          Adjust BITPOS to be position within a word,
 854          and OFFSET to be the offset of that word.
 855          Then alter OP0 to refer to that word.  */
 856       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 857       offset -= (offset % (total_bits / BITS_PER_UNIT));
 858       op0 = adjust_address (op0, mode, offset);
 859     }
 860
 861   mode = GET_MODE (op0);
 862
 863   /* Now MODE is either some integral mode for a MEM as OP0,
 864      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 865      The bit field is contained entirely within OP0.
 866      BITPOS is the starting bit number within OP0.
 867      (OP0's mode may actually be narrower than MODE.)  */
 868
 869   if (BYTES_BIG_ENDIAN)
 870       /* BITPOS is the distance between our msb
 871          and that of the containing datum.
 872          Convert it to the distance from the lsb.  */
 873       bitpos = total_bits - bitsize - bitpos;
 874
 875   /* Now BITPOS is always the distance between our lsb
 876      and that of OP0.  */
 877
 878   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 879      we must first convert its mode to MODE.  */
 880
 881   if (GET_CODE (value) == CONST_INT)
 882     {
 883       HOST_WIDE_INT v = INTVAL (value);
 884
 885       if (bitsize < HOST_BITS_PER_WIDE_INT)
 886         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 887
 888       if (v == 0)
 889         all_zero = 1;
 890       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 891                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 892                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 893         all_one = 1;
 894
 895       value = lshift_value (mode, value, bitpos, bitsize);
 896     }
 897   else
 898     {
 899       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 900                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 901
 902       if (GET_MODE (value) != mode)
 903         {
 904           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 905               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 906             value = gen_lowpart (mode, value);
 907           else
 908             value = convert_to_mode (mode, value, 1);
 909         }
 910
 911       if (must_and)
 912         value = expand_binop (mode, and_optab, value,
 913                               mask_rtx (mode, 0, bitsize, 0),
 914                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 915       if (bitpos > 0)
 916         value = expand_shift (LSHIFT_EXPR, mode, value,
 917                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 918     }
 919
 920   /* Now clear the chosen bits in OP0,
 921      except that if VALUE is -1 we need not bother.  */
 922
 923   subtarget = op0;
 924
 925   if (! all_one)
 926     {
 927       temp = expand_binop (mode, and_optab, op0,
 928                            mask_rtx (mode, bitpos, bitsize, 1),
 929                            subtarget, 1, OPTAB_LIB_WIDEN);
 930       subtarget = temp;
 931     }
 932   else
 933     temp = op0;
 934
 935   /* Now logical-or VALUE into OP0, unless it is zero.  */
 936
 937   if (! all_zero)
 938     temp = expand_binop (mode, ior_optab, temp, value,
 939                          subtarget, 1, OPTAB_LIB_WIDEN);
 940   if (op0 != temp)
 941     emit_move_insn (op0, temp);
 942 }
 943 \f
 944 /* Store a bit field that is split across multiple accessible memory objects.
 945
 946    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 947    BITSIZE is the field width; BITPOS the position of its first bit
 948    (within the word).
 949    VALUE is the value to store.
 950
 951    This does not yet handle fields wider than BITS_PER_WORD.  */
 952
 953 static void
 954 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 955                        unsigned HOST_WIDE_INT bitpos, rtx value)
 956 {
 957   unsigned int unit;
 958   unsigned int bitsdone = 0;
 959
 960   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 961      much at a time.  */
 962   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 963     unit = BITS_PER_WORD;
 964   else
 965     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 966
 967   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 968      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 969      that VALUE might be a floating-point constant.  */
 970   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 971     {
 972       rtx word = gen_lowpart_common (word_mode, value);
 973
 974       if (word && (value != word))
 975         value = word;
 976       else
 977         value = gen_lowpart_common (word_mode,
 978                                     force_reg (GET_MODE (value) != VOIDmode
 979                                                ? GET_MODE (value)
 980                                                : word_mode, value));
 981     }
 982
 983   while (bitsdone < bitsize)
 984     {
 985       unsigned HOST_WIDE_INT thissize;
 986       rtx part, word;
 987       unsigned HOST_WIDE_INT thispos;
 988       unsigned HOST_WIDE_INT offset;
 989
 990       offset = (bitpos + bitsdone) / unit;
 991       thispos = (bitpos + bitsdone) % unit;
 992
 993       /* THISSIZE must not overrun a word boundary.  Otherwise,
 994          store_fixed_bit_field will call us again, and we will mutually
 995          recurse forever.  */
 996       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 997       thissize = MIN (thissize, unit - thispos);
 998
 999       if (BYTES_BIG_ENDIAN)
1000         {
1001           int total_bits;
1002
1003           /* We must do an endian conversion exactly the same way as it is
1004              done in extract_bit_field, so that the two calls to
1005              extract_fixed_bit_field will have comparable arguments.  */
1006           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1007             total_bits = BITS_PER_WORD;
1008           else
1009             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1010
1011           /* Fetch successively less significant portions.  */
1012           if (GET_CODE (value) == CONST_INT)
1013             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1014                              >> (bitsize - bitsdone - thissize))
1015                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1016           else
1017             /* The args are chosen so that the last part includes the
1018                lsb.  Give extract_bit_field the value it needs (with
1019                endianness compensation) to fetch the piece we want.  */
1020             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1021                                             total_bits - bitsize + bitsdone,
1022                                             NULL_RTX, 1);
1023         }
1024       else
1025         {
1026           /* Fetch successively more significant portions.  */
1027           if (GET_CODE (value) == CONST_INT)
1028             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1029                              >> bitsdone)
1030                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1031           else
1032             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1033                                             bitsdone, NULL_RTX, 1);
1034         }
1035
1036       /* If OP0 is a register, then handle OFFSET here.
1037
1038          When handling multiword bitfields, extract_bit_field may pass
1039          down a word_mode SUBREG of a larger REG for a bitfield that actually
1040          crosses a word boundary.  Thus, for a SUBREG, we must find
1041          the current word starting from the base register.  */
1042       if (GET_CODE (op0) == SUBREG)
1043         {
1044           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1045           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1046                                         GET_MODE (SUBREG_REG (op0)));
1047           offset = 0;
1048         }
1049       else if (REG_P (op0))
1050         {
1051           word = operand_subword_force (op0, offset, GET_MODE (op0));
1052           offset = 0;
1053         }
1054       else
1055         word = op0;
1056
1057       /* OFFSET is in UNITs, and UNIT is in bits.
1058          store_fixed_bit_field wants offset in bytes.  */
1059       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1060                              thispos, part);
1061       bitsdone += thissize;
1062     }
1063 }
1064 \f
1065 /* Generate code to extract a byte-field from STR_RTX
1066    containing BITSIZE bits, starting at BITNUM,
1067    and put it in TARGET if possible (if TARGET is nonzero).
1068    Regardless of TARGET, we return the rtx for where the value is placed.
1069
1070    STR_RTX is the structure containing the byte (a REG or MEM).
1071    UNSIGNEDP is nonzero if this is an unsigned bit field.
1072    MODE is the natural mode of the field value once extracted.
1073    TMODE is the mode the caller would like the value to have;
1074    but the value may be returned with type MODE instead.
1075
1076    TOTAL_SIZE is the size in bytes of the containing structure,
1077    or -1 if varying.
1078
1079    If a TARGET is specified and we can store in it at no extra cost,
1080    we do so, and return TARGET.
1081    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1082    if they are equally easy.  */
1083
1084 rtx
1085 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1086                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1087                    enum machine_mode mode, enum machine_mode tmode)
1088 {
1089   unsigned int unit
1090     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1091   unsigned HOST_WIDE_INT offset, bitpos;
1092   rtx op0 = str_rtx;
1093   rtx spec_target = target;
1094   rtx spec_target_subreg = 0;
1095   enum machine_mode int_mode;
1096   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1097   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1098   enum machine_mode mode1;
1099   int byte_offset;
1100
1101   if (tmode == VOIDmode)
1102     tmode = mode;
1103
1104   while (GET_CODE (op0) == SUBREG)
1105     {
1106       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1107       op0 = SUBREG_REG (op0);
1108     }
1109
1110   /* If we have an out-of-bounds access to a register, just return an
1111      uninitialized register of the required mode.  This can occur if the
1112      source code contains an out-of-bounds access to a small array.  */
1113   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1114     return gen_reg_rtx (tmode);
1115
1116   if (REG_P (op0)
1117       && mode == GET_MODE (op0)
1118       && bitnum == 0
1119       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1120     {
1121       /* We're trying to extract a full register from itself.  */
1122       return op0;
1123     }
1124
1125   /* Use vec_extract patterns for extracting parts of vectors whenever
1126      available.  */
1127   if (VECTOR_MODE_P (GET_MODE (op0))
1128       && !MEM_P (op0)
1129       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1130           != CODE_FOR_nothing)
1131       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1132           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1133     {
1134       enum machine_mode outermode = GET_MODE (op0);
1135       enum machine_mode innermode = GET_MODE_INNER (outermode);
1136       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1137       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1138       rtx rtxpos = GEN_INT (pos);
1139       rtx src = op0;
1140       rtx dest = NULL, pat, seq;
1141       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1142       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1143       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1144
1145       if (innermode == tmode || innermode == mode)
1146         dest = target;
1147
1148       if (!dest)
1149         dest = gen_reg_rtx (innermode);
1150
1151       start_sequence ();
1152
1153       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1154         dest = copy_to_mode_reg (mode0, dest);
1155
1156       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1157         src = copy_to_mode_reg (mode1, src);
1158
1159       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1160         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1161
1162       /* We could handle this, but we should always be called with a pseudo
1163          for our targets and all insns should take them as outputs.  */
1164       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1165                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1166                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1167
1168       pat = GEN_FCN (icode) (dest, src, rtxpos);
1169       seq = get_insns ();
1170       end_sequence ();
1171       if (pat)
1172         {
1173           emit_insn (seq);
1174           emit_insn (pat);
1175           return dest;
1176         }
1177     }
1178
1179   /* Make sure we are playing with integral modes.  Pun with subregs
1180      if we aren't.  */
1181   {
1182     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1183     if (imode != GET_MODE (op0))
1184       {
1185         if (MEM_P (op0))
1186           op0 = adjust_address (op0, imode, 0);
1187         else
1188           {
1189             gcc_assert (imode != BLKmode);
1190             op0 = gen_lowpart (imode, op0);
1191
1192             /* If we got a SUBREG, force it into a register since we
1193                aren't going to be able to do another SUBREG on it.  */
1194             if (GET_CODE (op0) == SUBREG)
1195               op0 = force_reg (imode, op0);
1196           }
1197       }
1198   }
1199
1200   /* We may be accessing data outside the field, which means
1201      we can alias adjacent data.  */
1202   if (MEM_P (op0))
1203     {
1204       op0 = shallow_copy_rtx (op0);
1205       set_mem_alias_set (op0, 0);
1206       set_mem_expr (op0, 0);
1207     }
1208
1209   /* Extraction of a full-word or multi-word value from a structure
1210      in a register or aligned memory can be done with just a SUBREG.
1211      A subword value in the least significant part of a register
1212      can also be extracted with a SUBREG.  For this, we need the
1213      byte offset of the value in op0.  */
1214
1215   bitpos = bitnum % unit;
1216   offset = bitnum / unit;
1217   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1218
1219   /* If OP0 is a register, BITPOS must count within a word.
1220      But as we have it, it counts within whatever size OP0 now has.
1221      On a bigendian machine, these are not the same, so convert.  */
1222   if (BYTES_BIG_ENDIAN
1223       && !MEM_P (op0)
1224       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1225     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1226
1227   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1228      If that's wrong, the solution is to test for it and set TARGET to 0
1229      if needed.  */
1230
1231   /* Only scalar integer modes can be converted via subregs.  There is an
1232      additional problem for FP modes here in that they can have a precision
1233      which is different from the size.  mode_for_size uses precision, but
1234      we want a mode based on the size, so we must avoid calling it for FP
1235      modes.  */
1236   mode1  = (SCALAR_INT_MODE_P (tmode)
1237             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1238             : mode);
1239
1240   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1241         && bitpos % BITS_PER_WORD == 0)
1242        || (mode1 != BLKmode
1243            /* ??? The big endian test here is wrong.  This is correct
1244               if the value is in a register, and if mode_for_size is not
1245               the same mode as op0.  This causes us to get unnecessarily
1246               inefficient code from the Thumb port when -mbig-endian.  */
1247            && (BYTES_BIG_ENDIAN
1248                ? bitpos + bitsize == BITS_PER_WORD
1249                : bitpos == 0)))
1250       && ((!MEM_P (op0)
1251            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1252                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1253            && GET_MODE_SIZE (mode1) != 0
1254            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1255           || (MEM_P (op0)
1256               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1257                   || (offset * BITS_PER_UNIT % bitsize == 0
1258                       && MEM_ALIGN (op0) % bitsize == 0)))))
1259     {
1260       if (mode1 != GET_MODE (op0))
1261         {
1262           if (MEM_P (op0))
1263             op0 = adjust_address (op0, mode1, offset);
1264           else
1265             {
1266               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1267                                              byte_offset);
1268               if (sub == NULL)
1269                 goto no_subreg_mode_swap;
1270               op0 = sub;
1271             }
1272         }
1273       if (mode1 != mode)
1274         return convert_to_mode (tmode, op0, unsignedp);
1275       return op0;
1276     }
1277  no_subreg_mode_swap:
1278
1279   /* Handle fields bigger than a word.  */
1280
1281   if (bitsize > BITS_PER_WORD)
1282     {
1283       /* Here we transfer the words of the field
1284          in the order least significant first.
1285          This is because the most significant word is the one which may
1286          be less than full.  */
1287
1288       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1289       unsigned int i;
1290
1291       if (target == 0 || !REG_P (target))
1292         target = gen_reg_rtx (mode);
1293
1294       /* Indicate for flow that the entire target reg is being set.  */
1295       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1296
1297       for (i = 0; i < nwords; i++)
1298         {
1299           /* If I is 0, use the low-order word in both field and target;
1300              if I is 1, use the next to lowest word; and so on.  */
1301           /* Word number in TARGET to use.  */
1302           unsigned int wordnum
1303             = (WORDS_BIG_ENDIAN
1304                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1305                : i);
1306           /* Offset from start of field in OP0.  */
1307           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1308                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1309                                                 * (int) BITS_PER_WORD))
1310                                      : (int) i * BITS_PER_WORD);
1311           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1312           rtx result_part
1313             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1314                                            bitsize - i * BITS_PER_WORD),
1315                                  bitnum + bit_offset, 1, target_part, mode,
1316                                  word_mode);
1317
1318           gcc_assert (target_part);
1319
1320           if (result_part != target_part)
1321             emit_move_insn (target_part, result_part);
1322         }
1323
1324       if (unsignedp)
1325         {
1326           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1327              need to be zero'd out.  */
1328           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1329             {
1330               unsigned int i, total_words;
1331
1332               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1333               for (i = nwords; i < total_words; i++)
1334                 emit_move_insn
1335                   (operand_subword (target,
1336                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1337                                     1, VOIDmode),
1338                    const0_rtx);
1339             }
1340           return target;
1341         }
1342
1343       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1344       target = expand_shift (LSHIFT_EXPR, mode, target,
1345                              build_int_cst (NULL_TREE,
1346                                             GET_MODE_BITSIZE (mode) - bitsize),
1347                              NULL_RTX, 0);
1348       return expand_shift (RSHIFT_EXPR, mode, target,
1349                            build_int_cst (NULL_TREE,
1350                                           GET_MODE_BITSIZE (mode) - bitsize),
1351                            NULL_RTX, 0);
1352     }
1353
1354   /* From here on we know the desired field is smaller than a word.  */
1355
1356   /* Check if there is a correspondingly-sized integer field, so we can
1357      safely extract it as one size of integer, if necessary; then
1358      truncate or extend to the size that is wanted; then use SUBREGs or
1359      convert_to_mode to get one of the modes we really wanted.  */
1360
1361   int_mode = int_mode_for_mode (tmode);
1362   if (int_mode == BLKmode)
1363     int_mode = int_mode_for_mode (mode);
1364   /* Should probably push op0 out to memory and then do a load.  */
1365   gcc_assert (int_mode != BLKmode);
1366
1367   /* OFFSET is the number of words or bytes (UNIT says which)
1368      from STR_RTX to the first word or byte containing part of the field.  */
1369   if (!MEM_P (op0))
1370     {
1371       if (offset != 0
1372           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1373         {
1374           if (!REG_P (op0))
1375             op0 = copy_to_reg (op0);
1376           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1377                                 op0, (offset * UNITS_PER_WORD));
1378         }
1379       offset = 0;
1380     }
1381
1382   /* Now OFFSET is nonzero only for memory operands.  */
1383
1384   if (unsignedp)
1385     {
1386       if (HAVE_extzv
1387           && bitsize > 0
1388           && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1389           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1390                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1391         {
1392           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1393           rtx bitsize_rtx, bitpos_rtx;
1394           rtx last = get_last_insn ();
1395           rtx xop0 = op0;
1396           rtx xtarget = target;
1397           rtx xspec_target = spec_target;
1398           rtx xspec_target_subreg = spec_target_subreg;
1399           rtx pat;
1400           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1401
1402           if (MEM_P (xop0))
1403             {
1404               int save_volatile_ok = volatile_ok;
1405               volatile_ok = 1;
1406
1407               /* Is the memory operand acceptable?  */
1408               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1409                      (xop0, GET_MODE (xop0))))
1410                 {
1411                   /* No, load into a reg and extract from there.  */
1412                   enum machine_mode bestmode;
1413
1414                   /* Get the mode to use for inserting into this field.  If
1415                      OP0 is BLKmode, get the smallest mode consistent with the
1416                      alignment. If OP0 is a non-BLKmode object that is no
1417                      wider than MAXMODE, use its mode. Otherwise, use the
1418                      smallest mode containing the field.  */
1419
1420                   if (GET_MODE (xop0) == BLKmode
1421                       || (GET_MODE_SIZE (GET_MODE (op0))
1422                           > GET_MODE_SIZE (maxmode)))
1423                     bestmode = get_best_mode (bitsize, bitnum,
1424                                               MEM_ALIGN (xop0), maxmode,
1425                                               MEM_VOLATILE_P (xop0));
1426                   else
1427                     bestmode = GET_MODE (xop0);
1428
1429                   if (bestmode == VOIDmode
1430                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1431                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1432                     goto extzv_loses;
1433
1434                   /* Compute offset as multiple of this unit,
1435                      counting in bytes.  */
1436                   unit = GET_MODE_BITSIZE (bestmode);
1437                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1438                   xbitpos = bitnum % unit;
1439                   xop0 = adjust_address (xop0, bestmode, xoffset);
1440
1441                   /* Make sure register is big enough for the whole field. */
1442                   if (xoffset * BITS_PER_UNIT + unit
1443                       < offset * BITS_PER_UNIT + bitsize)
1444                     goto extzv_loses;
1445
1446                   /* Fetch it to a register in that size.  */
1447                   xop0 = force_reg (bestmode, xop0);
1448
1449                   /* XBITPOS counts within UNIT, which is what is expected.  */
1450                 }
1451               else
1452                 /* Get ref to first byte containing part of the field.  */
1453                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1454
1455               volatile_ok = save_volatile_ok;
1456             }
1457
1458           /* If op0 is a register, we need it in MAXMODE (which is usually
1459              SImode). to make it acceptable to the format of extzv.  */
1460           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1461             goto extzv_loses;
1462           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1463             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1464
1465           /* On big-endian machines, we count bits from the most significant.
1466              If the bit field insn does not, we must invert.  */
1467           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1468             xbitpos = unit - bitsize - xbitpos;
1469
1470           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1471           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1472             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1473
1474           unit = GET_MODE_BITSIZE (maxmode);
1475
1476           if (xtarget == 0)
1477             xtarget = xspec_target = gen_reg_rtx (tmode);
1478
1479           if (GET_MODE (xtarget) != maxmode)
1480             {
1481               if (REG_P (xtarget))
1482                 {
1483                   int wider = (GET_MODE_SIZE (maxmode)
1484                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1485                   xtarget = gen_lowpart (maxmode, xtarget);
1486                   if (wider)
1487                     xspec_target_subreg = xtarget;
1488                 }
1489               else
1490                 xtarget = gen_reg_rtx (maxmode);
1491             }
1492
1493           /* If this machine's extzv insists on a register target,
1494              make sure we have one.  */
1495           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1496                  (xtarget, maxmode)))
1497             xtarget = gen_reg_rtx (maxmode);
1498
1499           bitsize_rtx = GEN_INT (bitsize);
1500           bitpos_rtx = GEN_INT (xbitpos);
1501
1502           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1503           if (pat)
1504             {
1505               emit_insn (pat);
1506               target = xtarget;
1507               spec_target = xspec_target;
1508               spec_target_subreg = xspec_target_subreg;
1509             }
1510           else
1511             {
1512               delete_insns_since (last);
1513               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1514                                                 bitpos, target, 1);
1515             }
1516         }
1517       else
1518       extzv_loses:
1519         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1520                                           bitpos, target, 1);
1521     }
1522   else
1523     {
1524       if (HAVE_extv
1525           && bitsize > 0
1526           && GET_MODE_BITSIZE (extv_mode) >= bitsize
1527           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1528                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1529         {
1530           int xbitpos = bitpos, xoffset = offset;
1531           rtx bitsize_rtx, bitpos_rtx;
1532           rtx last = get_last_insn ();
1533           rtx xop0 = op0, xtarget = target;
1534           rtx xspec_target = spec_target;
1535           rtx xspec_target_subreg = spec_target_subreg;
1536           rtx pat;
1537           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1538
1539           if (MEM_P (xop0))
1540             {
1541               /* Is the memory operand acceptable?  */
1542               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1543                      (xop0, GET_MODE (xop0))))
1544                 {
1545                   /* No, load into a reg and extract from there.  */
1546                   enum machine_mode bestmode;
1547
1548                   /* Get the mode to use for inserting into this field.  If
1549                      OP0 is BLKmode, get the smallest mode consistent with the
1550                      alignment. If OP0 is a non-BLKmode object that is no
1551                      wider than MAXMODE, use its mode. Otherwise, use the
1552                      smallest mode containing the field.  */
1553
1554                   if (GET_MODE (xop0) == BLKmode
1555                       || (GET_MODE_SIZE (GET_MODE (op0))
1556                           > GET_MODE_SIZE (maxmode)))
1557                     bestmode = get_best_mode (bitsize, bitnum,
1558                                               MEM_ALIGN (xop0), maxmode,
1559                                               MEM_VOLATILE_P (xop0));
1560                   else
1561                     bestmode = GET_MODE (xop0);
1562
1563                   if (bestmode == VOIDmode
1564                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1565                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1566                     goto extv_loses;
1567
1568                   /* Compute offset as multiple of this unit,
1569                      counting in bytes.  */
1570                   unit = GET_MODE_BITSIZE (bestmode);
1571                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1572                   xbitpos = bitnum % unit;
1573                   xop0 = adjust_address (xop0, bestmode, xoffset);
1574
1575                   /* Make sure register is big enough for the whole field. */
1576                   if (xoffset * BITS_PER_UNIT + unit
1577                       < offset * BITS_PER_UNIT + bitsize)
1578                     goto extv_loses;
1579
1580                   /* Fetch it to a register in that size.  */
1581                   xop0 = force_reg (bestmode, xop0);
1582
1583                   /* XBITPOS counts within UNIT, which is what is expected.  */
1584                 }
1585               else
1586                 /* Get ref to first byte containing part of the field.  */
1587                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1588             }
1589
1590           /* If op0 is a register, we need it in MAXMODE (which is usually
1591              SImode) to make it acceptable to the format of extv.  */
1592           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1593             goto extv_loses;
1594           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1595             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1596
1597           /* On big-endian machines, we count bits from the most significant.
1598              If the bit field insn does not, we must invert.  */
1599           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1600             xbitpos = unit - bitsize - xbitpos;
1601
1602           /* XBITPOS counts within a size of UNIT.
1603              Adjust to count within a size of MAXMODE.  */
1604           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1605             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1606
1607           unit = GET_MODE_BITSIZE (maxmode);
1608
1609           if (xtarget == 0)
1610             xtarget = xspec_target = gen_reg_rtx (tmode);
1611
1612           if (GET_MODE (xtarget) != maxmode)
1613             {
1614               if (REG_P (xtarget))
1615                 {
1616                   int wider = (GET_MODE_SIZE (maxmode)
1617                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1618                   xtarget = gen_lowpart (maxmode, xtarget);
1619                   if (wider)
1620                     xspec_target_subreg = xtarget;
1621                 }
1622               else
1623                 xtarget = gen_reg_rtx (maxmode);
1624             }
1625
1626           /* If this machine's extv insists on a register target,
1627              make sure we have one.  */
1628           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1629                  (xtarget, maxmode)))
1630             xtarget = gen_reg_rtx (maxmode);
1631
1632           bitsize_rtx = GEN_INT (bitsize);
1633           bitpos_rtx = GEN_INT (xbitpos);
1634
1635           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1636           if (pat)
1637             {
1638               emit_insn (pat);
1639               target = xtarget;
1640               spec_target = xspec_target;
1641               spec_target_subreg = xspec_target_subreg;
1642             }
1643           else
1644             {
1645               delete_insns_since (last);
1646               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1647                                                 bitpos, target, 0);
1648             }
1649         }
1650       else
1651       extv_loses:
1652         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1653                                           bitpos, target, 0);
1654     }
1655   if (target == spec_target)
1656     return target;
1657   if (target == spec_target_subreg)
1658     return spec_target;
1659   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1660     {
1661       /* If the target mode is not a scalar integral, first convert to the
1662          integer mode of that size and then access it as a floating-point
1663          value via a SUBREG.  */
1664       if (!SCALAR_INT_MODE_P (tmode))
1665         {
1666           enum machine_mode smode
1667             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1668           target = convert_to_mode (smode, target, unsignedp);
1669           target = force_reg (smode, target);
1670           return gen_lowpart (tmode, target);
1671         }
1672
1673       return convert_to_mode (tmode, target, unsignedp);
1674     }
1675   return target;
1676 }
1677 \f
1678 /* Extract a bit field using shifts and boolean operations
1679    Returns an rtx to represent the value.
1680    OP0 addresses a register (word) or memory (byte).
1681    BITPOS says which bit within the word or byte the bit field starts in.
1682    OFFSET says how many bytes farther the bit field starts;
1683     it is 0 if OP0 is a register.
1684    BITSIZE says how many bits long the bit field is.
1685     (If OP0 is a register, it may be narrower than a full word,
1686      but BITPOS still counts within a full word,
1687      which is significant on bigendian machines.)
1688
1689    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1690    If TARGET is nonzero, attempts to store the value there
1691    and return TARGET, but this is not guaranteed.
1692    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1693
1694 static rtx
1695 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1696                          unsigned HOST_WIDE_INT offset,
1697                          unsigned HOST_WIDE_INT bitsize,
1698                          unsigned HOST_WIDE_INT bitpos, rtx target,
1699                          int unsignedp)
1700 {
1701   unsigned int total_bits = BITS_PER_WORD;
1702   enum machine_mode mode;
1703
1704   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1705     {
1706       /* Special treatment for a bit field split across two registers.  */
1707       if (bitsize + bitpos > BITS_PER_WORD)
1708         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1709     }
1710   else
1711     {
1712       /* Get the proper mode to use for this field.  We want a mode that
1713          includes the entire field.  If such a mode would be larger than
1714          a word, we won't be doing the extraction the normal way.  */
1715
1716       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1717                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1718
1719       if (mode == VOIDmode)
1720         /* The only way this should occur is if the field spans word
1721            boundaries.  */
1722         return extract_split_bit_field (op0, bitsize,
1723                                         bitpos + offset * BITS_PER_UNIT,
1724                                         unsignedp);
1725
1726       total_bits = GET_MODE_BITSIZE (mode);
1727
1728       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1729          be in the range 0 to total_bits-1, and put any excess bytes in
1730          OFFSET.  */
1731       if (bitpos >= total_bits)
1732         {
1733           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1734           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1735                      * BITS_PER_UNIT);
1736         }
1737
1738       /* Get ref to an aligned byte, halfword, or word containing the field.
1739          Adjust BITPOS to be position within a word,
1740          and OFFSET to be the offset of that word.
1741          Then alter OP0 to refer to that word.  */
1742       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1743       offset -= (offset % (total_bits / BITS_PER_UNIT));
1744       op0 = adjust_address (op0, mode, offset);
1745     }
1746
1747   mode = GET_MODE (op0);
1748
1749   if (BYTES_BIG_ENDIAN)
1750     /* BITPOS is the distance between our msb and that of OP0.
1751        Convert it to the distance from the lsb.  */
1752     bitpos = total_bits - bitsize - bitpos;
1753
1754   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1755      We have reduced the big-endian case to the little-endian case.  */
1756
1757   if (unsignedp)
1758     {
1759       if (bitpos)
1760         {
1761           /* If the field does not already start at the lsb,
1762              shift it so it does.  */
1763           tree amount = build_int_cst (NULL_TREE, bitpos);
1764           /* Maybe propagate the target for the shift.  */
1765           /* But not if we will return it--could confuse integrate.c.  */
1766           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1767           if (tmode != mode) subtarget = 0;
1768           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1769         }
1770       /* Convert the value to the desired mode.  */
1771       if (mode != tmode)
1772         op0 = convert_to_mode (tmode, op0, 1);
1773
1774       /* Unless the msb of the field used to be the msb when we shifted,
1775          mask out the upper bits.  */
1776
1777       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1778         return expand_binop (GET_MODE (op0), and_optab, op0,
1779                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1780                              target, 1, OPTAB_LIB_WIDEN);
1781       return op0;
1782     }
1783
1784   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1785      then arithmetic-shift its lsb to the lsb of the word.  */
1786   op0 = force_reg (mode, op0);
1787   if (mode != tmode)
1788     target = 0;
1789
1790   /* Find the narrowest integer mode that contains the field.  */
1791
1792   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1793        mode = GET_MODE_WIDER_MODE (mode))
1794     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1795       {
1796         op0 = convert_to_mode (mode, op0, 0);
1797         break;
1798       }
1799
1800   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1801     {
1802       tree amount
1803         = build_int_cst (NULL_TREE,
1804                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1805       /* Maybe propagate the target for the shift.  */
1806       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1807       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1808     }
1809
1810   return expand_shift (RSHIFT_EXPR, mode, op0,
1811                        build_int_cst (NULL_TREE,
1812                                       GET_MODE_BITSIZE (mode) - bitsize),
1813                        target, 0);
1814 }
1815 \f
1816 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1817    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1818    complement of that if COMPLEMENT.  The mask is truncated if
1819    necessary to the width of mode MODE.  The mask is zero-extended if
1820    BITSIZE+BITPOS is too small for MODE.  */
1821
1822 static rtx
1823 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1824 {
1825   HOST_WIDE_INT masklow, maskhigh;
1826
1827   if (bitsize == 0)
1828     masklow = 0;
1829   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1830     masklow = (HOST_WIDE_INT) -1 << bitpos;
1831   else
1832     masklow = 0;
1833
1834   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1835     masklow &= ((unsigned HOST_WIDE_INT) -1
1836                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1837
1838   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1839     maskhigh = -1;
1840   else
1841     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1842
1843   if (bitsize == 0)
1844     maskhigh = 0;
1845   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1846     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1847                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1848   else
1849     maskhigh = 0;
1850
1851   if (complement)
1852     {
1853       maskhigh = ~maskhigh;
1854       masklow = ~masklow;
1855     }
1856
1857   return immed_double_const (masklow, maskhigh, mode);
1858 }
1859
1860 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1861    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1862
1863 static rtx
1864 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1865 {
1866   unsigned HOST_WIDE_INT v = INTVAL (value);
1867   HOST_WIDE_INT low, high;
1868
1869   if (bitsize < HOST_BITS_PER_WIDE_INT)
1870     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1871
1872   if (bitpos < HOST_BITS_PER_WIDE_INT)
1873     {
1874       low = v << bitpos;
1875       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1876     }
1877   else
1878     {
1879       low = 0;
1880       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1881     }
1882
1883   return immed_double_const (low, high, mode);
1884 }
1885 \f
1886 /* Extract a bit field from a memory by forcing the alignment of the
1887    memory.  This efficient only if the field spans at least 4 boundaries.
1888
1889    OP0 is the MEM.
1890    BITSIZE is the field width; BITPOS is the position of the first bit.
1891    UNSIGNEDP is true if the result should be zero-extended.  */
1892
1893 static rtx
1894 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1895                                    unsigned HOST_WIDE_INT bitpos,
1896                                    int unsignedp)
1897 {
1898   enum machine_mode mode, dmode;
1899   unsigned int m_bitsize, m_size;
1900   unsigned int sign_shift_up, sign_shift_dn;
1901   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1902
1903   /* Choose a mode that will fit BITSIZE.  */
1904   mode = smallest_mode_for_size (bitsize, MODE_INT);
1905   m_size = GET_MODE_SIZE (mode);
1906   m_bitsize = GET_MODE_BITSIZE (mode);
1907
1908   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1909   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1910   if (dmode == BLKmode)
1911     return NULL;
1912
1913   do_pending_stack_adjust ();
1914   start = get_last_insn ();
1915
1916   /* At the end, we'll need an additional shift to deal with sign/zero
1917      extension.  By default this will be a left+right shift of the
1918      appropriate size.  But we may be able to eliminate one of them.  */
1919   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1920
1921   if (STRICT_ALIGNMENT)
1922     {
1923       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1924       bitpos %= BITS_PER_UNIT;
1925
1926       /* We load two values to be concatenate.  There's an edge condition
1927          that bears notice -- an aligned value at the end of a page can
1928          only load one value lest we segfault.  So the two values we load
1929          are at "base & -size" and "(base + size - 1) & -size".  If base
1930          is unaligned, the addresses will be aligned and sequential; if
1931          base is aligned, the addresses will both be equal to base.  */
1932
1933       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1934                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1935                                 NULL, true, OPTAB_LIB_WIDEN);
1936       mark_reg_pointer (a1, m_bitsize);
1937       v1 = gen_rtx_MEM (mode, a1);
1938       set_mem_align (v1, m_bitsize);
1939       v1 = force_reg (mode, validize_mem (v1));
1940
1941       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1942       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1943                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1944                                 NULL, true, OPTAB_LIB_WIDEN);
1945       v2 = gen_rtx_MEM (mode, a2);
1946       set_mem_align (v2, m_bitsize);
1947       v2 = force_reg (mode, validize_mem (v2));
1948
1949       /* Combine these two values into a double-word value.  */
1950       if (m_bitsize == BITS_PER_WORD)
1951         {
1952           comb = gen_reg_rtx (dmode);
1953           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1954           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1955           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1956         }
1957       else
1958         {
1959           if (BYTES_BIG_ENDIAN)
1960             comb = v1, v1 = v2, v2 = comb;
1961           v1 = convert_modes (dmode, mode, v1, true);
1962           if (v1 == NULL)
1963             goto fail;
1964           v2 = convert_modes (dmode, mode, v2, true);
1965           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1966                                     NULL, true, OPTAB_LIB_WIDEN);
1967           if (v2 == NULL)
1968             goto fail;
1969           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1970                                       true, OPTAB_LIB_WIDEN);
1971           if (comb == NULL)
1972             goto fail;
1973         }
1974
1975       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1976                                    NULL, true, OPTAB_LIB_WIDEN);
1977       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1978
1979       if (bitpos != 0)
1980         {
1981           if (sign_shift_up <= bitpos)
1982             bitpos -= sign_shift_up, sign_shift_up = 0;
1983           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1984                                        NULL, true, OPTAB_LIB_WIDEN);
1985         }
1986     }
1987   else
1988     {
1989       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1990       bitpos %= BITS_PER_UNIT;
1991
1992       /* When strict alignment is not required, we can just load directly
1993          from memory without masking.  If the remaining BITPOS offset is
1994          small enough, we may be able to do all operations in MODE as
1995          opposed to DMODE.  */
1996       if (bitpos + bitsize <= m_bitsize)
1997         dmode = mode;
1998       comb = adjust_address (op0, dmode, offset);
1999
2000       if (sign_shift_up <= bitpos)
2001         bitpos -= sign_shift_up, sign_shift_up = 0;
2002       shift = GEN_INT (bitpos);
2003     }
2004
2005   /* Shift down the double-word such that the requested value is at bit 0.  */
2006   if (shift != const0_rtx)
2007     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2008                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2009   if (comb == NULL)
2010     goto fail;
2011
2012   /* If the field exactly matches MODE, then all we need to do is return the
2013      lowpart.  Otherwise, shift to get the sign bits set properly.  */
2014   result = force_reg (mode, gen_lowpart (mode, comb));
2015
2016   if (sign_shift_up)
2017     result = expand_simple_binop (mode, ASHIFT, result,
2018                                   GEN_INT (sign_shift_up),
2019                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2020   if (sign_shift_dn)
2021     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2022                                   result, GEN_INT (sign_shift_dn),
2023                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2024
2025   return result;
2026
2027  fail:
2028   delete_insns_since (start);
2029   return NULL;
2030 }
2031
2032 /* Extract a bit field that is split across two words
2033    and return an RTX for the result.
2034
2035    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2036    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2037    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2038
2039 static rtx
2040 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2041                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2042 {
2043   unsigned int unit;
2044   unsigned int bitsdone = 0;
2045   rtx result = NULL_RTX;
2046   int first = 1;
2047
2048   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2049      much at a time.  */
2050   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2051     unit = BITS_PER_WORD;
2052   else
2053     {
2054       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2055       if (0 && bitsize / unit > 2)
2056         {
2057           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2058                                                        unsignedp);
2059           if (tmp)
2060             return tmp;
2061         }
2062     }
2063
2064   while (bitsdone < bitsize)
2065     {
2066       unsigned HOST_WIDE_INT thissize;
2067       rtx part, word;
2068       unsigned HOST_WIDE_INT thispos;
2069       unsigned HOST_WIDE_INT offset;
2070
2071       offset = (bitpos + bitsdone) / unit;
2072       thispos = (bitpos + bitsdone) % unit;
2073
2074       /* THISSIZE must not overrun a word boundary.  Otherwise,
2075          extract_fixed_bit_field will call us again, and we will mutually
2076          recurse forever.  */
2077       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2078       thissize = MIN (thissize, unit - thispos);
2079
2080       /* If OP0 is a register, then handle OFFSET here.
2081
2082          When handling multiword bitfields, extract_bit_field may pass
2083          down a word_mode SUBREG of a larger REG for a bitfield that actually
2084          crosses a word boundary.  Thus, for a SUBREG, we must find
2085          the current word starting from the base register.  */
2086       if (GET_CODE (op0) == SUBREG)
2087         {
2088           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2089           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2090                                         GET_MODE (SUBREG_REG (op0)));
2091           offset = 0;
2092         }
2093       else if (REG_P (op0))
2094         {
2095           word = operand_subword_force (op0, offset, GET_MODE (op0));
2096           offset = 0;
2097         }
2098       else
2099         word = op0;
2100
2101       /* Extract the parts in bit-counting order,
2102          whose meaning is determined by BYTES_PER_UNIT.
2103          OFFSET is in UNITs, and UNIT is in bits.
2104          extract_fixed_bit_field wants offset in bytes.  */
2105       part = extract_fixed_bit_field (word_mode, word,
2106                                       offset * unit / BITS_PER_UNIT,
2107                                       thissize, thispos, 0, 1);
2108       bitsdone += thissize;
2109
2110       /* Shift this part into place for the result.  */
2111       if (BYTES_BIG_ENDIAN)
2112         {
2113           if (bitsize != bitsdone)
2114             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2115                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2116                                  0, 1);
2117         }
2118       else
2119         {
2120           if (bitsdone != thissize)
2121             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2122                                  build_int_cst (NULL_TREE,
2123                                                 bitsdone - thissize), 0, 1);
2124         }
2125
2126       if (first)
2127         result = part;
2128       else
2129         /* Combine the parts with bitwise or.  This works
2130            because we extracted each part as an unsigned bit field.  */
2131         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2132                                OPTAB_LIB_WIDEN);
2133
2134       first = 0;
2135     }
2136
2137   /* Unsigned bit field: we are done.  */
2138   if (unsignedp)
2139     return result;
2140   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2141   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2142                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2143                          NULL_RTX, 0);
2144   return expand_shift (RSHIFT_EXPR, word_mode, result,
2145                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2146                        NULL_RTX, 0);
2147 }
2148 \f
2149 /* Add INC into TARGET.  */
2150
2151 void
2152 expand_inc (rtx target, rtx inc)
2153 {
2154   rtx value = expand_binop (GET_MODE (target), add_optab,
2155                             target, inc,
2156                             target, 0, OPTAB_LIB_WIDEN);
2157   if (value != target)
2158     emit_move_insn (target, value);
2159 }
2160
2161 /* Subtract DEC from TARGET.  */
2162
2163 void
2164 expand_dec (rtx target, rtx dec)
2165 {
2166   rtx value = expand_binop (GET_MODE (target), sub_optab,
2167                             target, dec,
2168                             target, 0, OPTAB_LIB_WIDEN);
2169   if (value != target)
2170     emit_move_insn (target, value);
2171 }
2172 \f
2173 /* Output a shift instruction for expression code CODE,
2174    with SHIFTED being the rtx for the value to shift,
2175    and AMOUNT the tree for the amount to shift by.
2176    Store the result in the rtx TARGET, if that is convenient.
2177    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2178    Return the rtx for where the value is.  */
2179
2180 rtx
2181 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2182               tree amount, rtx target, int unsignedp)
2183 {
2184   rtx op1, temp = 0;
2185   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2186   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2187   int try;
2188
2189   /* Previously detected shift-counts computed by NEGATE_EXPR
2190      and shifted in the other direction; but that does not work
2191      on all machines.  */
2192
2193   op1 = expand_normal (amount);
2194
2195   if (SHIFT_COUNT_TRUNCATED)
2196     {
2197       if (GET_CODE (op1) == CONST_INT
2198           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2199               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2200         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2201                        % GET_MODE_BITSIZE (mode));
2202       else if (GET_CODE (op1) == SUBREG
2203                && subreg_lowpart_p (op1))
2204         op1 = SUBREG_REG (op1);
2205     }
2206
2207   if (op1 == const0_rtx)
2208     return shifted;
2209
2210   /* Check whether its cheaper to implement a left shift by a constant
2211      bit count by a sequence of additions.  */
2212   if (code == LSHIFT_EXPR
2213       && GET_CODE (op1) == CONST_INT
2214       && INTVAL (op1) > 0
2215       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2216       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2217     {
2218       int i;
2219       for (i = 0; i < INTVAL (op1); i++)
2220         {
2221           temp = force_reg (mode, shifted);
2222           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2223                                   unsignedp, OPTAB_LIB_WIDEN);
2224         }
2225       return shifted;
2226     }
2227
2228   for (try = 0; temp == 0 && try < 3; try++)
2229     {
2230       enum optab_methods methods;
2231
2232       if (try == 0)
2233         methods = OPTAB_DIRECT;
2234       else if (try == 1)
2235         methods = OPTAB_WIDEN;
2236       else
2237         methods = OPTAB_LIB_WIDEN;
2238
2239       if (rotate)
2240         {
2241           /* Widening does not work for rotation.  */
2242           if (methods == OPTAB_WIDEN)
2243             continue;
2244           else if (methods == OPTAB_LIB_WIDEN)
2245             {
2246               /* If we have been unable to open-code this by a rotation,
2247                  do it as the IOR of two shifts.  I.e., to rotate A
2248                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2249                  where C is the bitsize of A.
2250
2251                  It is theoretically possible that the target machine might
2252                  not be able to perform either shift and hence we would
2253                  be making two libcalls rather than just the one for the
2254                  shift (similarly if IOR could not be done).  We will allow
2255                  this extremely unlikely lossage to avoid complicating the
2256                  code below.  */
2257
2258               rtx subtarget = target == shifted ? 0 : target;
2259               rtx temp1;
2260               tree type = TREE_TYPE (amount);
2261               tree new_amount = make_tree (type, op1);
2262               tree other_amount
2263                 = fold_build2 (MINUS_EXPR, type,
2264                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2265                                amount);
2266
2267               shifted = force_reg (mode, shifted);
2268
2269               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2270                                    mode, shifted, new_amount, 0, 1);
2271               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2272                                     mode, shifted, other_amount, subtarget, 1);
2273               return expand_binop (mode, ior_optab, temp, temp1, target,
2274                                    unsignedp, methods);
2275             }
2276
2277           temp = expand_binop (mode,
2278                                left ? rotl_optab : rotr_optab,
2279                                shifted, op1, target, unsignedp, methods);
2280         }
2281       else if (unsignedp)
2282         temp = expand_binop (mode,
2283                              left ? ashl_optab : lshr_optab,
2284                              shifted, op1, target, unsignedp, methods);
2285
2286       /* Do arithmetic shifts.
2287          Also, if we are going to widen the operand, we can just as well
2288          use an arithmetic right-shift instead of a logical one.  */
2289       if (temp == 0 && ! rotate
2290           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2291         {
2292           enum optab_methods methods1 = methods;
2293
2294           /* If trying to widen a log shift to an arithmetic shift,
2295              don't accept an arithmetic shift of the same size.  */
2296           if (unsignedp)
2297             methods1 = OPTAB_MUST_WIDEN;
2298
2299           /* Arithmetic shift */
2300
2301           temp = expand_binop (mode,
2302                                left ? ashl_optab : ashr_optab,
2303                                shifted, op1, target, unsignedp, methods1);
2304         }
2305
2306       /* We used to try extzv here for logical right shifts, but that was
2307          only useful for one machine, the VAX, and caused poor code
2308          generation there for lshrdi3, so the code was deleted and a
2309          define_expand for lshrsi3 was added to vax.md.  */
2310     }
2311
2312   gcc_assert (temp);
2313   return temp;
2314 }
2315 \f
2316 enum alg_code {
2317   alg_unknown,
2318   alg_zero,
2319   alg_m, alg_shift,
2320   alg_add_t_m2,
2321   alg_sub_t_m2,
2322   alg_add_factor,
2323   alg_sub_factor,
2324   alg_add_t2_m,
2325   alg_sub_t2_m,
2326   alg_impossible
2327 };
2328
2329 /* This structure holds the "cost" of a multiply sequence.  The
2330    "cost" field holds the total rtx_cost of every operator in the
2331    synthetic multiplication sequence, hence cost(a op b) is defined
2332    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2333    The "latency" field holds the minimum possible latency of the
2334    synthetic multiply, on a hypothetical infinitely parallel CPU.
2335    This is the critical path, or the maximum height, of the expression
2336    tree which is the sum of rtx_costs on the most expensive path from
2337    any leaf to the root.  Hence latency(a op b) is defined as zero for
2338    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2339
2340 struct mult_cost {
2341   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2342   short latency;  /* The latency of the multiplication sequence.  */
2343 };
2344
2345 /* This macro is used to compare a pointer to a mult_cost against an
2346    single integer "rtx_cost" value.  This is equivalent to the macro
2347    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2348 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2349                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2350
2351 /* This macro is used to compare two pointers to mult_costs against
2352    each other.  The macro returns true if X is cheaper than Y.
2353    Currently, the cheaper of two mult_costs is the one with the
2354    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2355 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2356                                  || ((X)->cost == (Y)->cost     \
2357                                      && (X)->latency < (Y)->latency))
2358
2359 /* This structure records a sequence of operations.
2360    `ops' is the number of operations recorded.
2361    `cost' is their total cost.
2362    The operations are stored in `op' and the corresponding
2363    logarithms of the integer coefficients in `log'.
2364
2365    These are the operations:
2366    alg_zero             total := 0;
2367    alg_m                total := multiplicand;
2368    alg_shift            total := total * coeff
2369    alg_add_t_m2         total := total + multiplicand * coeff;
2370    alg_sub_t_m2         total := total - multiplicand * coeff;
2371    alg_add_factor       total := total * coeff + total;
2372    alg_sub_factor       total := total * coeff - total;
2373    alg_add_t2_m         total := total * coeff + multiplicand;
2374    alg_sub_t2_m         total := total * coeff - multiplicand;
2375
2376    The first operand must be either alg_zero or alg_m.  */
2377
2378 struct algorithm
2379 {
2380   struct mult_cost cost;
2381   short ops;
2382   /* The size of the OP and LOG fields are not directly related to the
2383      word size, but the worst-case algorithms will be if we have few
2384      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2385      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2386      in total wordsize operations.  */
2387   enum alg_code op[MAX_BITS_PER_WORD];
2388   char log[MAX_BITS_PER_WORD];
2389 };
2390
2391 /* The entry for our multiplication cache/hash table.  */
2392 struct alg_hash_entry {
2393   /* The number we are multiplying by.  */
2394   unsigned int t;
2395
2396   /* The mode in which we are multiplying something by T.  */
2397   enum machine_mode mode;
2398
2399   /* The best multiplication algorithm for t.  */
2400   enum alg_code alg;
2401
2402   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2403      Otherwise, the cost within which multiplication by T is
2404      impossible.  */
2405   struct mult_cost cost;
2406 };
2407
2408 /* The number of cache/hash entries.  */
2409 #define NUM_ALG_HASH_ENTRIES 307
2410
2411 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2412    actually a hash table.  If we have a collision, that the older
2413    entry is kicked out.  */
2414 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2415
2416 /* Indicates the type of fixup needed after a constant multiplication.
2417    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2418    the result should be negated, and ADD_VARIANT means that the
2419    multiplicand should be added to the result.  */
2420 enum mult_variant {basic_variant, negate_variant, add_variant};
2421
2422 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2423                         const struct mult_cost *, enum machine_mode mode);
2424 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2425                                  struct algorithm *, enum mult_variant *, int);
2426 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2427                               const struct algorithm *, enum mult_variant);
2428 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2429                                                  int, rtx *, int *, int *);
2430 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2431 static rtx extract_high_half (enum machine_mode, rtx);
2432 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2433 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2434                                        int, int);
2435 /* Compute and return the best algorithm for multiplying by T.
2436    The algorithm must cost less than cost_limit
2437    If retval.cost >= COST_LIMIT, no algorithm was found and all
2438    other field of the returned struct are undefined.
2439    MODE is the machine mode of the multiplication.  */
2440
2441 static void
2442 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2443             const struct mult_cost *cost_limit, enum machine_mode mode)
2444 {
2445   int m;
2446   struct algorithm *alg_in, *best_alg;
2447   struct mult_cost best_cost;
2448   struct mult_cost new_limit;
2449   int op_cost, op_latency;
2450   unsigned HOST_WIDE_INT q;
2451   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2452   int hash_index;
2453   bool cache_hit = false;
2454   enum alg_code cache_alg = alg_zero;
2455
2456   /* Indicate that no algorithm is yet found.  If no algorithm
2457      is found, this value will be returned and indicate failure.  */
2458   alg_out->cost.cost = cost_limit->cost + 1;
2459   alg_out->cost.latency = cost_limit->latency + 1;
2460
2461   if (cost_limit->cost < 0
2462       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2463     return;
2464
2465   /* Restrict the bits of "t" to the multiplication's mode.  */
2466   t &= GET_MODE_MASK (mode);
2467
2468   /* t == 1 can be done in zero cost.  */
2469   if (t == 1)
2470     {
2471       alg_out->ops = 1;
2472       alg_out->cost.cost = 0;
2473       alg_out->cost.latency = 0;
2474       alg_out->op[0] = alg_m;
2475       return;
2476     }
2477
2478   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2479      fail now.  */
2480   if (t == 0)
2481     {
2482       if (MULT_COST_LESS (cost_limit, zero_cost))
2483         return;
2484       else
2485         {
2486           alg_out->ops = 1;
2487           alg_out->cost.cost = zero_cost;
2488           alg_out->cost.latency = zero_cost;
2489           alg_out->op[0] = alg_zero;
2490           return;
2491         }
2492     }
2493
2494   /* We'll be needing a couple extra algorithm structures now.  */
2495
2496   alg_in = alloca (sizeof (struct algorithm));
2497   best_alg = alloca (sizeof (struct algorithm));
2498   best_cost = *cost_limit;
2499
2500   /* Compute the hash index.  */
2501   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2502
2503   /* See if we already know what to do for T.  */
2504   if (alg_hash[hash_index].t == t
2505       && alg_hash[hash_index].mode == mode
2506       && alg_hash[hash_index].alg != alg_unknown)
2507     {
2508       cache_alg = alg_hash[hash_index].alg;
2509
2510       if (cache_alg == alg_impossible)
2511         {
2512           /* The cache tells us that it's impossible to synthesize
2513              multiplication by T within alg_hash[hash_index].cost.  */
2514           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2515             /* COST_LIMIT is at least as restrictive as the one
2516                recorded in the hash table, in which case we have no
2517                hope of synthesizing a multiplication.  Just
2518                return.  */
2519             return;
2520
2521           /* If we get here, COST_LIMIT is less restrictive than the
2522              one recorded in the hash table, so we may be able to
2523              synthesize a multiplication.  Proceed as if we didn't
2524              have the cache entry.  */
2525         }
2526       else
2527         {
2528           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2529             /* The cached algorithm shows that this multiplication
2530                requires more cost than COST_LIMIT.  Just return.  This
2531                way, we don't clobber this cache entry with
2532                alg_impossible but retain useful information.  */
2533             return;
2534
2535           cache_hit = true;
2536
2537           switch (cache_alg)
2538             {
2539             case alg_shift:
2540               goto do_alg_shift;
2541
2542             case alg_add_t_m2:
2543             case alg_sub_t_m2:
2544               goto do_alg_addsub_t_m2;
2545
2546             case alg_add_factor:
2547             case alg_sub_factor:
2548               goto do_alg_addsub_factor;
2549
2550             case alg_add_t2_m:
2551               goto do_alg_add_t2_m;
2552
2553             case alg_sub_t2_m:
2554               goto do_alg_sub_t2_m;
2555
2556             default:
2557               gcc_unreachable ();
2558             }
2559         }
2560     }
2561
2562   /* If we have a group of zero bits at the low-order part of T, try
2563      multiplying by the remaining bits and then doing a shift.  */
2564
2565   if ((t & 1) == 0)
2566     {
2567     do_alg_shift:
2568       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2569       if (m < maxm)
2570         {
2571           q = t >> m;
2572           /* The function expand_shift will choose between a shift and
2573              a sequence of additions, so the observed cost is given as
2574              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2575           op_cost = m * add_cost[mode];
2576           if (shift_cost[mode][m] < op_cost)
2577             op_cost = shift_cost[mode][m];
2578           new_limit.cost = best_cost.cost - op_cost;
2579           new_limit.latency = best_cost.latency - op_cost;
2580           synth_mult (alg_in, q, &new_limit, mode);
2581
2582           alg_in->cost.cost += op_cost;
2583           alg_in->cost.latency += op_cost;
2584           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585             {
2586               struct algorithm *x;
2587               best_cost = alg_in->cost;
2588               x = alg_in, alg_in = best_alg, best_alg = x;
2589               best_alg->log[best_alg->ops] = m;
2590               best_alg->op[best_alg->ops] = alg_shift;
2591             }
2592         }
2593       if (cache_hit)
2594         goto done;
2595     }
2596
2597   /* If we have an odd number, add or subtract one.  */
2598   if ((t & 1) != 0)
2599     {
2600       unsigned HOST_WIDE_INT w;
2601
2602     do_alg_addsub_t_m2:
2603       for (w = 1; (w & t) != 0; w <<= 1)
2604         ;
2605       /* If T was -1, then W will be zero after the loop.  This is another
2606          case where T ends with ...111.  Handling this with (T + 1) and
2607          subtract 1 produces slightly better code and results in algorithm
2608          selection much faster than treating it like the ...0111 case
2609          below.  */
2610       if (w == 0
2611           || (w > 2
2612               /* Reject the case where t is 3.
2613                  Thus we prefer addition in that case.  */
2614               && t != 3))
2615         {
2616           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2617
2618           op_cost = add_cost[mode];
2619           new_limit.cost = best_cost.cost - op_cost;
2620           new_limit.latency = best_cost.latency - op_cost;
2621           synth_mult (alg_in, t + 1, &new_limit, mode);
2622
2623           alg_in->cost.cost += op_cost;
2624           alg_in->cost.latency += op_cost;
2625           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2626             {
2627               struct algorithm *x;
2628               best_cost = alg_in->cost;
2629               x = alg_in, alg_in = best_alg, best_alg = x;
2630               best_alg->log[best_alg->ops] = 0;
2631               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2632             }
2633         }
2634       else
2635         {
2636           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2637
2638           op_cost = add_cost[mode];
2639           new_limit.cost = best_cost.cost - op_cost;
2640           new_limit.latency = best_cost.latency - op_cost;
2641           synth_mult (alg_in, t - 1, &new_limit, mode);
2642
2643           alg_in->cost.cost += op_cost;
2644           alg_in->cost.latency += op_cost;
2645           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2646             {
2647               struct algorithm *x;
2648               best_cost = alg_in->cost;
2649               x = alg_in, alg_in = best_alg, best_alg = x;
2650               best_alg->log[best_alg->ops] = 0;
2651               best_alg->op[best_alg->ops] = alg_add_t_m2;
2652             }
2653         }
2654       if (cache_hit)
2655         goto done;
2656     }
2657
2658   /* Look for factors of t of the form
2659      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2660      If we find such a factor, we can multiply by t using an algorithm that
2661      multiplies by q, shift the result by m and add/subtract it to itself.
2662
2663      We search for large factors first and loop down, even if large factors
2664      are less probable than small; if we find a large factor we will find a
2665      good sequence quickly, and therefore be able to prune (by decreasing
2666      COST_LIMIT) the search.  */
2667
2668  do_alg_addsub_factor:
2669   for (m = floor_log2 (t - 1); m >= 2; m--)
2670     {
2671       unsigned HOST_WIDE_INT d;
2672
2673       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2674       if (t % d == 0 && t > d && m < maxm
2675           && (!cache_hit || cache_alg == alg_add_factor))
2676         {
2677           /* If the target has a cheap shift-and-add instruction use
2678              that in preference to a shift insn followed by an add insn.
2679              Assume that the shift-and-add is "atomic" with a latency
2680              equal to its cost, otherwise assume that on superscalar
2681              hardware the shift may be executed concurrently with the
2682              earlier steps in the algorithm.  */
2683           op_cost = add_cost[mode] + shift_cost[mode][m];
2684           if (shiftadd_cost[mode][m] < op_cost)
2685             {
2686               op_cost = shiftadd_cost[mode][m];
2687               op_latency = op_cost;
2688             }
2689           else
2690             op_latency = add_cost[mode];
2691
2692           new_limit.cost = best_cost.cost - op_cost;
2693           new_limit.latency = best_cost.latency - op_latency;
2694           synth_mult (alg_in, t / d, &new_limit, mode);
2695
2696           alg_in->cost.cost += op_cost;
2697           alg_in->cost.latency += op_latency;
2698           if (alg_in->cost.latency < op_cost)
2699             alg_in->cost.latency = op_cost;
2700           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2701             {
2702               struct algorithm *x;
2703               best_cost = alg_in->cost;
2704               x = alg_in, alg_in = best_alg, best_alg = x;
2705               best_alg->log[best_alg->ops] = m;
2706               best_alg->op[best_alg->ops] = alg_add_factor;
2707             }
2708           /* Other factors will have been taken care of in the recursion.  */
2709           break;
2710         }
2711
2712       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2713       if (t % d == 0 && t > d && m < maxm
2714           && (!cache_hit || cache_alg == alg_sub_factor))
2715         {
2716           /* If the target has a cheap shift-and-subtract insn use
2717              that in preference to a shift insn followed by a sub insn.
2718              Assume that the shift-and-sub is "atomic" with a latency
2719              equal to it's cost, otherwise assume that on superscalar
2720              hardware the shift may be executed concurrently with the
2721              earlier steps in the algorithm.  */
2722           op_cost = add_cost[mode] + shift_cost[mode][m];
2723           if (shiftsub_cost[mode][m] < op_cost)
2724             {
2725               op_cost = shiftsub_cost[mode][m];
2726               op_latency = op_cost;
2727             }
2728           else
2729             op_latency = add_cost[mode];
2730
2731           new_limit.cost = best_cost.cost - op_cost;
2732           new_limit.latency = best_cost.latency - op_latency;
2733           synth_mult (alg_in, t / d, &new_limit, mode);
2734
2735           alg_in->cost.cost += op_cost;
2736           alg_in->cost.latency += op_latency;
2737           if (alg_in->cost.latency < op_cost)
2738             alg_in->cost.latency = op_cost;
2739           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740             {
2741               struct algorithm *x;
2742               best_cost = alg_in->cost;
2743               x = alg_in, alg_in = best_alg, best_alg = x;
2744               best_alg->log[best_alg->ops] = m;
2745               best_alg->op[best_alg->ops] = alg_sub_factor;
2746             }
2747           break;
2748         }
2749     }
2750   if (cache_hit)
2751     goto done;
2752
2753   /* Try shift-and-add (load effective address) instructions,
2754      i.e. do a*3, a*5, a*9.  */
2755   if ((t & 1) != 0)
2756     {
2757     do_alg_add_t2_m:
2758       q = t - 1;
2759       q = q & -q;
2760       m = exact_log2 (q);
2761       if (m >= 0 && m < maxm)
2762         {
2763           op_cost = shiftadd_cost[mode][m];
2764           new_limit.cost = best_cost.cost - op_cost;
2765           new_limit.latency = best_cost.latency - op_cost;
2766           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2767
2768           alg_in->cost.cost += op_cost;
2769           alg_in->cost.latency += op_cost;
2770           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2771             {
2772               struct algorithm *x;
2773               best_cost = alg_in->cost;
2774               x = alg_in, alg_in = best_alg, best_alg = x;
2775               best_alg->log[best_alg->ops] = m;
2776               best_alg->op[best_alg->ops] = alg_add_t2_m;
2777             }
2778         }
2779       if (cache_hit)
2780         goto done;
2781
2782     do_alg_sub_t2_m:
2783       q = t + 1;
2784       q = q & -q;
2785       m = exact_log2 (q);
2786       if (m >= 0 && m < maxm)
2787         {
2788           op_cost = shiftsub_cost[mode][m];
2789           new_limit.cost = best_cost.cost - op_cost;
2790           new_limit.latency = best_cost.latency - op_cost;
2791           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2792
2793           alg_in->cost.cost += op_cost;
2794           alg_in->cost.latency += op_cost;
2795           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2796             {
2797               struct algorithm *x;
2798               best_cost = alg_in->cost;
2799               x = alg_in, alg_in = best_alg, best_alg = x;
2800               best_alg->log[best_alg->ops] = m;
2801               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2802             }
2803         }
2804       if (cache_hit)
2805         goto done;
2806     }
2807
2808  done:
2809   /* If best_cost has not decreased, we have not found any algorithm.  */
2810   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2811     {
2812       /* We failed to find an algorithm.  Record alg_impossible for
2813          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2814          we are asked to find an algorithm for T within the same or
2815          lower COST_LIMIT, we can immediately return to the
2816          caller.  */
2817       alg_hash[hash_index].t = t;
2818       alg_hash[hash_index].mode = mode;
2819       alg_hash[hash_index].alg = alg_impossible;
2820       alg_hash[hash_index].cost = *cost_limit;
2821       return;
2822     }
2823
2824   /* Cache the result.  */
2825   if (!cache_hit)
2826     {
2827       alg_hash[hash_index].t = t;
2828       alg_hash[hash_index].mode = mode;
2829       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2830       alg_hash[hash_index].cost.cost = best_cost.cost;
2831       alg_hash[hash_index].cost.latency = best_cost.latency;
2832     }
2833
2834   /* If we are getting a too long sequence for `struct algorithm'
2835      to record, make this search fail.  */
2836   if (best_alg->ops == MAX_BITS_PER_WORD)
2837     return;
2838
2839   /* Copy the algorithm from temporary space to the space at alg_out.
2840      We avoid using structure assignment because the majority of
2841      best_alg is normally undefined, and this is a critical function.  */
2842   alg_out->ops = best_alg->ops + 1;
2843   alg_out->cost = best_cost;
2844   memcpy (alg_out->op, best_alg->op,
2845           alg_out->ops * sizeof *alg_out->op);
2846   memcpy (alg_out->log, best_alg->log,
2847           alg_out->ops * sizeof *alg_out->log);
2848 }
2849 \f
2850 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2851    Try three variations:
2852
2853        - a shift/add sequence based on VAL itself
2854        - a shift/add sequence based on -VAL, followed by a negation
2855        - a shift/add sequence based on VAL - 1, followed by an addition.
2856
2857    Return true if the cheapest of these cost less than MULT_COST,
2858    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2859
2860 static bool
2861 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2862                      struct algorithm *alg, enum mult_variant *variant,
2863                      int mult_cost)
2864 {
2865   struct algorithm alg2;
2866   struct mult_cost limit;
2867   int op_cost;
2868
2869   /* Fail quickly for impossible bounds.  */
2870   if (mult_cost < 0)
2871     return false;
2872
2873   /* Ensure that mult_cost provides a reasonable upper bound.
2874      Any constant multiplication can be performed with less
2875      than 2 * bits additions.  */
2876   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2877   if (mult_cost > op_cost)
2878     mult_cost = op_cost;
2879
2880   *variant = basic_variant;
2881   limit.cost = mult_cost;
2882   limit.latency = mult_cost;
2883   synth_mult (alg, val, &limit, mode);
2884
2885   /* This works only if the inverted value actually fits in an
2886      `unsigned int' */
2887   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2888     {
2889       op_cost = neg_cost[mode];
2890       if (MULT_COST_LESS (&alg->cost, mult_cost))
2891         {
2892           limit.cost = alg->cost.cost - op_cost;
2893           limit.latency = alg->cost.latency - op_cost;
2894         }
2895       else
2896         {
2897           limit.cost = mult_cost - op_cost;
2898           limit.latency = mult_cost - op_cost;
2899         }
2900
2901       synth_mult (&alg2, -val, &limit, mode);
2902       alg2.cost.cost += op_cost;
2903       alg2.cost.latency += op_cost;
2904       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2905         *alg = alg2, *variant = negate_variant;
2906     }
2907
2908   /* This proves very useful for division-by-constant.  */
2909   op_cost = add_cost[mode];
2910   if (MULT_COST_LESS (&alg->cost, mult_cost))
2911     {
2912       limit.cost = alg->cost.cost - op_cost;
2913       limit.latency = alg->cost.latency - op_cost;
2914     }
2915   else
2916     {
2917       limit.cost = mult_cost - op_cost;
2918       limit.latency = mult_cost - op_cost;
2919     }
2920
2921   synth_mult (&alg2, val - 1, &limit, mode);
2922   alg2.cost.cost += op_cost;
2923   alg2.cost.latency += op_cost;
2924   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2925     *alg = alg2, *variant = add_variant;
2926
2927   return MULT_COST_LESS (&alg->cost, mult_cost);
2928 }
2929
2930 /* A subroutine of expand_mult, used for constant multiplications.
2931    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2932    convenient.  Use the shift/add sequence described by ALG and apply
2933    the final fixup specified by VARIANT.  */
2934
2935 static rtx
2936 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2937                    rtx target, const struct algorithm *alg,
2938                    enum mult_variant variant)
2939 {
2940   HOST_WIDE_INT val_so_far;
2941   rtx insn, accum, tem;
2942   int opno;
2943   enum machine_mode nmode;
2944
2945   /* Avoid referencing memory over and over.
2946      For speed, but also for correctness when mem is volatile.  */
2947   if (MEM_P (op0))
2948     op0 = force_reg (mode, op0);
2949
2950   /* ACCUM starts out either as OP0 or as a zero, depending on
2951      the first operation.  */
2952
2953   if (alg->op[0] == alg_zero)
2954     {
2955       accum = copy_to_mode_reg (mode, const0_rtx);
2956       val_so_far = 0;
2957     }
2958   else if (alg->op[0] == alg_m)
2959     {
2960       accum = copy_to_mode_reg (mode, op0);
2961       val_so_far = 1;
2962     }
2963   else
2964     gcc_unreachable ();
2965
2966   for (opno = 1; opno < alg->ops; opno++)
2967     {
2968       int log = alg->log[opno];
2969       rtx shift_subtarget = optimize ? 0 : accum;
2970       rtx add_target
2971         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2972            && !optimize)
2973           ? target : 0;
2974       rtx accum_target = optimize ? 0 : accum;
2975
2976       switch (alg->op[opno])
2977         {
2978         case alg_shift:
2979           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2980                                 build_int_cst (NULL_TREE, log),
2981                                 NULL_RTX, 0);
2982           val_so_far <<= log;
2983           break;
2984
2985         case alg_add_t_m2:
2986           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2987                               build_int_cst (NULL_TREE, log),
2988                               NULL_RTX, 0);
2989           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2990                                  add_target ? add_target : accum_target);
2991           val_so_far += (HOST_WIDE_INT) 1 << log;
2992           break;
2993
2994         case alg_sub_t_m2:
2995           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2996                               build_int_cst (NULL_TREE, log),
2997                               NULL_RTX, 0);
2998           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2999                                  add_target ? add_target : accum_target);
3000           val_so_far -= (HOST_WIDE_INT) 1 << log;
3001           break;
3002
3003         case alg_add_t2_m:
3004           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3005                                 build_int_cst (NULL_TREE, log),
3006                                 shift_subtarget,
3007                                 0);
3008           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3009                                  add_target ? add_target : accum_target);
3010           val_so_far = (val_so_far << log) + 1;
3011           break;
3012
3013         case alg_sub_t2_m:
3014           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3015                                 build_int_cst (NULL_TREE, log),
3016                                 shift_subtarget, 0);
3017           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3018                                  add_target ? add_target : accum_target);
3019           val_so_far = (val_so_far << log) - 1;
3020           break;
3021
3022         case alg_add_factor:
3023           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3024                               build_int_cst (NULL_TREE, log),
3025                               NULL_RTX, 0);
3026           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3027                                  add_target ? add_target : accum_target);
3028           val_so_far += val_so_far << log;
3029           break;
3030
3031         case alg_sub_factor:
3032           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3033                               build_int_cst (NULL_TREE, log),
3034                               NULL_RTX, 0);
3035           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3036                                  (add_target
3037                                   ? add_target : (optimize ? 0 : tem)));
3038           val_so_far = (val_so_far << log) - val_so_far;
3039           break;
3040
3041         default:
3042           gcc_unreachable ();
3043         }
3044
3045       /* Write a REG_EQUAL note on the last insn so that we can cse
3046          multiplication sequences.  Note that if ACCUM is a SUBREG,
3047          we've set the inner register and must properly indicate
3048          that.  */
3049
3050       tem = op0, nmode = mode;
3051       if (GET_CODE (accum) == SUBREG)
3052         {
3053           nmode = GET_MODE (SUBREG_REG (accum));
3054           tem = gen_lowpart (nmode, op0);
3055         }
3056
3057       insn = get_last_insn ();
3058       set_unique_reg_note (insn, REG_EQUAL,
3059                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3060     }
3061
3062   if (variant == negate_variant)
3063     {
3064       val_so_far = -val_so_far;
3065       accum = expand_unop (mode, neg_optab, accum, target, 0);
3066     }
3067   else if (variant == add_variant)
3068     {
3069       val_so_far = val_so_far + 1;
3070       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3071     }
3072
3073   /* Compare only the bits of val and val_so_far that are significant
3074      in the result mode, to avoid sign-/zero-extension confusion.  */
3075   val &= GET_MODE_MASK (mode);
3076   val_so_far &= GET_MODE_MASK (mode);
3077   gcc_assert (val == val_so_far);
3078
3079   return accum;
3080 }
3081
3082 /* Perform a multiplication and return an rtx for the result.
3083    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3084    TARGET is a suggestion for where to store the result (an rtx).
3085
3086    We check specially for a constant integer as OP1.
3087    If you want this check for OP0 as well, then before calling
3088    you should swap the two operands if OP0 would be constant.  */
3089
3090 rtx
3091 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3092              int unsignedp)
3093 {
3094   enum mult_variant variant;
3095   struct algorithm algorithm;
3096   int max_cost;
3097
3098   /* Handling const0_rtx here allows us to use zero as a rogue value for
3099      coeff below.  */
3100   if (op1 == const0_rtx)
3101     return const0_rtx;
3102   if (op1 == const1_rtx)
3103     return op0;
3104   if (op1 == constm1_rtx)
3105     return expand_unop (mode,
3106                         GET_MODE_CLASS (mode) == MODE_INT
3107                         && !unsignedp && flag_trapv
3108                         ? negv_optab : neg_optab,
3109                         op0, target, 0);
3110
3111   /* These are the operations that are potentially turned into a sequence
3112      of shifts and additions.  */
3113   if (SCALAR_INT_MODE_P (mode)
3114       && (unsignedp || !flag_trapv))
3115     {
3116       HOST_WIDE_INT coeff = 0;
3117       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3118
3119       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3120          less than or equal in size to `unsigned int' this doesn't matter.
3121          If the mode is larger than `unsigned int', then synth_mult works
3122          only if the constant value exactly fits in an `unsigned int' without
3123          any truncation.  This means that multiplying by negative values does
3124          not work; results are off by 2^32 on a 32 bit machine.  */
3125
3126       if (GET_CODE (op1) == CONST_INT)
3127         {
3128           /* Attempt to handle multiplication of DImode values by negative
3129              coefficients, by performing the multiplication by a positive
3130              multiplier and then inverting the result.  */
3131           if (INTVAL (op1) < 0
3132               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3133             {
3134               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3135                  result is interpreted as an unsigned coefficient.
3136                  Exclude cost of op0 from max_cost to match the cost
3137                  calculation of the synth_mult.  */
3138               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3139                          - neg_cost[mode];
3140               if (max_cost > 0
3141                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3142                                           &variant, max_cost))
3143                 {
3144                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3145                                                 NULL_RTX, &algorithm,
3146                                                 variant);
3147                   return expand_unop (mode, neg_optab, temp, target, 0);
3148                 }
3149             }
3150           else coeff = INTVAL (op1);
3151         }
3152       else if (GET_CODE (op1) == CONST_DOUBLE)
3153         {
3154           /* If we are multiplying in DImode, it may still be a win
3155              to try to work with shifts and adds.  */
3156           if (CONST_DOUBLE_HIGH (op1) == 0)
3157             coeff = CONST_DOUBLE_LOW (op1);
3158           else if (CONST_DOUBLE_LOW (op1) == 0
3159                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3160             {
3161               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3162                           + HOST_BITS_PER_WIDE_INT;
3163               return expand_shift (LSHIFT_EXPR, mode, op0,
3164                                    build_int_cst (NULL_TREE, shift),
3165                                    target, unsignedp);
3166             }
3167         }
3168
3169       /* We used to test optimize here, on the grounds that it's better to
3170          produce a smaller program when -O is not used.  But this causes
3171          such a terrible slowdown sometimes that it seems better to always
3172          use synth_mult.  */
3173       if (coeff != 0)
3174         {
3175           /* Special case powers of two.  */
3176           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3177             return expand_shift (LSHIFT_EXPR, mode, op0,
3178                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3179                                  target, unsignedp);
3180
3181           /* Exclude cost of op0 from max_cost to match the cost
3182              calculation of the synth_mult.  */
3183           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3184           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3185                                    max_cost))
3186             return expand_mult_const (mode, op0, coeff, target,
3187                                       &algorithm, variant);
3188         }
3189     }
3190
3191   if (GET_CODE (op0) == CONST_DOUBLE)
3192     {
3193       rtx temp = op0;
3194       op0 = op1;
3195       op1 = temp;
3196     }
3197
3198   /* Expand x*2.0 as x+x.  */
3199   if (GET_CODE (op1) == CONST_DOUBLE
3200       && SCALAR_FLOAT_MODE_P (mode))
3201     {
3202       REAL_VALUE_TYPE d;
3203       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3204
3205       if (REAL_VALUES_EQUAL (d, dconst2))
3206         {
3207           op0 = force_reg (GET_MODE (op0), op0);
3208           return expand_binop (mode, add_optab, op0, op0,
3209                                target, unsignedp, OPTAB_LIB_WIDEN);
3210         }
3211     }
3212
3213   /* This used to use umul_optab if unsigned, but for non-widening multiply
3214      there is no difference between signed and unsigned.  */
3215   op0 = expand_binop (mode,
3216                       ! unsignedp
3217                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3218                       ? smulv_optab : smul_optab,
3219                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3220   gcc_assert (op0);
3221   return op0;
3222 }
3223 \f
3224 /* Return the smallest n such that 2**n >= X.  */
3225
3226 int
3227 ceil_log2 (unsigned HOST_WIDE_INT x)
3228 {
3229   return floor_log2 (x - 1) + 1;
3230 }
3231
3232 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3233    replace division by D, and put the least significant N bits of the result
3234    in *MULTIPLIER_PTR and return the most significant bit.
3235
3236    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3237    needed precision is in PRECISION (should be <= N).
3238
3239    PRECISION should be as small as possible so this function can choose
3240    multiplier more freely.
3241
3242    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3243    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3244
3245    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3246    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3247
3248 static
3249 unsigned HOST_WIDE_INT
3250 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3251                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3252 {
3253   HOST_WIDE_INT mhigh_hi, mlow_hi;
3254   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3255   int lgup, post_shift;
3256   int pow, pow2;
3257   unsigned HOST_WIDE_INT nl, dummy1;
3258   HOST_WIDE_INT nh, dummy2;
3259
3260   /* lgup = ceil(log2(divisor)); */
3261   lgup = ceil_log2 (d);
3262
3263   gcc_assert (lgup <= n);
3264
3265   pow = n + lgup;
3266   pow2 = n + lgup - precision;
3267
3268   /* We could handle this with some effort, but this case is much
3269      better handled directly with a scc insn, so rely on caller using
3270      that.  */
3271   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3272
3273   /* mlow = 2^(N + lgup)/d */
3274  if (pow >= HOST_BITS_PER_WIDE_INT)
3275     {
3276       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3277       nl = 0;
3278     }
3279   else
3280     {
3281       nh = 0;
3282       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3283     }
3284   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3285                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3286
3287   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3288   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3289     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3290   else
3291     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3292   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3293                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3294
3295   gcc_assert (!mhigh_hi || nh - d < d);
3296   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3297   /* Assert that mlow < mhigh.  */
3298   gcc_assert (mlow_hi < mhigh_hi
3299               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3300
3301   /* If precision == N, then mlow, mhigh exceed 2^N
3302      (but they do not exceed 2^(N+1)).  */
3303
3304   /* Reduce to lowest terms.  */
3305   for (post_shift = lgup; post_shift > 0; post_shift--)
3306     {
3307       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3308       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3309       if (ml_lo >= mh_lo)
3310         break;
3311
3312       mlow_hi = 0;
3313       mlow_lo = ml_lo;
3314       mhigh_hi = 0;
3315       mhigh_lo = mh_lo;
3316     }
3317
3318   *post_shift_ptr = post_shift;
3319   *lgup_ptr = lgup;
3320   if (n < HOST_BITS_PER_WIDE_INT)
3321     {
3322       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3323       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3324       return mhigh_lo >= mask;
3325     }
3326   else
3327     {
3328       *multiplier_ptr = GEN_INT (mhigh_lo);
3329       return mhigh_hi;
3330     }
3331 }
3332
3333 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3334    congruent to 1 (mod 2**N).  */
3335
3336 static unsigned HOST_WIDE_INT
3337 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3338 {
3339   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3340
3341   /* The algorithm notes that the choice y = x satisfies
3342      x*y == 1 mod 2^3, since x is assumed odd.
3343      Each iteration doubles the number of bits of significance in y.  */
3344
3345   unsigned HOST_WIDE_INT mask;
3346   unsigned HOST_WIDE_INT y = x;
3347   int nbit = 3;
3348
3349   mask = (n == HOST_BITS_PER_WIDE_INT
3350           ? ~(unsigned HOST_WIDE_INT) 0
3351           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3352
3353   while (nbit < n)
3354     {
3355       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3356       nbit *= 2;
3357     }
3358   return y;
3359 }
3360
3361 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3362    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3363    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3364    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3365    become signed.
3366
3367    The result is put in TARGET if that is convenient.
3368
3369    MODE is the mode of operation.  */
3370
3371 rtx
3372 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3373                              rtx op1, rtx target, int unsignedp)
3374 {
3375   rtx tem;
3376   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3377
3378   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3379                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3380                       NULL_RTX, 0);
3381   tem = expand_and (mode, tem, op1, NULL_RTX);
3382   adj_operand
3383     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3384                      adj_operand);
3385
3386   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3387                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3388                       NULL_RTX, 0);
3389   tem = expand_and (mode, tem, op0, NULL_RTX);
3390   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3391                           target);
3392
3393   return target;
3394 }
3395
3396 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3397
3398 static rtx
3399 extract_high_half (enum machine_mode mode, rtx op)
3400 {
3401   enum machine_mode wider_mode;
3402
3403   if (mode == word_mode)
3404     return gen_highpart (mode, op);
3405
3406   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3407
3408   wider_mode = GET_MODE_WIDER_MODE (mode);
3409   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3410                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3411   return convert_modes (mode, wider_mode, op, 0);
3412 }
3413
3414 /* Like expand_mult_highpart, but only consider using a multiplication
3415    optab.  OP1 is an rtx for the constant operand.  */
3416
3417 static rtx
3418 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3419                             rtx target, int unsignedp, int max_cost)
3420 {
3421   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3422   enum machine_mode wider_mode;
3423   optab moptab;
3424   rtx tem;
3425   int size;
3426
3427   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3428
3429   wider_mode = GET_MODE_WIDER_MODE (mode);
3430   size = GET_MODE_BITSIZE (mode);
3431
3432   /* Firstly, try using a multiplication insn that only generates the needed
3433      high part of the product, and in the sign flavor of unsignedp.  */
3434   if (mul_highpart_cost[mode] < max_cost)
3435     {
3436       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3437       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3438                           unsignedp, OPTAB_DIRECT);
3439       if (tem)
3440         return tem;
3441     }
3442
3443   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3444      Need to adjust the result after the multiplication.  */
3445   if (size - 1 < BITS_PER_WORD
3446       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3447           + 4 * add_cost[mode] < max_cost))
3448     {
3449       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3450       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3451                           unsignedp, OPTAB_DIRECT);
3452       if (tem)
3453         /* We used the wrong signedness.  Adjust the result.  */
3454         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3455                                             tem, unsignedp);
3456     }
3457
3458   /* Try widening multiplication.  */
3459   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3460   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3461       && mul_widen_cost[wider_mode] < max_cost)
3462     {
3463       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3464                           unsignedp, OPTAB_WIDEN);
3465       if (tem)
3466         return extract_high_half (mode, tem);
3467     }
3468
3469   /* Try widening the mode and perform a non-widening multiplication.  */
3470   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3471       && size - 1 < BITS_PER_WORD
3472       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3473     {
3474       rtx insns, wop0, wop1;
3475
3476       /* We need to widen the operands, for example to ensure the
3477          constant multiplier is correctly sign or zero extended.
3478          Use a sequence to clean-up any instructions emitted by
3479          the conversions if things don't work out.  */
3480       start_sequence ();
3481       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3482       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3483       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3484                           unsignedp, OPTAB_WIDEN);
3485       insns = get_insns ();
3486       end_sequence ();
3487
3488       if (tem)
3489         {
3490           emit_insn (insns);
3491           return extract_high_half (mode, tem);
3492         }
3493     }
3494
3495   /* Try widening multiplication of opposite signedness, and adjust.  */
3496   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3497   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3498       && size - 1 < BITS_PER_WORD
3499       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3500           + 4 * add_cost[mode] < max_cost))
3501     {
3502       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3503                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3504       if (tem != 0)
3505         {
3506           tem = extract_high_half (mode, tem);
3507           /* We used the wrong signedness.  Adjust the result.  */
3508           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3509                                               target, unsignedp);
3510         }
3511     }
3512
3513   return 0;
3514 }
3515
3516 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3517    putting the high half of the result in TARGET if that is convenient,
3518    and return where the result is.  If the operation can not be performed,
3519    0 is returned.
3520
3521    MODE is the mode of operation and result.
3522
3523    UNSIGNEDP nonzero means unsigned multiply.
3524
3525    MAX_COST is the total allowed cost for the expanded RTL.  */
3526
3527 static rtx
3528 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3529                       rtx target, int unsignedp, int max_cost)
3530 {
3531   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3532   unsigned HOST_WIDE_INT cnst1;
3533   int extra_cost;
3534   bool sign_adjust = false;
3535   enum mult_variant variant;
3536   struct algorithm alg;
3537   rtx tem;
3538
3539   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3540   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3541   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3542
3543   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3544
3545   /* We can't optimize modes wider than BITS_PER_WORD.
3546      ??? We might be able to perform double-word arithmetic if
3547      mode == word_mode, however all the cost calculations in
3548      synth_mult etc. assume single-word operations.  */
3549   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3550     return expand_mult_highpart_optab (mode, op0, op1, target,
3551                                        unsignedp, max_cost);
3552
3553   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3554
3555   /* Check whether we try to multiply by a negative constant.  */
3556   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3557     {
3558       sign_adjust = true;
3559       extra_cost += add_cost[mode];
3560     }
3561
3562   /* See whether shift/add multiplication is cheap enough.  */
3563   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3564                            max_cost - extra_cost))
3565     {
3566       /* See whether the specialized multiplication optabs are
3567          cheaper than the shift/add version.  */
3568       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3569                                         alg.cost.cost + extra_cost);
3570       if (tem)
3571         return tem;
3572
3573       tem = convert_to_mode (wider_mode, op0, unsignedp);
3574       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3575       tem = extract_high_half (mode, tem);
3576
3577       /* Adjust result for signedness.  */
3578       if (sign_adjust)
3579         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3580
3581       return tem;
3582     }
3583   return expand_mult_highpart_optab (mode, op0, op1, target,
3584                                      unsignedp, max_cost);
3585 }
3586
3587
3588 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3589
3590 static rtx
3591 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3592 {
3593   unsigned HOST_WIDE_INT masklow, maskhigh;
3594   rtx result, temp, shift, label;
3595   int logd;
3596
3597   logd = floor_log2 (d);
3598   result = gen_reg_rtx (mode);
3599
3600   /* Avoid conditional branches when they're expensive.  */
3601   if (BRANCH_COST >= 2
3602       && !optimize_size)
3603     {
3604       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3605                                       mode, 0, -1);
3606       if (signmask)
3607         {
3608           signmask = force_reg (mode, signmask);
3609           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3610           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3611
3612           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3613              which instruction sequence to use.  If logical right shifts
3614              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3615              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3616
3617           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3618           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3619               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3620             {
3621               temp = expand_binop (mode, xor_optab, op0, signmask,
3622                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3623               temp = expand_binop (mode, sub_optab, temp, signmask,
3624                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3626                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3627               temp = expand_binop (mode, xor_optab, temp, signmask,
3628                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3629               temp = expand_binop (mode, sub_optab, temp, signmask,
3630                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631             }
3632           else
3633             {
3634               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3635                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3636               signmask = force_reg (mode, signmask);
3637
3638               temp = expand_binop (mode, add_optab, op0, signmask,
3639                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3641                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3642               temp = expand_binop (mode, sub_optab, temp, signmask,
3643                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644             }
3645           return temp;
3646         }
3647     }
3648
3649   /* Mask contains the mode's signbit and the significant bits of the
3650      modulus.  By including the signbit in the operation, many targets
3651      can avoid an explicit compare operation in the following comparison
3652      against zero.  */
3653
3654   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3655   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3656     {
3657       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3658       maskhigh = -1;
3659     }
3660   else
3661     maskhigh = (HOST_WIDE_INT) -1
3662                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3663
3664   temp = expand_binop (mode, and_optab, op0,
3665                        immed_double_const (masklow, maskhigh, mode),
3666                        result, 1, OPTAB_LIB_WIDEN);
3667   if (temp != result)
3668     emit_move_insn (result, temp);
3669
3670   label = gen_label_rtx ();
3671   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3672
3673   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3674                        0, OPTAB_LIB_WIDEN);
3675   masklow = (HOST_WIDE_INT) -1 << logd;
3676   maskhigh = -1;
3677   temp = expand_binop (mode, ior_optab, temp,
3678                        immed_double_const (masklow, maskhigh, mode),
3679                        result, 1, OPTAB_LIB_WIDEN);
3680   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3681                        0, OPTAB_LIB_WIDEN);
3682   if (temp != result)
3683     emit_move_insn (result, temp);
3684   emit_label (label);
3685   return result;
3686 }
3687
3688 /* Expand signed division of OP0 by a power of two D in mode MODE.
3689    This routine is only called for positive values of D.  */
3690
3691 static rtx
3692 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3693 {
3694   rtx temp, label;
3695   tree shift;
3696   int logd;
3697
3698   logd = floor_log2 (d);
3699   shift = build_int_cst (NULL_TREE, logd);
3700
3701   if (d == 2 && BRANCH_COST >= 1)
3702     {
3703       temp = gen_reg_rtx (mode);
3704       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3705       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3706                            0, OPTAB_LIB_WIDEN);
3707       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3708     }
3709
3710 #ifdef HAVE_conditional_move
3711   if (BRANCH_COST >= 2)
3712     {
3713       rtx temp2;
3714
3715       /* ??? emit_conditional_move forces a stack adjustment via
3716          compare_from_rtx so, if the sequence is discarded, it will
3717          be lost.  Do it now instead.  */
3718       do_pending_stack_adjust ();
3719
3720       start_sequence ();
3721       temp2 = copy_to_mode_reg (mode, op0);
3722       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3723                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3724       temp = force_reg (mode, temp);
3725
3726       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3727       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3728                                      mode, temp, temp2, mode, 0);
3729       if (temp2)
3730         {
3731           rtx seq = get_insns ();
3732           end_sequence ();
3733           emit_insn (seq);
3734           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3735         }
3736       end_sequence ();
3737     }
3738 #endif
3739
3740   if (BRANCH_COST >= 2)
3741     {
3742       int ushift = GET_MODE_BITSIZE (mode) - logd;
3743
3744       temp = gen_reg_rtx (mode);
3745       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3746       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3747         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3748                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3749       else
3750         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3751                              build_int_cst (NULL_TREE, ushift),
3752                              NULL_RTX, 1);
3753       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3754                            0, OPTAB_LIB_WIDEN);
3755       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3756     }
3757
3758   label = gen_label_rtx ();
3759   temp = copy_to_mode_reg (mode, op0);
3760   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3761   expand_inc (temp, GEN_INT (d - 1));
3762   emit_label (label);
3763   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3764 }
3765 \f
3766 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3767    if that is convenient, and returning where the result is.
3768    You may request either the quotient or the remainder as the result;
3769    specify REM_FLAG nonzero to get the remainder.
3770
3771    CODE is the expression code for which kind of division this is;
3772    it controls how rounding is done.  MODE is the machine mode to use.
3773    UNSIGNEDP nonzero means do unsigned division.  */
3774
3775 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3776    and then correct it by or'ing in missing high bits
3777    if result of ANDI is nonzero.
3778    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3779    This could optimize to a bfexts instruction.
3780    But C doesn't use these operations, so their optimizations are
3781    left for later.  */
3782 /* ??? For modulo, we don't actually need the highpart of the first product,
3783    the low part will do nicely.  And for small divisors, the second multiply
3784    can also be a low-part only multiply or even be completely left out.
3785    E.g. to calculate the remainder of a division by 3 with a 32 bit
3786    multiply, multiply with 0x55555556 and extract the upper two bits;
3787    the result is exact for inputs up to 0x1fffffff.
3788    The input range can be reduced by using cross-sum rules.
3789    For odd divisors >= 3, the following table gives right shift counts
3790    so that if a number is shifted by an integer multiple of the given
3791    amount, the remainder stays the same:
3792    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3793    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3794    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3795    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3796    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3797
3798    Cross-sum rules for even numbers can be derived by leaving as many bits
3799    to the right alone as the divisor has zeros to the right.
3800    E.g. if x is an unsigned 32 bit number:
3801    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3802    */
3803
3804 rtx
3805 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3806                rtx op0, rtx op1, rtx target, int unsignedp)
3807 {
3808   enum machine_mode compute_mode;
3809   rtx tquotient;
3810   rtx quotient = 0, remainder = 0;
3811   rtx last;
3812   int size;
3813   rtx insn, set;
3814   optab optab1, optab2;
3815   int op1_is_constant, op1_is_pow2 = 0;
3816   int max_cost, extra_cost;
3817   static HOST_WIDE_INT last_div_const = 0;
3818   static HOST_WIDE_INT ext_op1;
3819
3820   op1_is_constant = GET_CODE (op1) == CONST_INT;
3821   if (op1_is_constant)
3822     {
3823       ext_op1 = INTVAL (op1);
3824       if (unsignedp)
3825         ext_op1 &= GET_MODE_MASK (mode);
3826       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3827                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3828     }
3829
3830   /*
3831      This is the structure of expand_divmod:
3832
3833      First comes code to fix up the operands so we can perform the operations
3834      correctly and efficiently.
3835
3836      Second comes a switch statement with code specific for each rounding mode.
3837      For some special operands this code emits all RTL for the desired
3838      operation, for other cases, it generates only a quotient and stores it in
3839      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3840      to indicate that it has not done anything.
3841
3842      Last comes code that finishes the operation.  If QUOTIENT is set and
3843      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3844      QUOTIENT is not set, it is computed using trunc rounding.
3845
3846      We try to generate special code for division and remainder when OP1 is a
3847      constant.  If |OP1| = 2**n we can use shifts and some other fast
3848      operations.  For other values of OP1, we compute a carefully selected
3849      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3850      by m.
3851
3852      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3853      half of the product.  Different strategies for generating the product are
3854      implemented in expand_mult_highpart.
3855
3856      If what we actually want is the remainder, we generate that by another
3857      by-constant multiplication and a subtraction.  */
3858
3859   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3860      code below will malfunction if we are, so check here and handle
3861      the special case if so.  */
3862   if (op1 == const1_rtx)
3863     return rem_flag ? const0_rtx : op0;
3864
3865     /* When dividing by -1, we could get an overflow.
3866      negv_optab can handle overflows.  */
3867   if (! unsignedp && op1 == constm1_rtx)
3868     {
3869       if (rem_flag)
3870         return const0_rtx;
3871       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3872                           ? negv_optab : neg_optab, op0, target, 0);
3873     }
3874
3875   if (target
3876       /* Don't use the function value register as a target
3877          since we have to read it as well as write it,
3878          and function-inlining gets confused by this.  */
3879       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3880           /* Don't clobber an operand while doing a multi-step calculation.  */
3881           || ((rem_flag || op1_is_constant)
3882               && (reg_mentioned_p (target, op0)
3883                   || (MEM_P (op0) && MEM_P (target))))
3884           || reg_mentioned_p (target, op1)
3885           || (MEM_P (op1) && MEM_P (target))))
3886     target = 0;
3887
3888   /* Get the mode in which to perform this computation.  Normally it will
3889      be MODE, but sometimes we can't do the desired operation in MODE.
3890      If so, pick a wider mode in which we can do the operation.  Convert
3891      to that mode at the start to avoid repeated conversions.
3892
3893      First see what operations we need.  These depend on the expression
3894      we are evaluating.  (We assume that divxx3 insns exist under the
3895      same conditions that modxx3 insns and that these insns don't normally
3896      fail.  If these assumptions are not correct, we may generate less
3897      efficient code in some cases.)
3898
3899      Then see if we find a mode in which we can open-code that operation
3900      (either a division, modulus, or shift).  Finally, check for the smallest
3901      mode for which we can do the operation with a library call.  */
3902
3903   /* We might want to refine this now that we have division-by-constant
3904      optimization.  Since expand_mult_highpart tries so many variants, it is
3905      not straightforward to generalize this.  Maybe we should make an array
3906      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3907
3908   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3909             ? (unsignedp ? lshr_optab : ashr_optab)
3910             : (unsignedp ? udiv_optab : sdiv_optab));
3911   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3912             ? optab1
3913             : (unsignedp ? udivmod_optab : sdivmod_optab));
3914
3915   for (compute_mode = mode; compute_mode != VOIDmode;
3916        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3917     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3918         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3919       break;
3920
3921   if (compute_mode == VOIDmode)
3922     for (compute_mode = mode; compute_mode != VOIDmode;
3923          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3924       if (optab1->handlers[compute_mode].libfunc
3925           || optab2->handlers[compute_mode].libfunc)
3926         break;
3927
3928   /* If we still couldn't find a mode, use MODE, but expand_binop will
3929      probably die.  */
3930   if (compute_mode == VOIDmode)
3931     compute_mode = mode;
3932
3933   if (target && GET_MODE (target) == compute_mode)
3934     tquotient = target;
3935   else
3936     tquotient = gen_reg_rtx (compute_mode);
3937
3938   size = GET_MODE_BITSIZE (compute_mode);
3939 #if 0
3940   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3941      (mode), and thereby get better code when OP1 is a constant.  Do that
3942      later.  It will require going over all usages of SIZE below.  */
3943   size = GET_MODE_BITSIZE (mode);
3944 #endif
3945
3946   /* Only deduct something for a REM if the last divide done was
3947      for a different constant.   Then set the constant of the last
3948      divide.  */
3949   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3950   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3951                      && INTVAL (op1) == last_div_const))
3952     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3953
3954   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3955
3956   /* Now convert to the best mode to use.  */
3957   if (compute_mode != mode)
3958     {
3959       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3960       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3961
3962       /* convert_modes may have placed op1 into a register, so we
3963          must recompute the following.  */
3964       op1_is_constant = GET_CODE (op1) == CONST_INT;
3965       op1_is_pow2 = (op1_is_constant
3966                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3967                           || (! unsignedp
3968                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3969     }
3970
3971   /* If one of the operands is a volatile MEM, copy it into a register.  */
3972
3973   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3974     op0 = force_reg (compute_mode, op0);
3975   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3976     op1 = force_reg (compute_mode, op1);
3977
3978   /* If we need the remainder or if OP1 is constant, we need to
3979      put OP0 in a register in case it has any queued subexpressions.  */
3980   if (rem_flag || op1_is_constant)
3981     op0 = force_reg (compute_mode, op0);
3982
3983   last = get_last_insn ();
3984
3985   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3986   if (unsignedp)
3987     {
3988       if (code == FLOOR_DIV_EXPR)
3989         code = TRUNC_DIV_EXPR;
3990       if (code == FLOOR_MOD_EXPR)
3991         code = TRUNC_MOD_EXPR;
3992       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3993         code = TRUNC_DIV_EXPR;
3994     }
3995
3996   if (op1 != const0_rtx)
3997     switch (code)
3998       {
3999       case TRUNC_MOD_EXPR:
4000       case TRUNC_DIV_EXPR:
4001         if (op1_is_constant)
4002           {
4003             if (unsignedp)
4004               {
4005                 unsigned HOST_WIDE_INT mh;
4006                 int pre_shift, post_shift;
4007                 int dummy;
4008                 rtx ml;
4009                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4010                                             & GET_MODE_MASK (compute_mode));
4011
4012                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4013                   {
4014                     pre_shift = floor_log2 (d);
4015                     if (rem_flag)
4016                       {
4017                         remainder
4018                           = expand_binop (compute_mode, and_optab, op0,
4019                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4020                                           remainder, 1,
4021                                           OPTAB_LIB_WIDEN);
4022                         if (remainder)
4023                           return gen_lowpart (mode, remainder);
4024                       }
4025                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4026                                              build_int_cst (NULL_TREE,
4027                                                             pre_shift),
4028                                              tquotient, 1);
4029                   }
4030                 else if (size <= HOST_BITS_PER_WIDE_INT)
4031                   {
4032                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4033                       {
4034                         /* Most significant bit of divisor is set; emit an scc
4035                            insn.  */
4036                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
4037                                                     compute_mode, 1, 1);
4038                         if (quotient == 0)
4039                           goto fail1;
4040                       }
4041                     else
4042                       {
4043                         /* Find a suitable multiplier and right shift count
4044                            instead of multiplying with D.  */
4045
4046                         mh = choose_multiplier (d, size, size,
4047                                                 &ml, &post_shift, &dummy);
4048
4049                         /* If the suggested multiplier is more than SIZE bits,
4050                            we can do better for even divisors, using an
4051                            initial right shift.  */
4052                         if (mh != 0 && (d & 1) == 0)
4053                           {
4054                             pre_shift = floor_log2 (d & -d);
4055                             mh = choose_multiplier (d >> pre_shift, size,
4056                                                     size - pre_shift,
4057                                                     &ml, &post_shift, &dummy);
4058                             gcc_assert (!mh);
4059                           }
4060                         else
4061                           pre_shift = 0;
4062
4063                         if (mh != 0)
4064                           {
4065                             rtx t1, t2, t3, t4;
4066
4067                             if (post_shift - 1 >= BITS_PER_WORD)
4068                               goto fail1;
4069
4070                             extra_cost
4071                               = (shift_cost[compute_mode][post_shift - 1]
4072                                  + shift_cost[compute_mode][1]
4073                                  + 2 * add_cost[compute_mode]);
4074                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4075                                                        NULL_RTX, 1,
4076                                                        max_cost - extra_cost);
4077                             if (t1 == 0)
4078                               goto fail1;
4079                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4080                                                                op0, t1),
4081                                                 NULL_RTX);
4082                             t3 = expand_shift
4083                               (RSHIFT_EXPR, compute_mode, t2,
4084                                build_int_cst (NULL_TREE, 1),
4085                                NULL_RTX,1);
4086                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4087                                                               t1, t3),
4088                                                 NULL_RTX);
4089                             quotient = expand_shift
4090                               (RSHIFT_EXPR, compute_mode, t4,
4091                                build_int_cst (NULL_TREE, post_shift - 1),
4092                                tquotient, 1);
4093                           }
4094                         else
4095                           {
4096                             rtx t1, t2;
4097
4098                             if (pre_shift >= BITS_PER_WORD
4099                                 || post_shift >= BITS_PER_WORD)
4100                               goto fail1;
4101
4102                             t1 = expand_shift
4103                               (RSHIFT_EXPR, compute_mode, op0,
4104                                build_int_cst (NULL_TREE, pre_shift),
4105                                NULL_RTX, 1);
4106                             extra_cost
4107                               = (shift_cost[compute_mode][pre_shift]
4108                                  + shift_cost[compute_mode][post_shift]);
4109                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4110                                                        NULL_RTX, 1,
4111                                                        max_cost - extra_cost);
4112                             if (t2 == 0)
4113                               goto fail1;
4114                             quotient = expand_shift
4115                               (RSHIFT_EXPR, compute_mode, t2,
4116                                build_int_cst (NULL_TREE, post_shift),
4117                                tquotient, 1);
4118                           }
4119                       }
4120                   }
4121                 else            /* Too wide mode to use tricky code */
4122                   break;
4123
4124                 insn = get_last_insn ();
4125                 if (insn != last
4126                     && (set = single_set (insn)) != 0
4127                     && SET_DEST (set) == quotient)
4128                   set_unique_reg_note (insn,
4129                                        REG_EQUAL,
4130                                        gen_rtx_UDIV (compute_mode, op0, op1));
4131               }
4132             else                /* TRUNC_DIV, signed */
4133               {
4134                 unsigned HOST_WIDE_INT ml;
4135                 int lgup, post_shift;
4136                 rtx mlr;
4137                 HOST_WIDE_INT d = INTVAL (op1);
4138                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4139
4140                 /* n rem d = n rem -d */
4141                 if (rem_flag && d < 0)
4142                   {
4143                     d = abs_d;
4144                     op1 = gen_int_mode (abs_d, compute_mode);
4145                   }
4146
4147                 if (d == 1)
4148                   quotient = op0;
4149                 else if (d == -1)
4150                   quotient = expand_unop (compute_mode, neg_optab, op0,
4151                                           tquotient, 0);
4152                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4153                   {
4154                     /* This case is not handled correctly below.  */
4155                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4156                                                 compute_mode, 1, 1);
4157                     if (quotient == 0)
4158                       goto fail1;
4159                   }
4160                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4161                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4162                                       : sdiv_pow2_cheap[compute_mode])
4163                          /* We assume that cheap metric is true if the
4164                             optab has an expander for this mode.  */
4165                          && (((rem_flag ? smod_optab : sdiv_optab)
4166                               ->handlers[compute_mode].insn_code
4167                               != CODE_FOR_nothing)
4168                              || (sdivmod_optab->handlers[compute_mode]
4169                                  .insn_code != CODE_FOR_nothing)))
4170                   ;
4171                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4172                   {
4173                     if (rem_flag)
4174                       {
4175                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4176                         if (remainder)
4177                           return gen_lowpart (mode, remainder);
4178                       }
4179
4180                     if (sdiv_pow2_cheap[compute_mode]
4181                         && ((sdiv_optab->handlers[compute_mode].insn_code
4182                              != CODE_FOR_nothing)
4183                             || (sdivmod_optab->handlers[compute_mode].insn_code
4184                                 != CODE_FOR_nothing)))
4185                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4186                                                 compute_mode, op0,
4187                                                 gen_int_mode (abs_d,
4188                                                               compute_mode),
4189                                                 NULL_RTX, 0);
4190                     else
4191                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4192
4193                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4194                        negate the quotient.  */
4195                     if (d < 0)
4196                       {
4197                         insn = get_last_insn ();
4198                         if (insn != last
4199                             && (set = single_set (insn)) != 0
4200                             && SET_DEST (set) == quotient
4201                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4202                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4203                           set_unique_reg_note (insn,
4204                                                REG_EQUAL,
4205                                                gen_rtx_DIV (compute_mode,
4206                                                             op0,
4207                                                             GEN_INT
4208                                                             (trunc_int_for_mode
4209                                                              (abs_d,
4210                                                               compute_mode))));
4211
4212                         quotient = expand_unop (compute_mode, neg_optab,
4213                                                 quotient, quotient, 0);
4214                       }
4215                   }
4216                 else if (size <= HOST_BITS_PER_WIDE_INT)
4217                   {
4218                     choose_multiplier (abs_d, size, size - 1,
4219                                        &mlr, &post_shift, &lgup);
4220                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4221                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4222                       {
4223                         rtx t1, t2, t3;
4224
4225                         if (post_shift >= BITS_PER_WORD
4226                             || size - 1 >= BITS_PER_WORD)
4227                           goto fail1;
4228
4229                         extra_cost = (shift_cost[compute_mode][post_shift]
4230                                       + shift_cost[compute_mode][size - 1]
4231                                       + add_cost[compute_mode]);
4232                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4233                                                    NULL_RTX, 0,
4234                                                    max_cost - extra_cost);
4235                         if (t1 == 0)
4236                           goto fail1;
4237                         t2 = expand_shift
4238                           (RSHIFT_EXPR, compute_mode, t1,
4239                            build_int_cst (NULL_TREE, post_shift),
4240                            NULL_RTX, 0);
4241                         t3 = expand_shift
4242                           (RSHIFT_EXPR, compute_mode, op0,
4243                            build_int_cst (NULL_TREE, size - 1),
4244                            NULL_RTX, 0);
4245                         if (d < 0)
4246                           quotient
4247                             = force_operand (gen_rtx_MINUS (compute_mode,
4248                                                             t3, t2),
4249                                              tquotient);
4250                         else
4251                           quotient
4252                             = force_operand (gen_rtx_MINUS (compute_mode,
4253                                                             t2, t3),
4254                                              tquotient);
4255                       }
4256                     else
4257                       {
4258                         rtx t1, t2, t3, t4;
4259
4260                         if (post_shift >= BITS_PER_WORD
4261                             || size - 1 >= BITS_PER_WORD)
4262                           goto fail1;
4263
4264                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4265                         mlr = gen_int_mode (ml, compute_mode);
4266                         extra_cost = (shift_cost[compute_mode][post_shift]
4267                                       + shift_cost[compute_mode][size - 1]
4268                                       + 2 * add_cost[compute_mode]);
4269                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4270                                                    NULL_RTX, 0,
4271                                                    max_cost - extra_cost);
4272                         if (t1 == 0)
4273                           goto fail1;
4274                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4275                                                           t1, op0),
4276                                             NULL_RTX);
4277                         t3 = expand_shift
4278                           (RSHIFT_EXPR, compute_mode, t2,
4279                            build_int_cst (NULL_TREE, post_shift),
4280                            NULL_RTX, 0);
4281                         t4 = expand_shift
4282                           (RSHIFT_EXPR, compute_mode, op0,
4283                            build_int_cst (NULL_TREE, size - 1),
4284                            NULL_RTX, 0);
4285                         if (d < 0)
4286                           quotient
4287                             = force_operand (gen_rtx_MINUS (compute_mode,
4288                                                             t4, t3),
4289                                              tquotient);
4290                         else
4291                           quotient
4292                             = force_operand (gen_rtx_MINUS (compute_mode,
4293                                                             t3, t4),
4294                                              tquotient);
4295                       }
4296                   }
4297                 else            /* Too wide mode to use tricky code */
4298                   break;
4299
4300                 insn = get_last_insn ();
4301                 if (insn != last
4302                     && (set = single_set (insn)) != 0
4303                     && SET_DEST (set) == quotient)
4304                   set_unique_reg_note (insn,
4305                                        REG_EQUAL,
4306                                        gen_rtx_DIV (compute_mode, op0, op1));
4307               }
4308             break;
4309           }
4310       fail1:
4311         delete_insns_since (last);
4312         break;
4313
4314       case FLOOR_DIV_EXPR:
4315       case FLOOR_MOD_EXPR:
4316       /* We will come here only for signed operations.  */
4317         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4318           {
4319             unsigned HOST_WIDE_INT mh;
4320             int pre_shift, lgup, post_shift;
4321             HOST_WIDE_INT d = INTVAL (op1);
4322             rtx ml;
4323
4324             if (d > 0)
4325               {
4326                 /* We could just as easily deal with negative constants here,
4327                    but it does not seem worth the trouble for GCC 2.6.  */
4328                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4329                   {
4330                     pre_shift = floor_log2 (d);
4331                     if (rem_flag)
4332                       {
4333                         remainder = expand_binop (compute_mode, and_optab, op0,
4334                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4335                                                   remainder, 0, OPTAB_LIB_WIDEN);
4336                         if (remainder)
4337                           return gen_lowpart (mode, remainder);
4338                       }
4339                     quotient = expand_shift
4340                       (RSHIFT_EXPR, compute_mode, op0,
4341                        build_int_cst (NULL_TREE, pre_shift),
4342                        tquotient, 0);
4343                   }
4344                 else
4345                   {
4346                     rtx t1, t2, t3, t4;
4347
4348                     mh = choose_multiplier (d, size, size - 1,
4349                                             &ml, &post_shift, &lgup);
4350                     gcc_assert (!mh);
4351
4352                     if (post_shift < BITS_PER_WORD
4353                         && size - 1 < BITS_PER_WORD)
4354                       {
4355                         t1 = expand_shift
4356                           (RSHIFT_EXPR, compute_mode, op0,
4357                            build_int_cst (NULL_TREE, size - 1),
4358                            NULL_RTX, 0);
4359                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4360                                            NULL_RTX, 0, OPTAB_WIDEN);
4361                         extra_cost = (shift_cost[compute_mode][post_shift]
4362                                       + shift_cost[compute_mode][size - 1]
4363                                       + 2 * add_cost[compute_mode]);
4364                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4365                                                    NULL_RTX, 1,
4366                                                    max_cost - extra_cost);
4367                         if (t3 != 0)
4368                           {
4369                             t4 = expand_shift
4370                               (RSHIFT_EXPR, compute_mode, t3,
4371                                build_int_cst (NULL_TREE, post_shift),
4372                                NULL_RTX, 1);
4373                             quotient = expand_binop (compute_mode, xor_optab,
4374                                                      t4, t1, tquotient, 0,
4375                                                      OPTAB_WIDEN);
4376                           }
4377                       }
4378                   }
4379               }
4380             else
4381               {
4382                 rtx nsign, t1, t2, t3, t4;
4383                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4384                                                   op0, constm1_rtx), NULL_RTX);
4385                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4386                                    0, OPTAB_WIDEN);
4387                 nsign = expand_shift
4388                   (RSHIFT_EXPR, compute_mode, t2,
4389                    build_int_cst (NULL_TREE, size - 1),
4390                    NULL_RTX, 0);
4391                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4392                                     NULL_RTX);
4393                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4394                                     NULL_RTX, 0);
4395                 if (t4)
4396                   {
4397                     rtx t5;
4398                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4399                                       NULL_RTX, 0);
4400                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4401                                                             t4, t5),
4402                                               tquotient);
4403                   }
4404               }
4405           }
4406
4407         if (quotient != 0)
4408           break;
4409         delete_insns_since (last);
4410
4411         /* Try using an instruction that produces both the quotient and
4412            remainder, using truncation.  We can easily compensate the quotient
4413            or remainder to get floor rounding, once we have the remainder.
4414            Notice that we compute also the final remainder value here,
4415            and return the result right away.  */
4416         if (target == 0 || GET_MODE (target) != compute_mode)
4417           target = gen_reg_rtx (compute_mode);
4418
4419         if (rem_flag)
4420           {
4421             remainder
4422               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4423             quotient = gen_reg_rtx (compute_mode);
4424           }
4425         else
4426           {
4427             quotient
4428               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4429             remainder = gen_reg_rtx (compute_mode);
4430           }
4431
4432         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4433                                  quotient, remainder, 0))
4434           {
4435             /* This could be computed with a branch-less sequence.
4436                Save that for later.  */
4437             rtx tem;
4438             rtx label = gen_label_rtx ();
4439             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4440             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4441                                 NULL_RTX, 0, OPTAB_WIDEN);
4442             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4443             expand_dec (quotient, const1_rtx);
4444             expand_inc (remainder, op1);
4445             emit_label (label);
4446             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4447           }
4448
4449         /* No luck with division elimination or divmod.  Have to do it
4450            by conditionally adjusting op0 *and* the result.  */
4451         {
4452           rtx label1, label2, label3, label4, label5;
4453           rtx adjusted_op0;
4454           rtx tem;
4455
4456           quotient = gen_reg_rtx (compute_mode);
4457           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4458           label1 = gen_label_rtx ();
4459           label2 = gen_label_rtx ();
4460           label3 = gen_label_rtx ();
4461           label4 = gen_label_rtx ();
4462           label5 = gen_label_rtx ();
4463           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4464           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4465           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4466                               quotient, 0, OPTAB_LIB_WIDEN);
4467           if (tem != quotient)
4468             emit_move_insn (quotient, tem);
4469           emit_jump_insn (gen_jump (label5));
4470           emit_barrier ();
4471           emit_label (label1);
4472           expand_inc (adjusted_op0, const1_rtx);
4473           emit_jump_insn (gen_jump (label4));
4474           emit_barrier ();
4475           emit_label (label2);
4476           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4477           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4478                               quotient, 0, OPTAB_LIB_WIDEN);
4479           if (tem != quotient)
4480             emit_move_insn (quotient, tem);
4481           emit_jump_insn (gen_jump (label5));
4482           emit_barrier ();
4483           emit_label (label3);
4484           expand_dec (adjusted_op0, const1_rtx);
4485           emit_label (label4);
4486           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4487                               quotient, 0, OPTAB_LIB_WIDEN);
4488           if (tem != quotient)
4489             emit_move_insn (quotient, tem);
4490           expand_dec (quotient, const1_rtx);
4491           emit_label (label5);
4492         }
4493         break;
4494
4495       case CEIL_DIV_EXPR:
4496       case CEIL_MOD_EXPR:
4497         if (unsignedp)
4498           {
4499             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4500               {
4501                 rtx t1, t2, t3;
4502                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4503                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4504                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4505                                    tquotient, 1);
4506                 t2 = expand_binop (compute_mode, and_optab, op0,
4507                                    GEN_INT (d - 1),
4508                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4509                 t3 = gen_reg_rtx (compute_mode);
4510                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4511                                       compute_mode, 1, 1);
4512                 if (t3 == 0)
4513                   {
4514                     rtx lab;
4515                     lab = gen_label_rtx ();
4516                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4517                     expand_inc (t1, const1_rtx);
4518                     emit_label (lab);
4519                     quotient = t1;
4520                   }
4521                 else
4522                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4523                                                           t1, t3),
4524                                             tquotient);
4525                 break;
4526               }
4527
4528             /* Try using an instruction that produces both the quotient and
4529                remainder, using truncation.  We can easily compensate the
4530                quotient or remainder to get ceiling rounding, once we have the
4531                remainder.  Notice that we compute also the final remainder
4532                value here, and return the result right away.  */
4533             if (target == 0 || GET_MODE (target) != compute_mode)
4534               target = gen_reg_rtx (compute_mode);
4535
4536             if (rem_flag)
4537               {
4538                 remainder = (REG_P (target)
4539                              ? target : gen_reg_rtx (compute_mode));
4540                 quotient = gen_reg_rtx (compute_mode);
4541               }
4542             else
4543               {
4544                 quotient = (REG_P (target)
4545                             ? target : gen_reg_rtx (compute_mode));
4546                 remainder = gen_reg_rtx (compute_mode);
4547               }
4548
4549             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4550                                      remainder, 1))
4551               {
4552                 /* This could be computed with a branch-less sequence.
4553                    Save that for later.  */
4554                 rtx label = gen_label_rtx ();
4555                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4556                                  compute_mode, label);
4557                 expand_inc (quotient, const1_rtx);
4558                 expand_dec (remainder, op1);
4559                 emit_label (label);
4560                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4561               }
4562
4563             /* No luck with division elimination or divmod.  Have to do it
4564                by conditionally adjusting op0 *and* the result.  */
4565             {
4566               rtx label1, label2;
4567               rtx adjusted_op0, tem;
4568
4569               quotient = gen_reg_rtx (compute_mode);
4570               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4571               label1 = gen_label_rtx ();
4572               label2 = gen_label_rtx ();
4573               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4574                                compute_mode, label1);
4575               emit_move_insn  (quotient, const0_rtx);
4576               emit_jump_insn (gen_jump (label2));
4577               emit_barrier ();
4578               emit_label (label1);
4579               expand_dec (adjusted_op0, const1_rtx);
4580               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4581                                   quotient, 1, OPTAB_LIB_WIDEN);
4582               if (tem != quotient)
4583                 emit_move_insn (quotient, tem);
4584               expand_inc (quotient, const1_rtx);
4585               emit_label (label2);
4586             }
4587           }
4588         else /* signed */
4589           {
4590             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4591                 && INTVAL (op1) >= 0)
4592               {
4593                 /* This is extremely similar to the code for the unsigned case
4594                    above.  For 2.7 we should merge these variants, but for
4595                    2.6.1 I don't want to touch the code for unsigned since that
4596                    get used in C.  The signed case will only be used by other
4597                    languages (Ada).  */
4598
4599                 rtx t1, t2, t3;
4600                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4601                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4602                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4603                                    tquotient, 0);
4604                 t2 = expand_binop (compute_mode, and_optab, op0,
4605                                    GEN_INT (d - 1),
4606                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4607                 t3 = gen_reg_rtx (compute_mode);
4608                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4609                                       compute_mode, 1, 1);
4610                 if (t3 == 0)
4611                   {
4612                     rtx lab;
4613                     lab = gen_label_rtx ();
4614                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4615                     expand_inc (t1, const1_rtx);
4616                     emit_label (lab);
4617                     quotient = t1;
4618                   }
4619                 else
4620                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4621                                                           t1, t3),
4622                                             tquotient);
4623                 break;
4624               }
4625
4626             /* Try using an instruction that produces both the quotient and
4627                remainder, using truncation.  We can easily compensate the
4628                quotient or remainder to get ceiling rounding, once we have the
4629                remainder.  Notice that we compute also the final remainder
4630                value here, and return the result right away.  */
4631             if (target == 0 || GET_MODE (target) != compute_mode)
4632               target = gen_reg_rtx (compute_mode);
4633             if (rem_flag)
4634               {
4635                 remainder= (REG_P (target)
4636                             ? target : gen_reg_rtx (compute_mode));
4637                 quotient = gen_reg_rtx (compute_mode);
4638               }
4639             else
4640               {
4641                 quotient = (REG_P (target)
4642                             ? target : gen_reg_rtx (compute_mode));
4643                 remainder = gen_reg_rtx (compute_mode);
4644               }
4645
4646             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4647                                      remainder, 0))
4648               {
4649                 /* This could be computed with a branch-less sequence.
4650                    Save that for later.  */
4651                 rtx tem;
4652                 rtx label = gen_label_rtx ();
4653                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4654                                  compute_mode, label);
4655                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4656                                     NULL_RTX, 0, OPTAB_WIDEN);
4657                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4658                 expand_inc (quotient, const1_rtx);
4659                 expand_dec (remainder, op1);
4660                 emit_label (label);
4661                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4662               }
4663
4664             /* No luck with division elimination or divmod.  Have to do it
4665                by conditionally adjusting op0 *and* the result.  */
4666             {
4667               rtx label1, label2, label3, label4, label5;
4668               rtx adjusted_op0;
4669               rtx tem;
4670
4671               quotient = gen_reg_rtx (compute_mode);
4672               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4673               label1 = gen_label_rtx ();
4674               label2 = gen_label_rtx ();
4675               label3 = gen_label_rtx ();
4676               label4 = gen_label_rtx ();
4677               label5 = gen_label_rtx ();
4678               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4679               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4680                                compute_mode, label1);
4681               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4682                                   quotient, 0, OPTAB_LIB_WIDEN);
4683               if (tem != quotient)
4684                 emit_move_insn (quotient, tem);
4685               emit_jump_insn (gen_jump (label5));
4686               emit_barrier ();
4687               emit_label (label1);
4688               expand_dec (adjusted_op0, const1_rtx);
4689               emit_jump_insn (gen_jump (label4));
4690               emit_barrier ();
4691               emit_label (label2);
4692               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4693                                compute_mode, label3);
4694               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4695                                   quotient, 0, OPTAB_LIB_WIDEN);
4696               if (tem != quotient)
4697                 emit_move_insn (quotient, tem);
4698               emit_jump_insn (gen_jump (label5));
4699               emit_barrier ();
4700               emit_label (label3);
4701               expand_inc (adjusted_op0, const1_rtx);
4702               emit_label (label4);
4703               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4704                                   quotient, 0, OPTAB_LIB_WIDEN);
4705               if (tem != quotient)
4706                 emit_move_insn (quotient, tem);
4707               expand_inc (quotient, const1_rtx);
4708               emit_label (label5);
4709             }
4710           }
4711         break;
4712
4713       case EXACT_DIV_EXPR:
4714         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4715           {
4716             HOST_WIDE_INT d = INTVAL (op1);
4717             unsigned HOST_WIDE_INT ml;
4718             int pre_shift;
4719             rtx t1;
4720
4721             pre_shift = floor_log2 (d & -d);
4722             ml = invert_mod2n (d >> pre_shift, size);
4723             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4724                                build_int_cst (NULL_TREE, pre_shift),
4725                                NULL_RTX, unsignedp);
4726             quotient = expand_mult (compute_mode, t1,
4727                                     gen_int_mode (ml, compute_mode),
4728                                     NULL_RTX, 1);
4729
4730             insn = get_last_insn ();
4731             set_unique_reg_note (insn,
4732                                  REG_EQUAL,
4733                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4734                                                  compute_mode,
4735                                                  op0, op1));
4736           }
4737         break;
4738
4739       case ROUND_DIV_EXPR:
4740       case ROUND_MOD_EXPR:
4741         if (unsignedp)
4742           {
4743             rtx tem;
4744             rtx label;
4745             label = gen_label_rtx ();
4746             quotient = gen_reg_rtx (compute_mode);
4747             remainder = gen_reg_rtx (compute_mode);
4748             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4749               {
4750                 rtx tem;
4751                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4752                                          quotient, 1, OPTAB_LIB_WIDEN);
4753                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4754                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4755                                           remainder, 1, OPTAB_LIB_WIDEN);
4756               }
4757             tem = plus_constant (op1, -1);
4758             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4759                                 build_int_cst (NULL_TREE, 1),
4760                                 NULL_RTX, 1);
4761             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4762             expand_inc (quotient, const1_rtx);
4763             expand_dec (remainder, op1);
4764             emit_label (label);
4765           }
4766         else
4767           {
4768             rtx abs_rem, abs_op1, tem, mask;
4769             rtx label;
4770             label = gen_label_rtx ();
4771             quotient = gen_reg_rtx (compute_mode);
4772             remainder = gen_reg_rtx (compute_mode);
4773             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4774               {
4775                 rtx tem;
4776                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4777                                          quotient, 0, OPTAB_LIB_WIDEN);
4778                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4779                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4780                                           remainder, 0, OPTAB_LIB_WIDEN);
4781               }
4782             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4783             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4784             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4785                                 build_int_cst (NULL_TREE, 1),
4786                                 NULL_RTX, 1);
4787             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4788             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4789                                 NULL_RTX, 0, OPTAB_WIDEN);
4790             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4791                                  build_int_cst (NULL_TREE, size - 1),
4792                                  NULL_RTX, 0);
4793             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4794                                 NULL_RTX, 0, OPTAB_WIDEN);
4795             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4796                                 NULL_RTX, 0, OPTAB_WIDEN);
4797             expand_inc (quotient, tem);
4798             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4799                                 NULL_RTX, 0, OPTAB_WIDEN);
4800             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4801                                 NULL_RTX, 0, OPTAB_WIDEN);
4802             expand_dec (remainder, tem);
4803             emit_label (label);
4804           }
4805         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4806
4807       default:
4808         gcc_unreachable ();
4809       }
4810
4811   if (quotient == 0)
4812     {
4813       if (target && GET_MODE (target) != compute_mode)
4814         target = 0;
4815
4816       if (rem_flag)
4817         {
4818           /* Try to produce the remainder without producing the quotient.
4819              If we seem to have a divmod pattern that does not require widening,
4820              don't try widening here.  We should really have a WIDEN argument
4821              to expand_twoval_binop, since what we'd really like to do here is
4822              1) try a mod insn in compute_mode
4823              2) try a divmod insn in compute_mode
4824              3) try a div insn in compute_mode and multiply-subtract to get
4825                 remainder
4826              4) try the same things with widening allowed.  */
4827           remainder
4828             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4829                                  op0, op1, target,
4830                                  unsignedp,
4831                                  ((optab2->handlers[compute_mode].insn_code
4832                                    != CODE_FOR_nothing)
4833                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4834           if (remainder == 0)
4835             {
4836               /* No luck there.  Can we do remainder and divide at once
4837                  without a library call?  */
4838               remainder = gen_reg_rtx (compute_mode);
4839               if (! expand_twoval_binop ((unsignedp
4840                                           ? udivmod_optab
4841                                           : sdivmod_optab),
4842                                          op0, op1,
4843                                          NULL_RTX, remainder, unsignedp))
4844                 remainder = 0;
4845             }
4846
4847           if (remainder)
4848             return gen_lowpart (mode, remainder);
4849         }
4850
4851       /* Produce the quotient.  Try a quotient insn, but not a library call.
4852          If we have a divmod in this mode, use it in preference to widening
4853          the div (for this test we assume it will not fail). Note that optab2
4854          is set to the one of the two optabs that the call below will use.  */
4855       quotient
4856         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4857                              op0, op1, rem_flag ? NULL_RTX : target,
4858                              unsignedp,
4859                              ((optab2->handlers[compute_mode].insn_code
4860                                != CODE_FOR_nothing)
4861                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4862
4863       if (quotient == 0)
4864         {
4865           /* No luck there.  Try a quotient-and-remainder insn,
4866              keeping the quotient alone.  */
4867           quotient = gen_reg_rtx (compute_mode);
4868           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4869                                      op0, op1,
4870                                      quotient, NULL_RTX, unsignedp))
4871             {
4872               quotient = 0;
4873               if (! rem_flag)
4874                 /* Still no luck.  If we are not computing the remainder,
4875                    use a library call for the quotient.  */
4876                 quotient = sign_expand_binop (compute_mode,
4877                                               udiv_optab, sdiv_optab,
4878                                               op0, op1, target,
4879                                               unsignedp, OPTAB_LIB_WIDEN);
4880             }
4881         }
4882     }
4883
4884   if (rem_flag)
4885     {
4886       if (target && GET_MODE (target) != compute_mode)
4887         target = 0;
4888
4889       if (quotient == 0)
4890         {
4891           /* No divide instruction either.  Use library for remainder.  */
4892           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4893                                          op0, op1, target,
4894                                          unsignedp, OPTAB_LIB_WIDEN);
4895           /* No remainder function.  Try a quotient-and-remainder
4896              function, keeping the remainder.  */
4897           if (!remainder)
4898             {
4899               remainder = gen_reg_rtx (compute_mode);
4900               if (!expand_twoval_binop_libfunc
4901                   (unsignedp ? udivmod_optab : sdivmod_optab,
4902                    op0, op1,
4903                    NULL_RTX, remainder,
4904                    unsignedp ? UMOD : MOD))
4905                 remainder = NULL_RTX;
4906             }
4907         }
4908       else
4909         {
4910           /* We divided.  Now finish doing X - Y * (X / Y).  */
4911           remainder = expand_mult (compute_mode, quotient, op1,
4912                                    NULL_RTX, unsignedp);
4913           remainder = expand_binop (compute_mode, sub_optab, op0,
4914                                     remainder, target, unsignedp,
4915                                     OPTAB_LIB_WIDEN);
4916         }
4917     }
4918
4919   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4920 }
4921 \f
4922 /* Return a tree node with data type TYPE, describing the value of X.
4923    Usually this is an VAR_DECL, if there is no obvious better choice.
4924    X may be an expression, however we only support those expressions
4925    generated by loop.c.  */
4926
4927 tree
4928 make_tree (tree type, rtx x)
4929 {
4930   tree t;
4931
4932   switch (GET_CODE (x))
4933     {
4934     case CONST_INT:
4935       {
4936         HOST_WIDE_INT hi = 0;
4937
4938         if (INTVAL (x) < 0
4939             && !(TYPE_UNSIGNED (type)
4940                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4941                      < HOST_BITS_PER_WIDE_INT)))
4942           hi = -1;
4943
4944         t = build_int_cst_wide (type, INTVAL (x), hi);
4945
4946         return t;
4947       }
4948
4949     case CONST_DOUBLE:
4950       if (GET_MODE (x) == VOIDmode)
4951         t = build_int_cst_wide (type,
4952                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4953       else
4954         {
4955           REAL_VALUE_TYPE d;
4956
4957           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4958           t = build_real (type, d);
4959         }
4960
4961       return t;
4962
4963     case CONST_VECTOR:
4964       {
4965         int i, units;
4966         rtx elt;
4967         tree t = NULL_TREE;
4968
4969         units = CONST_VECTOR_NUNITS (x);
4970
4971         /* Build a tree with vector elements.  */
4972         for (i = units - 1; i >= 0; --i)
4973           {
4974             elt = CONST_VECTOR_ELT (x, i);
4975             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4976           }
4977
4978         return build_vector (type, t);
4979       }
4980
4981     case PLUS:
4982       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4983                           make_tree (type, XEXP (x, 1)));
4984
4985     case MINUS:
4986       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4987                           make_tree (type, XEXP (x, 1)));
4988
4989     case NEG:
4990       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4991
4992     case MULT:
4993       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4994                           make_tree (type, XEXP (x, 1)));
4995
4996     case ASHIFT:
4997       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4998                           make_tree (type, XEXP (x, 1)));
4999
5000     case LSHIFTRT:
5001       t = lang_hooks.types.unsigned_type (type);
5002       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5003                                          make_tree (t, XEXP (x, 0)),
5004                                          make_tree (type, XEXP (x, 1))));
5005
5006     case ASHIFTRT:
5007       t = lang_hooks.types.signed_type (type);
5008       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5009                                          make_tree (t, XEXP (x, 0)),
5010                                          make_tree (type, XEXP (x, 1))));
5011
5012     case DIV:
5013       if (TREE_CODE (type) != REAL_TYPE)
5014         t = lang_hooks.types.signed_type (type);
5015       else
5016         t = type;
5017
5018       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5019                                          make_tree (t, XEXP (x, 0)),
5020                                          make_tree (t, XEXP (x, 1))));
5021     case UDIV:
5022       t = lang_hooks.types.unsigned_type (type);
5023       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5024                                          make_tree (t, XEXP (x, 0)),
5025                                          make_tree (t, XEXP (x, 1))));
5026
5027     case SIGN_EXTEND:
5028     case ZERO_EXTEND:
5029       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5030                                           GET_CODE (x) == ZERO_EXTEND);
5031       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5032
5033     default:
5034       t = build_decl (VAR_DECL, NULL_TREE, type);
5035
5036       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5037          ptr_mode.  So convert.  */
5038       if (POINTER_TYPE_P (type))
5039         x = convert_memory_address (TYPE_MODE (type), x);
5040
5041       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5042          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5043       t->decl_with_rtl.rtl = x;
5044
5045       return t;
5046     }
5047 }
5048 \f
5049 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5050    and returning TARGET.
5051
5052    If TARGET is 0, a pseudo-register or constant is returned.  */
5053
5054 rtx
5055 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5056 {
5057   rtx tem = 0;
5058
5059   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5060     tem = simplify_binary_operation (AND, mode, op0, op1);
5061   if (tem == 0)
5062     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5063
5064   if (target == 0)
5065     target = tem;
5066   else if (tem != target)
5067     emit_move_insn (target, tem);
5068   return target;
5069 }
5070 \f
5071 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5072    and storing in TARGET.  Normally return TARGET.
5073    Return 0 if that cannot be done.
5074
5075    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5076    it is VOIDmode, they cannot both be CONST_INT.
5077
5078    UNSIGNEDP is for the case where we have to widen the operands
5079    to perform the operation.  It says to use zero-extension.
5080
5081    NORMALIZEP is 1 if we should convert the result to be either zero
5082    or one.  Normalize is -1 if we should convert the result to be
5083    either zero or -1.  If NORMALIZEP is zero, the result will be left
5084    "raw" out of the scc insn.  */
5085
5086 rtx
5087 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5088                  enum machine_mode mode, int unsignedp, int normalizep)
5089 {
5090   rtx subtarget;
5091   enum insn_code icode;
5092   enum machine_mode compare_mode;
5093   enum machine_mode target_mode = GET_MODE (target);
5094   rtx tem;
5095   rtx last = get_last_insn ();
5096   rtx pattern, comparison;
5097
5098   if (unsignedp)
5099     code = unsigned_condition (code);
5100
5101   /* If one operand is constant, make it the second one.  Only do this
5102      if the other operand is not constant as well.  */
5103
5104   if (swap_commutative_operands_p (op0, op1))
5105     {
5106       tem = op0;
5107       op0 = op1;
5108       op1 = tem;
5109       code = swap_condition (code);
5110     }
5111
5112   if (mode == VOIDmode)
5113     mode = GET_MODE (op0);
5114
5115   /* For some comparisons with 1 and -1, we can convert this to
5116      comparisons with zero.  This will often produce more opportunities for
5117      store-flag insns.  */
5118
5119   switch (code)
5120     {
5121     case LT:
5122       if (op1 == const1_rtx)
5123         op1 = const0_rtx, code = LE;
5124       break;
5125     case LE:
5126       if (op1 == constm1_rtx)
5127         op1 = const0_rtx, code = LT;
5128       break;
5129     case GE:
5130       if (op1 == const1_rtx)
5131         op1 = const0_rtx, code = GT;
5132       break;
5133     case GT:
5134       if (op1 == constm1_rtx)
5135         op1 = const0_rtx, code = GE;
5136       break;
5137     case GEU:
5138       if (op1 == const1_rtx)
5139         op1 = const0_rtx, code = NE;
5140       break;
5141     case LTU:
5142       if (op1 == const1_rtx)
5143         op1 = const0_rtx, code = EQ;
5144       break;
5145     default:
5146       break;
5147     }
5148
5149   /* If we are comparing a double-word integer with zero or -1, we can
5150      convert the comparison into one involving a single word.  */
5151   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5152       && GET_MODE_CLASS (mode) == MODE_INT
5153       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5154     {
5155       if ((code == EQ || code == NE)
5156           && (op1 == const0_rtx || op1 == constm1_rtx))
5157         {
5158           rtx op00, op01, op0both;
5159
5160           /* Do a logical OR or AND of the two words and compare the result.  */
5161           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5162           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5163           op0both = expand_binop (word_mode,
5164                                   op1 == const0_rtx ? ior_optab : and_optab,
5165                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5166
5167           if (op0both != 0)
5168             return emit_store_flag (target, code, op0both, op1, word_mode,
5169                                     unsignedp, normalizep);
5170         }
5171       else if ((code == LT || code == GE) && op1 == const0_rtx)
5172         {
5173           rtx op0h;
5174
5175           /* If testing the sign bit, can just test on high word.  */
5176           op0h = simplify_gen_subreg (word_mode, op0, mode,
5177                                       subreg_highpart_offset (word_mode, mode));
5178           return emit_store_flag (target, code, op0h, op1, word_mode,
5179                                   unsignedp, normalizep);
5180         }
5181     }
5182
5183   /* From now on, we won't change CODE, so set ICODE now.  */
5184   icode = setcc_gen_code[(int) code];
5185
5186   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5187      complement of A (for GE) and shifting the sign bit to the low bit.  */
5188   if (op1 == const0_rtx && (code == LT || code == GE)
5189       && GET_MODE_CLASS (mode) == MODE_INT
5190       && (normalizep || STORE_FLAG_VALUE == 1
5191           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5192               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5193                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5194     {
5195       subtarget = target;
5196
5197       /* If the result is to be wider than OP0, it is best to convert it
5198          first.  If it is to be narrower, it is *incorrect* to convert it
5199          first.  */
5200       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5201         {
5202           op0 = convert_modes (target_mode, mode, op0, 0);
5203           mode = target_mode;
5204         }
5205
5206       if (target_mode != mode)
5207         subtarget = 0;
5208
5209       if (code == GE)
5210         op0 = expand_unop (mode, one_cmpl_optab, op0,
5211                            ((STORE_FLAG_VALUE == 1 || normalizep)
5212                             ? 0 : subtarget), 0);
5213
5214       if (STORE_FLAG_VALUE == 1 || normalizep)
5215         /* If we are supposed to produce a 0/1 value, we want to do
5216            a logical shift from the sign bit to the low-order bit; for
5217            a -1/0 value, we do an arithmetic shift.  */
5218         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5219                             size_int (GET_MODE_BITSIZE (mode) - 1),
5220                             subtarget, normalizep != -1);
5221
5222       if (mode != target_mode)
5223         op0 = convert_modes (target_mode, mode, op0, 0);
5224
5225       return op0;
5226     }
5227
5228   if (icode != CODE_FOR_nothing)
5229     {
5230       insn_operand_predicate_fn pred;
5231
5232       /* We think we may be able to do this with a scc insn.  Emit the
5233          comparison and then the scc insn.  */
5234
5235       do_pending_stack_adjust ();
5236       last = get_last_insn ();
5237
5238       comparison
5239         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5240       if (CONSTANT_P (comparison))
5241         {
5242           switch (GET_CODE (comparison))
5243             {
5244             case CONST_INT:
5245               if (comparison == const0_rtx)
5246                 return const0_rtx;
5247               break;
5248
5249 #ifdef FLOAT_STORE_FLAG_VALUE
5250             case CONST_DOUBLE:
5251               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5252                 return const0_rtx;
5253               break;
5254 #endif
5255             default:
5256               gcc_unreachable ();
5257             }
5258
5259           if (normalizep == 1)
5260             return const1_rtx;
5261           if (normalizep == -1)
5262             return constm1_rtx;
5263           return const_true_rtx;
5264         }
5265
5266       /* The code of COMPARISON may not match CODE if compare_from_rtx
5267          decided to swap its operands and reverse the original code.
5268
5269          We know that compare_from_rtx returns either a CONST_INT or
5270          a new comparison code, so it is safe to just extract the
5271          code from COMPARISON.  */
5272       code = GET_CODE (comparison);
5273
5274       /* Get a reference to the target in the proper mode for this insn.  */
5275       compare_mode = insn_data[(int) icode].operand[0].mode;
5276       subtarget = target;
5277       pred = insn_data[(int) icode].operand[0].predicate;
5278       if (optimize || ! (*pred) (subtarget, compare_mode))
5279         subtarget = gen_reg_rtx (compare_mode);
5280
5281       pattern = GEN_FCN (icode) (subtarget);
5282       if (pattern)
5283         {
5284           emit_insn (pattern);
5285
5286           /* If we are converting to a wider mode, first convert to
5287              TARGET_MODE, then normalize.  This produces better combining
5288              opportunities on machines that have a SIGN_EXTRACT when we are
5289              testing a single bit.  This mostly benefits the 68k.
5290
5291              If STORE_FLAG_VALUE does not have the sign bit set when
5292              interpreted in COMPARE_MODE, we can do this conversion as
5293              unsigned, which is usually more efficient.  */
5294           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5295             {
5296               convert_move (target, subtarget,
5297                             (GET_MODE_BITSIZE (compare_mode)
5298                              <= HOST_BITS_PER_WIDE_INT)
5299                             && 0 == (STORE_FLAG_VALUE
5300                                      & ((HOST_WIDE_INT) 1
5301                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5302               op0 = target;
5303               compare_mode = target_mode;
5304             }
5305           else
5306             op0 = subtarget;
5307
5308           /* If we want to keep subexpressions around, don't reuse our
5309              last target.  */
5310
5311           if (optimize)
5312             subtarget = 0;
5313
5314           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5315              we don't have to do anything.  */
5316           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5317             ;
5318           /* STORE_FLAG_VALUE might be the most negative number, so write
5319              the comparison this way to avoid a compiler-time warning.  */
5320           else if (- normalizep == STORE_FLAG_VALUE)
5321             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5322
5323           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5324              makes it hard to use a value of just the sign bit due to
5325              ANSI integer constant typing rules.  */
5326           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5327                    && (STORE_FLAG_VALUE
5328                        & ((HOST_WIDE_INT) 1
5329                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5330             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5331                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5332                                 subtarget, normalizep == 1);
5333           else
5334             {
5335               gcc_assert (STORE_FLAG_VALUE & 1);
5336
5337               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5338               if (normalizep == -1)
5339                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5340             }
5341
5342           /* If we were converting to a smaller mode, do the
5343              conversion now.  */
5344           if (target_mode != compare_mode)
5345             {
5346               convert_move (target, op0, 0);
5347               return target;
5348             }
5349           else
5350             return op0;
5351         }
5352     }
5353
5354   delete_insns_since (last);
5355
5356   /* If optimizing, use different pseudo registers for each insn, instead
5357      of reusing the same pseudo.  This leads to better CSE, but slows
5358      down the compiler, since there are more pseudos */
5359   subtarget = (!optimize
5360                && (target_mode == mode)) ? target : NULL_RTX;
5361
5362   /* If we reached here, we can't do this with a scc insn.  However, there
5363      are some comparisons that can be done directly.  For example, if
5364      this is an equality comparison of integers, we can try to exclusive-or
5365      (or subtract) the two operands and use a recursive call to try the
5366      comparison with zero.  Don't do any of these cases if branches are
5367      very cheap.  */
5368
5369   if (BRANCH_COST > 0
5370       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5371       && op1 != const0_rtx)
5372     {
5373       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5374                           OPTAB_WIDEN);
5375
5376       if (tem == 0)
5377         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5378                             OPTAB_WIDEN);
5379       if (tem != 0)
5380         tem = emit_store_flag (target, code, tem, const0_rtx,
5381                                mode, unsignedp, normalizep);
5382       if (tem == 0)
5383         delete_insns_since (last);
5384       return tem;
5385     }
5386
5387   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5388      the constant zero.  Reject all other comparisons at this point.  Only
5389      do LE and GT if branches are expensive since they are expensive on
5390      2-operand machines.  */
5391
5392   if (BRANCH_COST == 0
5393       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5394       || (code != EQ && code != NE
5395           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5396     return 0;
5397
5398   /* See what we need to return.  We can only return a 1, -1, or the
5399      sign bit.  */
5400
5401   if (normalizep == 0)
5402     {
5403       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5404         normalizep = STORE_FLAG_VALUE;
5405
5406       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5407                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5408                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5409         ;
5410       else
5411         return 0;
5412     }
5413
5414   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5415      do the necessary operation below.  */
5416
5417   tem = 0;
5418
5419   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5420      the sign bit set.  */
5421
5422   if (code == LE)
5423     {
5424       /* This is destructive, so SUBTARGET can't be OP0.  */
5425       if (rtx_equal_p (subtarget, op0))
5426         subtarget = 0;
5427
5428       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5429                           OPTAB_WIDEN);
5430       if (tem)
5431         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5432                             OPTAB_WIDEN);
5433     }
5434
5435   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5436      number of bits in the mode of OP0, minus one.  */
5437
5438   if (code == GT)
5439     {
5440       if (rtx_equal_p (subtarget, op0))
5441         subtarget = 0;
5442
5443       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5444                           size_int (GET_MODE_BITSIZE (mode) - 1),
5445                           subtarget, 0);
5446       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5447                           OPTAB_WIDEN);
5448     }
5449
5450   if (code == EQ || code == NE)
5451     {
5452       /* For EQ or NE, one way to do the comparison is to apply an operation
5453          that converts the operand into a positive number if it is nonzero
5454          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5455          for NE we negate.  This puts the result in the sign bit.  Then we
5456          normalize with a shift, if needed.
5457
5458          Two operations that can do the above actions are ABS and FFS, so try
5459          them.  If that doesn't work, and MODE is smaller than a full word,
5460          we can use zero-extension to the wider mode (an unsigned conversion)
5461          as the operation.  */
5462
5463       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5464          that is compensated by the subsequent overflow when subtracting
5465          one / negating.  */
5466
5467       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5468         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5469       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5470         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5471       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5472         {
5473           tem = convert_modes (word_mode, mode, op0, 1);
5474           mode = word_mode;
5475         }
5476
5477       if (tem != 0)
5478         {
5479           if (code == EQ)
5480             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5481                                 0, OPTAB_WIDEN);
5482           else
5483             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5484         }
5485
5486       /* If we couldn't do it that way, for NE we can "or" the two's complement
5487          of the value with itself.  For EQ, we take the one's complement of
5488          that "or", which is an extra insn, so we only handle EQ if branches
5489          are expensive.  */
5490
5491       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5492         {
5493           if (rtx_equal_p (subtarget, op0))
5494             subtarget = 0;
5495
5496           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5497           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5498                               OPTAB_WIDEN);
5499
5500           if (tem && code == EQ)
5501             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5502         }
5503     }
5504
5505   if (tem && normalizep)
5506     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5507                         size_int (GET_MODE_BITSIZE (mode) - 1),
5508                         subtarget, normalizep == 1);
5509
5510   if (tem)
5511     {
5512       if (GET_MODE (tem) != target_mode)
5513         {
5514           convert_move (target, tem, 0);
5515           tem = target;
5516         }
5517       else if (!subtarget)
5518         {
5519           emit_move_insn (target, tem);
5520           tem = target;
5521         }
5522     }
5523   else
5524     delete_insns_since (last);
5525
5526   return tem;
5527 }
5528
5529 /* Like emit_store_flag, but always succeeds.  */
5530
5531 rtx
5532 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5533                        enum machine_mode mode, int unsignedp, int normalizep)
5534 {
5535   rtx tem, label;
5536
5537   /* First see if emit_store_flag can do the job.  */
5538   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5539   if (tem != 0)
5540     return tem;
5541
5542   if (normalizep == 0)
5543     normalizep = 1;
5544
5545   /* If this failed, we have to do this with set/compare/jump/set code.  */
5546
5547   if (!REG_P (target)
5548       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5549     target = gen_reg_rtx (GET_MODE (target));
5550
5551   emit_move_insn (target, const1_rtx);
5552   label = gen_label_rtx ();
5553   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5554                            NULL_RTX, label);
5555
5556   emit_move_insn (target, const0_rtx);
5557   emit_label (label);
5558
5559   return target;
5560 }
5561 \f
5562 /* Perform possibly multi-word comparison and conditional jump to LABEL
5563    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5564    now a thin wrapper around do_compare_rtx_and_jump.  */
5565
5566 static void
5567 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5568                  rtx label)
5569 {
5570   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5571   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5572                            NULL_RTX, NULL_RTX, label);
5573 }