expmed.c (store_fixed_bit_field): If we're not optimizing for size...
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
5 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "toplev.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "real.h"
38 #include "recog.h"
39 #include "langhooks.h"
40
41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT,
43 unsigned HOST_WIDE_INT, rtx);
44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
45 unsigned HOST_WIDE_INT, rtx);
46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT, rtx, int);
50 static rtx mask_rtx (enum machine_mode, int, int, int);
51 static rtx lshift_value (enum machine_mode, rtx, int, int);
52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT, int);
54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57
58 /* Test whether a value is zero or a power of two.  */
59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
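/* For example, x == 8 gives 8 & 7 == 0 and x == 0 gives 0 & -1 == 0, so both
   pass, while x == 6 gives 6 & 5 == 4, which is nonzero and so fails.  */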
60
61 /* Nonzero means divide or modulus operations are relatively cheap for
62 powers of two, so don't use branches; emit the operation instead.
63 Usually, this will mean that the MD file will emit non-branch
64 sequences. */
65
66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
68
69 #ifndef SLOW_UNALIGNED_ACCESS
70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
71 #endif
72
73 /* For compilers that support multiple targets with different word sizes,
74 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
75 is the H8/300(H) compiler. */
76
77 #ifndef MAX_BITS_PER_WORD
78 #define MAX_BITS_PER_WORD BITS_PER_WORD
79 #endif
80
81 /* Reduce conditional compilation elsewhere. */
82 #ifndef HAVE_insv
83 #define HAVE_insv 0
84 #define CODE_FOR_insv CODE_FOR_nothing
85 #define gen_insv(a,b,c,d) NULL_RTX
86 #endif
87 #ifndef HAVE_extv
88 #define HAVE_extv 0
89 #define CODE_FOR_extv CODE_FOR_nothing
90 #define gen_extv(a,b,c,d) NULL_RTX
91 #endif
92 #ifndef HAVE_extzv
93 #define HAVE_extzv 0
94 #define CODE_FOR_extzv CODE_FOR_nothing
95 #define gen_extzv(a,b,c,d) NULL_RTX
96 #endif
97
98 /* Cost of various pieces of RTL. Note that some of these are indexed by
99 shift count and some by mode. */
100 static int zero_cost;
101 static int add_cost[NUM_MACHINE_MODES];
102 static int neg_cost[NUM_MACHINE_MODES];
103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int mul_cost[NUM_MACHINE_MODES];
107 static int sdiv_cost[NUM_MACHINE_MODES];
108 static int udiv_cost[NUM_MACHINE_MODES];
109 static int mul_widen_cost[NUM_MACHINE_MODES];
110 static int mul_highpart_cost[NUM_MACHINE_MODES];
111
112 void
113 init_expmed (void)
114 {
115 struct
116 {
117 struct rtx_def reg; rtunion reg_fld[2];
118 struct rtx_def plus; rtunion plus_fld1;
119 struct rtx_def neg;
120 struct rtx_def mult; rtunion mult_fld1;
121 struct rtx_def sdiv; rtunion sdiv_fld1;
122 struct rtx_def udiv; rtunion udiv_fld1;
123 struct rtx_def zext;
124 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
125 struct rtx_def smod_32; rtunion smod_32_fld1;
126 struct rtx_def wide_mult; rtunion wide_mult_fld1;
127 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
128 struct rtx_def wide_trunc;
129 struct rtx_def shift; rtunion shift_fld1;
130 struct rtx_def shift_mult; rtunion shift_mult_fld1;
131 struct rtx_def shift_add; rtunion shift_add_fld1;
132 struct rtx_def shift_sub; rtunion shift_sub_fld1;
133 } all;
134
135 rtx pow2[MAX_BITS_PER_WORD];
136 rtx cint[MAX_BITS_PER_WORD];
137 int m, n;
138 enum machine_mode mode, wider_mode;
139
140 zero_cost = rtx_cost (const0_rtx, 0);
141
142 for (m = 1; m < MAX_BITS_PER_WORD; m++)
143 {
144 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
145 cint[m] = GEN_INT (m);
146 }
147
148 memset (&all, 0, sizeof all);
149
150 PUT_CODE (&all.reg, REG);
151 /* Avoid using hard regs in ways which may be unsupported. */
152 REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
153
154 PUT_CODE (&all.plus, PLUS);
155 XEXP (&all.plus, 0) = &all.reg;
156 XEXP (&all.plus, 1) = &all.reg;
157
158 PUT_CODE (&all.neg, NEG);
159 XEXP (&all.neg, 0) = &all.reg;
160
161 PUT_CODE (&all.mult, MULT);
162 XEXP (&all.mult, 0) = &all.reg;
163 XEXP (&all.mult, 1) = &all.reg;
164
165 PUT_CODE (&all.sdiv, DIV);
166 XEXP (&all.sdiv, 0) = &all.reg;
167 XEXP (&all.sdiv, 1) = &all.reg;
168
169 PUT_CODE (&all.udiv, UDIV);
170 XEXP (&all.udiv, 0) = &all.reg;
171 XEXP (&all.udiv, 1) = &all.reg;
172
173 PUT_CODE (&all.sdiv_32, DIV);
174 XEXP (&all.sdiv_32, 0) = &all.reg;
175 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
176
177 PUT_CODE (&all.smod_32, MOD);
178 XEXP (&all.smod_32, 0) = &all.reg;
179 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
180
181 PUT_CODE (&all.zext, ZERO_EXTEND);
182 XEXP (&all.zext, 0) = &all.reg;
183
184 PUT_CODE (&all.wide_mult, MULT);
185 XEXP (&all.wide_mult, 0) = &all.zext;
186 XEXP (&all.wide_mult, 1) = &all.zext;
187
188 PUT_CODE (&all.wide_lshr, LSHIFTRT);
189 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
190
191 PUT_CODE (&all.wide_trunc, TRUNCATE);
192 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
193
194 PUT_CODE (&all.shift, ASHIFT);
195 XEXP (&all.shift, 0) = &all.reg;
196
197 PUT_CODE (&all.shift_mult, MULT);
198 XEXP (&all.shift_mult, 0) = &all.reg;
199
200 PUT_CODE (&all.shift_add, PLUS);
201 XEXP (&all.shift_add, 0) = &all.shift_mult;
202 XEXP (&all.shift_add, 1) = &all.reg;
203
204 PUT_CODE (&all.shift_sub, MINUS);
205 XEXP (&all.shift_sub, 0) = &all.shift_mult;
206 XEXP (&all.shift_sub, 1) = &all.reg;
207
208 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
209 mode != VOIDmode;
210 mode = GET_MODE_WIDER_MODE (mode))
211 {
212 PUT_MODE (&all.reg, mode);
213 PUT_MODE (&all.plus, mode);
214 PUT_MODE (&all.neg, mode);
215 PUT_MODE (&all.mult, mode);
216 PUT_MODE (&all.sdiv, mode);
217 PUT_MODE (&all.udiv, mode);
218 PUT_MODE (&all.sdiv_32, mode);
219 PUT_MODE (&all.smod_32, mode);
220 PUT_MODE (&all.wide_trunc, mode);
221 PUT_MODE (&all.shift, mode);
222 PUT_MODE (&all.shift_mult, mode);
223 PUT_MODE (&all.shift_add, mode);
224 PUT_MODE (&all.shift_sub, mode);
225
226 add_cost[mode] = rtx_cost (&all.plus, SET);
227 neg_cost[mode] = rtx_cost (&all.neg, SET);
228 mul_cost[mode] = rtx_cost (&all.mult, SET);
229 sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
230 udiv_cost[mode] = rtx_cost (&all.udiv, SET);
231
232 sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
233 <= 2 * add_cost[mode]);
234 smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
235 <= 4 * add_cost[mode]);
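      /* That is, a signed divide by a power of two counts as cheap when the
         target expands it for no more than the cost of two additions, and a
         signed modulus for no more than four.  */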
236
237 wider_mode = GET_MODE_WIDER_MODE (mode);
238 if (wider_mode != VOIDmode)
239 {
240 PUT_MODE (&all.zext, wider_mode);
241 PUT_MODE (&all.wide_mult, wider_mode);
242 PUT_MODE (&all.wide_lshr, wider_mode);
243 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
244
245 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
246 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
247 }
248
249 shift_cost[mode][0] = 0;
250 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
251
252 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
253 for (m = 1; m < n; m++)
254 {
255 XEXP (&all.shift, 1) = cint[m];
256 XEXP (&all.shift_mult, 1) = pow2[m];
257
258 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
259 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
260 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
261 }
262 }
263 }
264
265 /* Return an rtx representing minus the value of X.
266 MODE is the intended mode of the result,
267 useful if X is a CONST_INT. */
268
269 rtx
270 negate_rtx (enum machine_mode mode, rtx x)
271 {
272 rtx result = simplify_unary_operation (NEG, mode, x, mode);
273
274 if (result == 0)
275 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
276
277 return result;
278 }
279
280 /* Report on the availability of insv/extv/extzv and the desired mode
281 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
282 is false; else the mode of the specified operand. If OPNO is -1,
283 all the caller cares about is whether the insn is available. */
284 enum machine_mode
285 mode_for_extraction (enum extraction_pattern pattern, int opno)
286 {
287 const struct insn_data *data;
288
289 switch (pattern)
290 {
291 case EP_insv:
292 if (HAVE_insv)
293 {
294 data = &insn_data[CODE_FOR_insv];
295 break;
296 }
297 return MAX_MACHINE_MODE;
298
299 case EP_extv:
300 if (HAVE_extv)
301 {
302 data = &insn_data[CODE_FOR_extv];
303 break;
304 }
305 return MAX_MACHINE_MODE;
306
307 case EP_extzv:
308 if (HAVE_extzv)
309 {
310 data = &insn_data[CODE_FOR_extzv];
311 break;
312 }
313 return MAX_MACHINE_MODE;
314
315 default:
316 gcc_unreachable ();
317 }
318
319 if (opno == -1)
320 return VOIDmode;
321
322 /* Everyone who uses this function used to follow it with
323 if (result == VOIDmode) result = word_mode; */
324 if (data->operand[opno].mode == VOIDmode)
325 return word_mode;
326 return data->operand[opno].mode;
327 }
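/* For example, store_bit_field below calls mode_for_extraction (EP_insv, 3)
   to learn the mode the insv pattern wants for its value operand, while
   extract_bit_field asks for operand 0 of extv or extzv for the result.  */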
328
329 \f
330 /* Generate code to store value from rtx VALUE
331 into a bit-field within structure STR_RTX
332 containing BITSIZE bits starting at bit BITNUM.
333 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
336
337 /* ??? Note that there are two different ideas here for how
338 to determine the size to count bits within, for a register.
339 One is BITS_PER_WORD, and the other is the size of operand 3
340 of the insv pattern.
341
342 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
343 otherwise, we use the mode of operand 3.  */
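/* As an illustration (assuming a typical little-endian bit-field layout),
   assigning to a 9-bit field that is preceded by 7 bits in its containing
   object reaches this function with BITSIZE == 9 and BITNUM == 7, with VALUE
   holding the rtx for the right-hand side of the assignment.  */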
344
345 rtx
346 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
347 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
348 rtx value)
349 {
350 unsigned int unit
351 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
352 unsigned HOST_WIDE_INT offset, bitpos;
353 rtx op0 = str_rtx;
354 int byte_offset;
355 rtx orig_value;
356
357 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
358
359 while (GET_CODE (op0) == SUBREG)
360 {
361 /* The following line once was done only if WORDS_BIG_ENDIAN,
362 but I think that is a mistake. WORDS_BIG_ENDIAN is
363 meaningful at a much higher level; when structures are copied
364 between memory and regs, the higher-numbered regs
365 always get higher addresses. */
366 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
367 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
368
369 byte_offset = 0;
370
371 /* Paradoxical subregs need special handling on big endian machines. */
372 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
373 {
374 int difference = inner_mode_size - outer_mode_size;
375
376 if (WORDS_BIG_ENDIAN)
377 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
378 if (BYTES_BIG_ENDIAN)
379 byte_offset += difference % UNITS_PER_WORD;
380 }
381 else
382 byte_offset = SUBREG_BYTE (op0);
383
384 bitnum += byte_offset * BITS_PER_UNIT;
385 op0 = SUBREG_REG (op0);
386 }
387
388 /* No action is needed if the target is a register and if the field
389 lies completely outside that register. This can occur if the source
390 code contains an out-of-bounds access to a small array. */
391 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
392 return value;
393
394 /* Use vec_set patterns for inserting parts of vectors whenever
395 available. */
396 if (VECTOR_MODE_P (GET_MODE (op0))
397 && !MEM_P (op0)
398 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
399 != CODE_FOR_nothing)
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
403 {
404 enum machine_mode outermode = GET_MODE (op0);
405 enum machine_mode innermode = GET_MODE_INNER (outermode);
406 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
407 int pos = bitnum / GET_MODE_BITSIZE (innermode);
408 rtx rtxpos = GEN_INT (pos);
409 rtx src = value;
410 rtx dest = op0;
411 rtx pat, seq;
412 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
413 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
414 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
415
416 start_sequence ();
417
418 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
419 src = copy_to_mode_reg (mode1, src);
420
421 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
422 rtxpos = copy_to_mode_reg (mode2, rtxpos);
423
424 /* We could handle this, but we should always be called with a pseudo
425 for our targets and all insns should take them as outputs. */
426 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
427 && (*insn_data[icode].operand[1].predicate) (src, mode1)
428 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
429 pat = GEN_FCN (icode) (dest, src, rtxpos);
430 seq = get_insns ();
431 end_sequence ();
432 if (pat)
433 {
434 emit_insn (seq);
435 emit_insn (pat);
436 return dest;
437 }
438 }
439
440 /* If the target is a register, overwriting the entire object, or storing
441 a full-word or multi-word field can be done with just a SUBREG.
442
443 If the target is memory, storing any naturally aligned field can be
444 done with a simple store.  For targets that support fast unaligned memory
445 access, any naturally sized, unit-aligned field can be stored directly.  */
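/* For example, storing a full SImode value at byte offset 4 of an aligned
   DImode MEM takes this path: the address is adjusted with adjust_address
   and a single move insn is emitted, with no masking or shifting at all.  */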
446
447 offset = bitnum / unit;
448 bitpos = bitnum % unit;
449 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
450 + (offset * UNITS_PER_WORD);
451
452 if (bitpos == 0
453 && bitsize == GET_MODE_BITSIZE (fieldmode)
454 && (!MEM_P (op0)
455 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
456 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
457 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
458 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
459 || (offset * BITS_PER_UNIT % bitsize == 0
460 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
461 {
462 if (MEM_P (op0))
463 op0 = adjust_address (op0, fieldmode, offset);
464 else if (GET_MODE (op0) != fieldmode)
465 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
466 byte_offset);
467 emit_move_insn (op0, value);
468 return value;
469 }
470
471 /* Make sure we are playing with integral modes. Pun with subregs
472 if we aren't. This must come after the entire register case above,
473 since that case is valid for any mode. The following cases are only
474 valid for integral modes. */
475 {
476 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
477 if (imode != GET_MODE (op0))
478 {
479 if (MEM_P (op0))
480 op0 = adjust_address (op0, imode, 0);
481 else
482 {
483 gcc_assert (imode != BLKmode);
484 op0 = gen_lowpart (imode, op0);
485 }
486 }
487 }
488
489 /* We may be accessing data outside the field, which means
490 we can alias adjacent data. */
491 if (MEM_P (op0))
492 {
493 op0 = shallow_copy_rtx (op0);
494 set_mem_alias_set (op0, 0);
495 set_mem_expr (op0, 0);
496 }
497
498 /* If OP0 is a register, BITPOS must count within a word.
499 But as we have it, it counts within whatever size OP0 now has.
500 On a bigendian machine, these are not the same, so convert. */
501 if (BYTES_BIG_ENDIAN
502 && !MEM_P (op0)
503 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
504 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
505
506 /* Storing an lsb-aligned field in a register
507 can be done with a movstrict instruction.  */
508
509 if (!MEM_P (op0)
510 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
511 && bitsize == GET_MODE_BITSIZE (fieldmode)
512 && (movstrict_optab->handlers[fieldmode].insn_code
513 != CODE_FOR_nothing))
514 {
515 int icode = movstrict_optab->handlers[fieldmode].insn_code;
516
517 /* Get appropriate low part of the value being stored. */
518 if (GET_CODE (value) == CONST_INT || REG_P (value))
519 value = gen_lowpart (fieldmode, value);
520 else if (!(GET_CODE (value) == SYMBOL_REF
521 || GET_CODE (value) == LABEL_REF
522 || GET_CODE (value) == CONST))
523 value = convert_to_mode (fieldmode, value, 0);
524
525 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
526 value = copy_to_mode_reg (fieldmode, value);
527
528 if (GET_CODE (op0) == SUBREG)
529 {
530 /* Else we've got some float mode source being extracted into
531 a different float mode destination -- this combination of
532 subregs results in Severe Tire Damage. */
533 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
534 || GET_MODE_CLASS (fieldmode) == MODE_INT
535 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
536 op0 = SUBREG_REG (op0);
537 }
538
539 emit_insn (GEN_FCN (icode)
540 (gen_rtx_SUBREG (fieldmode, op0,
541 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
542 + (offset * UNITS_PER_WORD)),
543 value));
544
545 return value;
546 }
547
548 /* Handle fields bigger than a word. */
549
550 if (bitsize > BITS_PER_WORD)
551 {
552 /* Here we transfer the words of the field
553 in the order least significant first.
554 This is because the most significant word is the one which may
555 be less than full.
556 However, only do that if the value is not BLKmode. */
557
558 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
559 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
560 unsigned int i;
561
562 /* This is the mode we must force value to, so that there will be enough
563 subwords to extract. Note that fieldmode will often (always?) be
564 VOIDmode, because that is what store_field uses to indicate that this
565 is a bit field, but passing VOIDmode to operand_subword_force
566 is not allowed. */
567 fieldmode = GET_MODE (value);
568 if (fieldmode == VOIDmode)
569 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
570
571 for (i = 0; i < nwords; i++)
572 {
573 /* If I is 0, use the low-order word in both field and target;
574 if I is 1, use the next to lowest word; and so on. */
575 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
576 unsigned int bit_offset = (backwards
577 ? MAX ((int) bitsize - ((int) i + 1)
578 * BITS_PER_WORD,
579 0)
580 : (int) i * BITS_PER_WORD);
581
582 store_bit_field (op0, MIN (BITS_PER_WORD,
583 bitsize - i * BITS_PER_WORD),
584 bitnum + bit_offset, word_mode,
585 operand_subword_force (value, wordnum, fieldmode));
586 }
587 return value;
588 }
589
590 /* From here on we can assume that the field to be stored is no wider
591 than a word, since fields bigger than a word were handled just above.  */
592
593 /* OFFSET is the number of words or bytes (UNIT says which)
594 from STR_RTX to the first word or byte containing part of the field. */
595
596 if (!MEM_P (op0))
597 {
598 if (offset != 0
599 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
600 {
601 if (!REG_P (op0))
602 {
603 /* Since this is a destination (lvalue), we can't copy
604 it to a pseudo. We can remove a SUBREG that does not
605 change the size of the operand. Such a SUBREG may
606 have been added above. */
607 gcc_assert (GET_CODE (op0) == SUBREG
608 && (GET_MODE_SIZE (GET_MODE (op0))
609 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
610 op0 = SUBREG_REG (op0);
611 }
612 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
613 op0, (offset * UNITS_PER_WORD));
614 }
615 offset = 0;
616 }
617
618 /* If VALUE has a floating-point or complex mode, access it as an
619 integer of the corresponding size. This can occur on a machine
620 with 64 bit registers that uses SFmode for float. It can also
621 occur for unaligned float or complex fields. */
622 orig_value = value;
623 if (GET_MODE (value) != VOIDmode
624 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
625 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
626 {
627 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
628 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
629 }
630
631 /* Now OFFSET is nonzero only if OP0 is memory
632 and is therefore always measured in bytes. */
633
634 if (HAVE_insv
635 && GET_MODE (value) != BLKmode
636 && bitsize > 0
637 && GET_MODE_BITSIZE (op_mode) >= bitsize
638 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
639 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
640 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
641 VOIDmode))
642 {
643 int xbitpos = bitpos;
644 rtx value1;
645 rtx xop0 = op0;
646 rtx last = get_last_insn ();
647 rtx pat;
648 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
649 int save_volatile_ok = volatile_ok;
650
651 volatile_ok = 1;
652
653 /* If this machine's insv can only insert into a register, copy OP0
654 into a register and save it back later. */
655 if (MEM_P (op0)
656 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
657 (op0, VOIDmode)))
658 {
659 rtx tempreg;
660 enum machine_mode bestmode;
661
662 /* Get the mode to use for inserting into this field. If OP0 is
663 BLKmode, get the smallest mode consistent with the alignment. If
664 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
665 mode. Otherwise, use the smallest mode containing the field. */
666
667 if (GET_MODE (op0) == BLKmode
668 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
669 bestmode
670 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
671 MEM_VOLATILE_P (op0));
672 else
673 bestmode = GET_MODE (op0);
674
675 if (bestmode == VOIDmode
676 || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
677 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
678 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
679 goto insv_loses;
680
681 /* Adjust address to point to the containing unit of that mode.
682 Compute offset as multiple of this unit, counting in bytes. */
683 unit = GET_MODE_BITSIZE (bestmode);
684 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
685 bitpos = bitnum % unit;
686 op0 = adjust_address (op0, bestmode, offset);
687
688 /* Fetch that unit, store the bitfield in it, then store
689 the unit. */
690 tempreg = copy_to_reg (op0);
691 store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
692 emit_move_insn (op0, tempreg);
693 return value;
694 }
695 volatile_ok = save_volatile_ok;
696
697 /* Add OFFSET into OP0's address. */
698 if (MEM_P (xop0))
699 xop0 = adjust_address (xop0, byte_mode, offset);
700
701 /* If xop0 is a register, we need it in MAXMODE
702 to make it acceptable to the format of insv. */
703 if (GET_CODE (xop0) == SUBREG)
704 /* We can't just change the mode, because this might clobber op0,
705 and we will need the original value of op0 if insv fails. */
706 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
707 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
708 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
709
710 /* On big-endian machines, we count bits from the most significant.
711 If the bit field insn does not, we must invert. */
712
713 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
714 xbitpos = unit - bitsize - xbitpos;
715
716 /* We have been counting XBITPOS within UNIT.
717 Count instead within the size of the register. */
718 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
719 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
720
721 unit = GET_MODE_BITSIZE (maxmode);
722
723 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
724 value1 = value;
725 if (GET_MODE (value) != maxmode)
726 {
727 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
728 {
729 /* Optimization: Don't bother really extending VALUE
730 if it has all the bits we will actually use. However,
731 if we must narrow it, be sure we do it correctly. */
732
733 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
734 {
735 rtx tmp;
736
737 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
738 if (! tmp)
739 tmp = simplify_gen_subreg (maxmode,
740 force_reg (GET_MODE (value),
741 value1),
742 GET_MODE (value), 0);
743 value1 = tmp;
744 }
745 else
746 value1 = gen_lowpart (maxmode, value1);
747 }
748 else if (GET_CODE (value) == CONST_INT)
749 value1 = gen_int_mode (INTVAL (value), maxmode);
750 else
751 /* The parse phase is supposed to make VALUE's data type
752 match that of the component reference, which is a type
753 at least as wide as the field; so VALUE should have
754 a mode that corresponds to that type. */
755 gcc_assert (CONSTANT_P (value));
756 }
757
758 /* If this machine's insv insists on a register,
759 get VALUE1 into a register. */
760 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
761 (value1, maxmode)))
762 value1 = force_reg (maxmode, value1);
763
764 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
765 if (pat)
766 emit_insn (pat);
767 else
768 {
769 delete_insns_since (last);
770 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
771 }
772 }
773 else
774 insv_loses:
775 /* Insv is not available; store using shifts and boolean ops. */
776 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
777 return value;
778 }
779 \f
780 /* Use shifts and boolean operations to store VALUE
781 into a bit field of width BITSIZE
782 in the memory location specified by OP0, offset by OFFSET bytes.
783 (OFFSET must be 0 if OP0 is a register.)
784 The field starts at position BITPOS within the byte.
785 (If OP0 is a register, it may be a full word or a narrower mode,
786 but BITPOS still counts within a full word,
787 which is significant on bigendian machines.) */
788
789 static void
790 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
791 unsigned HOST_WIDE_INT bitsize,
792 unsigned HOST_WIDE_INT bitpos, rtx value)
793 {
794 enum machine_mode mode;
795 unsigned int total_bits = BITS_PER_WORD;
796 rtx subtarget, temp;
797 int all_zero = 0;
798 int all_one = 0;
799
800 /* There is a case not handled here:
801 a structure with a known alignment of just a halfword
802 and a field split across two aligned halfwords within the structure.
803 Or likewise a structure with a known alignment of just a byte
804 and a field split across two bytes.
805 Such cases are not supposed to be able to occur. */
806
807 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
808 {
809 gcc_assert (!offset);
810 /* Special treatment for a bit field split across two registers. */
811 if (bitsize + bitpos > BITS_PER_WORD)
812 {
813 store_split_bit_field (op0, bitsize, bitpos, value);
814 return;
815 }
816 }
817 else
818 {
819 /* Get the proper mode to use for this field. We want a mode that
820 includes the entire field. If such a mode would be larger than
821 a word, we won't be doing the store the normal way.
822 We don't want a mode bigger than the destination. */
823
824 mode = GET_MODE (op0);
825 if (GET_MODE_BITSIZE (mode) == 0
826 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
827 mode = word_mode;
828 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
829 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
830
831 if (mode == VOIDmode)
832 {
833 /* The only way this should occur is if the field spans word
834 boundaries. */
835 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
836 value);
837 return;
838 }
839
840 total_bits = GET_MODE_BITSIZE (mode);
841
842 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
843 be in the range 0 to total_bits-1, and put any excess bytes in
844 OFFSET. */
845 if (bitpos >= total_bits)
846 {
847 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
848 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
849 * BITS_PER_UNIT);
850 }
851
852 /* Get ref to an aligned byte, halfword, or word containing the field.
853 Adjust BITPOS to be position within a word,
854 and OFFSET to be the offset of that word.
855 Then alter OP0 to refer to that word. */
856 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
857 offset -= (offset % (total_bits / BITS_PER_UNIT));
858 op0 = adjust_address (op0, mode, offset);
859 }
860
861 mode = GET_MODE (op0);
862
863 /* Now MODE is either some integral mode for a MEM as OP0,
864 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
865 The bit field is contained entirely within OP0.
866 BITPOS is the starting bit number within OP0.
867 (OP0's mode may actually be narrower than MODE.) */
868
869 if (BYTES_BIG_ENDIAN)
870 /* BITPOS is the distance between our msb
871 and that of the containing datum.
872 Convert it to the distance from the lsb. */
873 bitpos = total_bits - bitsize - bitpos;
874
875 /* Now BITPOS is always the distance between our lsb
876 and that of OP0. */
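  /* For instance, with TOTAL_BITS == 32, BITSIZE == 8 and a big-endian BITPOS
     of 8, the field occupies bits 16 through 23 counted from the lsb, so the
     adjusted BITPOS is 32 - 8 - 8 == 16.  */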
877
878 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
879 we must first convert its mode to MODE. */
880
881 if (GET_CODE (value) == CONST_INT)
882 {
883 HOST_WIDE_INT v = INTVAL (value);
884
885 if (bitsize < HOST_BITS_PER_WIDE_INT)
886 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
887
888 if (v == 0)
889 all_zero = 1;
890 else if ((bitsize < HOST_BITS_PER_WIDE_INT
891 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
892 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
893 all_one = 1;
894
895 value = lshift_value (mode, value, bitpos, bitsize);
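      /* For example, storing the constant 5 into a 3-bit field at BITPOS 4
         leaves VALUE equal to 5 << 4 == 0x50 here, ready to be IORed into
         OP0 below.  */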
896 }
897 else
898 {
899 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
900 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
901
902 if (GET_MODE (value) != mode)
903 {
904 if ((REG_P (value) || GET_CODE (value) == SUBREG)
905 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
906 value = gen_lowpart (mode, value);
907 else
908 value = convert_to_mode (mode, value, 1);
909 }
910
911 if (must_and)
912 value = expand_binop (mode, and_optab, value,
913 mask_rtx (mode, 0, bitsize, 0),
914 NULL_RTX, 1, OPTAB_LIB_WIDEN);
915 if (bitpos > 0)
916 value = expand_shift (LSHIFT_EXPR, mode, value,
917 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
918 }
919
920 /* Now clear the chosen bits in OP0,
921 except that if VALUE is -1 we need not bother. */
922
923 subtarget = op0;
924
925 if (! all_one)
926 {
927 /* Don't try to keep the intermediate in memory if we need to
928 perform both a bit-wise AND and a bit-wise IOR, except when
929 we're optimizing for size.  */
930 if (MEM_P (subtarget) && !all_zero && !optimize_size)
931 subtarget = force_reg (mode, subtarget);
932 temp = expand_binop (mode, and_optab, subtarget,
933 mask_rtx (mode, bitpos, bitsize, 1),
934 subtarget, 1, OPTAB_LIB_WIDEN);
935 subtarget = temp;
936 }
937 else
938 temp = op0;
939
940 /* Now logical-or VALUE into OP0, unless it is zero. */
941
942 if (! all_zero)
943 temp = expand_binop (mode, ior_optab, temp, value,
944 subtarget, 1, OPTAB_LIB_WIDEN);
945 if (op0 != temp)
946 emit_move_insn (op0, temp);
947 }
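/* For a constant VALUE the net effect above is the classic read-modify-write
   sequence.  A rough C sketch of the same idea, with hypothetical names and
   assuming BITSIZE is smaller than the width of the containing unit, is:

       unsigned int unit = *p;
       unsigned int mask = ((1u << bitsize) - 1) << bitpos;
       unit = (unit & ~mask) | ((value << bitpos) & mask);
       *p = unit;

   The AND that clears the field is skipped when VALUE is known to be all
   ones, and the IOR is skipped when VALUE is known to be zero.  */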
948 \f
949 /* Store a bit field that is split across multiple accessible memory objects.
950
951 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
952 BITSIZE is the field width; BITPOS the position of its first bit
953 (within the word).
954 VALUE is the value to store.
955
956 This does not yet handle fields wider than BITS_PER_WORD. */
957
958 static void
959 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
960 unsigned HOST_WIDE_INT bitpos, rtx value)
961 {
962 unsigned int unit;
963 unsigned int bitsdone = 0;
964
965 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
966 much at a time. */
967 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
968 unit = BITS_PER_WORD;
969 else
970 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
971
972 /* If VALUE is a constant other than a CONST_INT, get it into a register in
973 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
974 that VALUE might be a floating-point constant. */
975 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
976 {
977 rtx word = gen_lowpart_common (word_mode, value);
978
979 if (word && (value != word))
980 value = word;
981 else
982 value = gen_lowpart_common (word_mode,
983 force_reg (GET_MODE (value) != VOIDmode
984 ? GET_MODE (value)
985 : word_mode, value));
986 }
987
988 while (bitsdone < bitsize)
989 {
990 unsigned HOST_WIDE_INT thissize;
991 rtx part, word;
992 unsigned HOST_WIDE_INT thispos;
993 unsigned HOST_WIDE_INT offset;
994
995 offset = (bitpos + bitsdone) / unit;
996 thispos = (bitpos + bitsdone) % unit;
997
998 /* THISSIZE must not overrun a word boundary. Otherwise,
999 store_fixed_bit_field will call us again, and we will mutually
1000 recurse forever. */
1001 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1002 thissize = MIN (thissize, unit - thispos);
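      /* For instance, a 16-bit field starting at BITPOS 28 with UNIT == 32 is
         stored as a 4-bit piece at bit 28 of the first word followed by a
         12-bit piece at bit 0 of the next word.  */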
1003
1004 if (BYTES_BIG_ENDIAN)
1005 {
1006 int total_bits;
1007
1008 /* We must do an endian conversion exactly the same way as it is
1009 done in extract_bit_field, so that the two calls to
1010 extract_fixed_bit_field will have comparable arguments. */
1011 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1012 total_bits = BITS_PER_WORD;
1013 else
1014 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1015
1016 /* Fetch successively less significant portions. */
1017 if (GET_CODE (value) == CONST_INT)
1018 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1019 >> (bitsize - bitsdone - thissize))
1020 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1021 else
1022 /* The args are chosen so that the last part includes the
1023 lsb. Give extract_bit_field the value it needs (with
1024 endianness compensation) to fetch the piece we want. */
1025 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1026 total_bits - bitsize + bitsdone,
1027 NULL_RTX, 1);
1028 }
1029 else
1030 {
1031 /* Fetch successively more significant portions. */
1032 if (GET_CODE (value) == CONST_INT)
1033 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1034 >> bitsdone)
1035 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1036 else
1037 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1038 bitsdone, NULL_RTX, 1);
1039 }
1040
1041 /* If OP0 is a register, then handle OFFSET here.
1042
1043 When handling multiword bitfields, extract_bit_field may pass
1044 down a word_mode SUBREG of a larger REG for a bitfield that actually
1045 crosses a word boundary. Thus, for a SUBREG, we must find
1046 the current word starting from the base register. */
1047 if (GET_CODE (op0) == SUBREG)
1048 {
1049 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1050 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1051 GET_MODE (SUBREG_REG (op0)));
1052 offset = 0;
1053 }
1054 else if (REG_P (op0))
1055 {
1056 word = operand_subword_force (op0, offset, GET_MODE (op0));
1057 offset = 0;
1058 }
1059 else
1060 word = op0;
1061
1062 /* OFFSET is in UNITs, and UNIT is in bits.
1063 store_fixed_bit_field wants offset in bytes. */
1064 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1065 thispos, part);
1066 bitsdone += thissize;
1067 }
1068 }
1069 \f
1070 /* Generate code to extract a bit-field from STR_RTX
1071 containing BITSIZE bits, starting at BITNUM,
1072 and put it in TARGET if possible (if TARGET is nonzero).
1073 Regardless of TARGET, we return the rtx for where the value is placed.
1074
1075 STR_RTX is the structure containing the byte (a REG or MEM).
1076 UNSIGNEDP is nonzero if this is an unsigned bit field.
1077 MODE is the natural mode of the field value once extracted.
1078 TMODE is the mode the caller would like the value to have;
1079 but the value may be returned with type MODE instead.
1080
1084 If a TARGET is specified and we can store in it at no extra cost,
1085 we do so, and return TARGET.
1086 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1087 if they are equally easy. */
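/* As an illustration (again assuming a typical little-endian bit-field
   layout), reading an unsigned 9-bit field preceded by 7 bits in its
   containing object reaches this function with BITSIZE == 9, BITNUM == 7 and
   UNSIGNEDP nonzero; the result comes back zero-extended in TMODE or MODE.  */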
1088
1089 rtx
1090 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1091 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1092 enum machine_mode mode, enum machine_mode tmode)
1093 {
1094 unsigned int unit
1095 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1096 unsigned HOST_WIDE_INT offset, bitpos;
1097 rtx op0 = str_rtx;
1098 rtx spec_target = target;
1099 rtx spec_target_subreg = 0;
1100 enum machine_mode int_mode;
1101 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1102 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1103 enum machine_mode mode1;
1104 int byte_offset;
1105
1106 if (tmode == VOIDmode)
1107 tmode = mode;
1108
1109 while (GET_CODE (op0) == SUBREG)
1110 {
1111 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1112 op0 = SUBREG_REG (op0);
1113 }
1114
1115 /* If we have an out-of-bounds access to a register, just return an
1116 uninitialized register of the required mode. This can occur if the
1117 source code contains an out-of-bounds access to a small array. */
1118 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1119 return gen_reg_rtx (tmode);
1120
1121 if (REG_P (op0)
1122 && mode == GET_MODE (op0)
1123 && bitnum == 0
1124 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1125 {
1126 /* We're trying to extract a full register from itself. */
1127 return op0;
1128 }
1129
1130 /* Use vec_extract patterns for extracting parts of vectors whenever
1131 available. */
1132 if (VECTOR_MODE_P (GET_MODE (op0))
1133 && !MEM_P (op0)
1134 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1135 != CODE_FOR_nothing)
1136 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1137 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1138 {
1139 enum machine_mode outermode = GET_MODE (op0);
1140 enum machine_mode innermode = GET_MODE_INNER (outermode);
1141 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1142 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1143 rtx rtxpos = GEN_INT (pos);
1144 rtx src = op0;
1145 rtx dest = NULL, pat, seq;
1146 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1147 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1148 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1149
1150 if (innermode == tmode || innermode == mode)
1151 dest = target;
1152
1153 if (!dest)
1154 dest = gen_reg_rtx (innermode);
1155
1156 start_sequence ();
1157
1158 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1159 dest = copy_to_mode_reg (mode0, dest);
1160
1161 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1162 src = copy_to_mode_reg (mode1, src);
1163
1164 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1165 rtxpos = copy_to_mode_reg (mode2, rtxpos);
1166
1167 /* We could handle this, but we should always be called with a pseudo
1168 for our targets and all insns should take them as outputs. */
1169 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1170 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1171 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1172
1173 pat = GEN_FCN (icode) (dest, src, rtxpos);
1174 seq = get_insns ();
1175 end_sequence ();
1176 if (pat)
1177 {
1178 emit_insn (seq);
1179 emit_insn (pat);
1180 return dest;
1181 }
1182 }
1183
1184 /* Make sure we are playing with integral modes. Pun with subregs
1185 if we aren't. */
1186 {
1187 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1188 if (imode != GET_MODE (op0))
1189 {
1190 if (MEM_P (op0))
1191 op0 = adjust_address (op0, imode, 0);
1192 else
1193 {
1194 gcc_assert (imode != BLKmode);
1195 op0 = gen_lowpart (imode, op0);
1196
1197 /* If we got a SUBREG, force it into a register since we
1198 aren't going to be able to do another SUBREG on it. */
1199 if (GET_CODE (op0) == SUBREG)
1200 op0 = force_reg (imode, op0);
1201 }
1202 }
1203 }
1204
1205 /* We may be accessing data outside the field, which means
1206 we can alias adjacent data. */
1207 if (MEM_P (op0))
1208 {
1209 op0 = shallow_copy_rtx (op0);
1210 set_mem_alias_set (op0, 0);
1211 set_mem_expr (op0, 0);
1212 }
1213
1214 /* Extraction of a full-word or multi-word value from a structure
1215 in a register or aligned memory can be done with just a SUBREG.
1216 A subword value in the least significant part of a register
1217 can also be extracted with a SUBREG. For this, we need the
1218 byte offset of the value in op0. */
1219
1220 bitpos = bitnum % unit;
1221 offset = bitnum / unit;
1222 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1223
1224 /* If OP0 is a register, BITPOS must count within a word.
1225 But as we have it, it counts within whatever size OP0 now has.
1226 On a bigendian machine, these are not the same, so convert. */
1227 if (BYTES_BIG_ENDIAN
1228 && !MEM_P (op0)
1229 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1230 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1231
1232 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1233 If that's wrong, the solution is to test for it and set TARGET to 0
1234 if needed. */
1235
1236 /* Only scalar integer modes can be converted via subregs. There is an
1237 additional problem for FP modes here in that they can have a precision
1238 which is different from the size. mode_for_size uses precision, but
1239 we want a mode based on the size, so we must avoid calling it for FP
1240 modes. */
1241 mode1 = (SCALAR_INT_MODE_P (tmode)
1242 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1243 : mode);
1244
1245 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1246 && bitpos % BITS_PER_WORD == 0)
1247 || (mode1 != BLKmode
1248 /* ??? The big endian test here is wrong. This is correct
1249 if the value is in a register, and if mode_for_size is not
1250 the same mode as op0. This causes us to get unnecessarily
1251 inefficient code from the Thumb port when -mbig-endian. */
1252 && (BYTES_BIG_ENDIAN
1253 ? bitpos + bitsize == BITS_PER_WORD
1254 : bitpos == 0)))
1255 && ((!MEM_P (op0)
1256 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1257 GET_MODE_BITSIZE (GET_MODE (op0)))
1258 && GET_MODE_SIZE (mode1) != 0
1259 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1260 || (MEM_P (op0)
1261 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1262 || (offset * BITS_PER_UNIT % bitsize == 0
1263 && MEM_ALIGN (op0) % bitsize == 0)))))
1264 {
1265 if (mode1 != GET_MODE (op0))
1266 {
1267 if (MEM_P (op0))
1268 op0 = adjust_address (op0, mode1, offset);
1269 else
1270 {
1271 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1272 byte_offset);
1273 if (sub == NULL)
1274 goto no_subreg_mode_swap;
1275 op0 = sub;
1276 }
1277 }
1278 if (mode1 != mode)
1279 return convert_to_mode (tmode, op0, unsignedp);
1280 return op0;
1281 }
1282 no_subreg_mode_swap:
1283
1284 /* Handle fields bigger than a word. */
1285
1286 if (bitsize > BITS_PER_WORD)
1287 {
1288 /* Here we transfer the words of the field
1289 in the order least significant first.
1290 This is because the most significant word is the one which may
1291 be less than full. */
1292
1293 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1294 unsigned int i;
1295
1296 if (target == 0 || !REG_P (target))
1297 target = gen_reg_rtx (mode);
1298
1299 /* Indicate for flow that the entire target reg is being set. */
1300 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1301
1302 for (i = 0; i < nwords; i++)
1303 {
1304 /* If I is 0, use the low-order word in both field and target;
1305 if I is 1, use the next to lowest word; and so on. */
1306 /* Word number in TARGET to use. */
1307 unsigned int wordnum
1308 = (WORDS_BIG_ENDIAN
1309 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1310 : i);
1311 /* Offset from start of field in OP0. */
1312 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1313 ? MAX (0, ((int) bitsize - ((int) i + 1)
1314 * (int) BITS_PER_WORD))
1315 : (int) i * BITS_PER_WORD);
1316 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1317 rtx result_part
1318 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1319 bitsize - i * BITS_PER_WORD),
1320 bitnum + bit_offset, 1, target_part, mode,
1321 word_mode);
1322
1323 gcc_assert (target_part);
1324
1325 if (result_part != target_part)
1326 emit_move_insn (target_part, result_part);
1327 }
1328
1329 if (unsignedp)
1330 {
1331 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1332 need to be zero'd out. */
1333 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1334 {
1335 unsigned int i, total_words;
1336
1337 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1338 for (i = nwords; i < total_words; i++)
1339 emit_move_insn
1340 (operand_subword (target,
1341 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1342 1, VOIDmode),
1343 const0_rtx);
1344 }
1345 return target;
1346 }
1347
1348 /* Signed bit field: sign-extend with two arithmetic shifts. */
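    /* For example, a 16-bit signed field extracted into a 32-bit MODE is
       shifted left by 32 - 16 == 16 bits and then arithmetically right by
       the same amount, replicating the field's sign bit through the upper
       half.  */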
1349 target = expand_shift (LSHIFT_EXPR, mode, target,
1350 build_int_cst (NULL_TREE,
1351 GET_MODE_BITSIZE (mode) - bitsize),
1352 NULL_RTX, 0);
1353 return expand_shift (RSHIFT_EXPR, mode, target,
1354 build_int_cst (NULL_TREE,
1355 GET_MODE_BITSIZE (mode) - bitsize),
1356 NULL_RTX, 0);
1357 }
1358
1359 /* From here on we know the desired field is smaller than a word. */
1360
1361 /* Check if there is a correspondingly-sized integer field, so we can
1362 safely extract it as one size of integer, if necessary; then
1363 truncate or extend to the size that is wanted; then use SUBREGs or
1364 convert_to_mode to get one of the modes we really wanted. */
1365
1366 int_mode = int_mode_for_mode (tmode);
1367 if (int_mode == BLKmode)
1368 int_mode = int_mode_for_mode (mode);
1369 /* Should probably push op0 out to memory and then do a load. */
1370 gcc_assert (int_mode != BLKmode);
1371
1372 /* OFFSET is the number of words or bytes (UNIT says which)
1373 from STR_RTX to the first word or byte containing part of the field. */
1374 if (!MEM_P (op0))
1375 {
1376 if (offset != 0
1377 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1378 {
1379 if (!REG_P (op0))
1380 op0 = copy_to_reg (op0);
1381 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1382 op0, (offset * UNITS_PER_WORD));
1383 }
1384 offset = 0;
1385 }
1386
1387 /* Now OFFSET is nonzero only for memory operands. */
1388
1389 if (unsignedp)
1390 {
1391 if (HAVE_extzv
1392 && bitsize > 0
1393 && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1394 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1395 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1396 {
1397 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1398 rtx bitsize_rtx, bitpos_rtx;
1399 rtx last = get_last_insn ();
1400 rtx xop0 = op0;
1401 rtx xtarget = target;
1402 rtx xspec_target = spec_target;
1403 rtx xspec_target_subreg = spec_target_subreg;
1404 rtx pat;
1405 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1406
1407 if (MEM_P (xop0))
1408 {
1409 int save_volatile_ok = volatile_ok;
1410 volatile_ok = 1;
1411
1412 /* Is the memory operand acceptable? */
1413 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1414 (xop0, GET_MODE (xop0))))
1415 {
1416 /* No, load into a reg and extract from there. */
1417 enum machine_mode bestmode;
1418
1419 /* Get the mode to use for extracting this field.  If
1420 OP0 is BLKmode, get the smallest mode consistent with the
1421 alignment. If OP0 is a non-BLKmode object that is no
1422 wider than MAXMODE, use its mode. Otherwise, use the
1423 smallest mode containing the field. */
1424
1425 if (GET_MODE (xop0) == BLKmode
1426 || (GET_MODE_SIZE (GET_MODE (op0))
1427 > GET_MODE_SIZE (maxmode)))
1428 bestmode = get_best_mode (bitsize, bitnum,
1429 MEM_ALIGN (xop0), maxmode,
1430 MEM_VOLATILE_P (xop0));
1431 else
1432 bestmode = GET_MODE (xop0);
1433
1434 if (bestmode == VOIDmode
1435 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1436 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1437 goto extzv_loses;
1438
1439 /* Compute offset as multiple of this unit,
1440 counting in bytes. */
1441 unit = GET_MODE_BITSIZE (bestmode);
1442 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1443 xbitpos = bitnum % unit;
1444 xop0 = adjust_address (xop0, bestmode, xoffset);
1445
1446 /* Make sure register is big enough for the whole field. */
1447 if (xoffset * BITS_PER_UNIT + unit
1448 < offset * BITS_PER_UNIT + bitsize)
1449 goto extzv_loses;
1450
1451 /* Fetch it to a register in that size. */
1452 xop0 = force_reg (bestmode, xop0);
1453
1454 /* XBITPOS counts within UNIT, which is what is expected. */
1455 }
1456 else
1457 /* Get ref to first byte containing part of the field. */
1458 xop0 = adjust_address (xop0, byte_mode, xoffset);
1459
1460 volatile_ok = save_volatile_ok;
1461 }
1462
1463 /* If op0 is a register, we need it in MAXMODE (which is usually
1464 SImode) to make it acceptable to the format of extzv.  */
1465 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1466 goto extzv_loses;
1467 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1468 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1469
1470 /* On big-endian machines, we count bits from the most significant.
1471 If the bit field insn does not, we must invert. */
1472 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1473 xbitpos = unit - bitsize - xbitpos;
1474
1475 /* Now convert from counting within UNIT to counting in MAXMODE. */
1476 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1477 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1478
1479 unit = GET_MODE_BITSIZE (maxmode);
1480
1481 if (xtarget == 0)
1482 xtarget = xspec_target = gen_reg_rtx (tmode);
1483
1484 if (GET_MODE (xtarget) != maxmode)
1485 {
1486 if (REG_P (xtarget))
1487 {
1488 int wider = (GET_MODE_SIZE (maxmode)
1489 > GET_MODE_SIZE (GET_MODE (xtarget)));
1490 xtarget = gen_lowpart (maxmode, xtarget);
1491 if (wider)
1492 xspec_target_subreg = xtarget;
1493 }
1494 else
1495 xtarget = gen_reg_rtx (maxmode);
1496 }
1497
1498 /* If this machine's extzv insists on a register target,
1499 make sure we have one. */
1500 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1501 (xtarget, maxmode)))
1502 xtarget = gen_reg_rtx (maxmode);
1503
1504 bitsize_rtx = GEN_INT (bitsize);
1505 bitpos_rtx = GEN_INT (xbitpos);
1506
1507 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1508 if (pat)
1509 {
1510 emit_insn (pat);
1511 target = xtarget;
1512 spec_target = xspec_target;
1513 spec_target_subreg = xspec_target_subreg;
1514 }
1515 else
1516 {
1517 delete_insns_since (last);
1518 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1519 bitpos, target, 1);
1520 }
1521 }
1522 else
1523 extzv_loses:
1524 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1525 bitpos, target, 1);
1526 }
1527 else
1528 {
1529 if (HAVE_extv
1530 && bitsize > 0
1531 && GET_MODE_BITSIZE (extv_mode) >= bitsize
1532 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1533 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1534 {
1535 int xbitpos = bitpos, xoffset = offset;
1536 rtx bitsize_rtx, bitpos_rtx;
1537 rtx last = get_last_insn ();
1538 rtx xop0 = op0, xtarget = target;
1539 rtx xspec_target = spec_target;
1540 rtx xspec_target_subreg = spec_target_subreg;
1541 rtx pat;
1542 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1543
1544 if (MEM_P (xop0))
1545 {
1546 /* Is the memory operand acceptable? */
1547 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1548 (xop0, GET_MODE (xop0))))
1549 {
1550 /* No, load into a reg and extract from there. */
1551 enum machine_mode bestmode;
1552
1553 /* Get the mode to use for extracting this field.  If
1554 OP0 is BLKmode, get the smallest mode consistent with the
1555 alignment. If OP0 is a non-BLKmode object that is no
1556 wider than MAXMODE, use its mode. Otherwise, use the
1557 smallest mode containing the field. */
1558
1559 if (GET_MODE (xop0) == BLKmode
1560 || (GET_MODE_SIZE (GET_MODE (op0))
1561 > GET_MODE_SIZE (maxmode)))
1562 bestmode = get_best_mode (bitsize, bitnum,
1563 MEM_ALIGN (xop0), maxmode,
1564 MEM_VOLATILE_P (xop0));
1565 else
1566 bestmode = GET_MODE (xop0);
1567
1568 if (bestmode == VOIDmode
1569 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1570 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1571 goto extv_loses;
1572
1573 /* Compute offset as multiple of this unit,
1574 counting in bytes. */
1575 unit = GET_MODE_BITSIZE (bestmode);
1576 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1577 xbitpos = bitnum % unit;
1578 xop0 = adjust_address (xop0, bestmode, xoffset);
1579
1580 /* Make sure register is big enough for the whole field. */
1581 if (xoffset * BITS_PER_UNIT + unit
1582 < offset * BITS_PER_UNIT + bitsize)
1583 goto extv_loses;
1584
1585 /* Fetch it to a register in that size. */
1586 xop0 = force_reg (bestmode, xop0);
1587
1588 /* XBITPOS counts within UNIT, which is what is expected. */
1589 }
1590 else
1591 /* Get ref to first byte containing part of the field. */
1592 xop0 = adjust_address (xop0, byte_mode, xoffset);
1593 }
1594
1595 /* If op0 is a register, we need it in MAXMODE (which is usually
1596 SImode) to make it acceptable to the format of extv. */
1597 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1598 goto extv_loses;
1599 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1600 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1601
1602 /* On big-endian machines, we count bits from the most significant.
1603 If the bit field insn does not, we must invert. */
1604 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1605 xbitpos = unit - bitsize - xbitpos;
1606
1607 /* XBITPOS counts within a size of UNIT.
1608 Adjust to count within a size of MAXMODE. */
1609 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1610 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1611
1612 unit = GET_MODE_BITSIZE (maxmode);
1613
1614 if (xtarget == 0)
1615 xtarget = xspec_target = gen_reg_rtx (tmode);
1616
1617 if (GET_MODE (xtarget) != maxmode)
1618 {
1619 if (REG_P (xtarget))
1620 {
1621 int wider = (GET_MODE_SIZE (maxmode)
1622 > GET_MODE_SIZE (GET_MODE (xtarget)));
1623 xtarget = gen_lowpart (maxmode, xtarget);
1624 if (wider)
1625 xspec_target_subreg = xtarget;
1626 }
1627 else
1628 xtarget = gen_reg_rtx (maxmode);
1629 }
1630
1631 /* If this machine's extv insists on a register target,
1632 make sure we have one. */
1633 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1634 (xtarget, maxmode)))
1635 xtarget = gen_reg_rtx (maxmode);
1636
1637 bitsize_rtx = GEN_INT (bitsize);
1638 bitpos_rtx = GEN_INT (xbitpos);
1639
1640 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1641 if (pat)
1642 {
1643 emit_insn (pat);
1644 target = xtarget;
1645 spec_target = xspec_target;
1646 spec_target_subreg = xspec_target_subreg;
1647 }
1648 else
1649 {
1650 delete_insns_since (last);
1651 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1652 bitpos, target, 0);
1653 }
1654 }
1655 else
1656 extv_loses:
1657 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1658 bitpos, target, 0);
1659 }
1660 if (target == spec_target)
1661 return target;
1662 if (target == spec_target_subreg)
1663 return spec_target;
1664 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1665 {
1666 /* If the target mode is not a scalar integral, first convert to the
1667 integer mode of that size and then access it as a floating-point
1668 value via a SUBREG. */
1669 if (!SCALAR_INT_MODE_P (tmode))
1670 {
1671 enum machine_mode smode
1672 = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1673 target = convert_to_mode (smode, target, unsignedp);
1674 target = force_reg (smode, target);
1675 return gen_lowpart (tmode, target);
1676 }
1677
1678 return convert_to_mode (tmode, target, unsignedp);
1679 }
1680 return target;
1681 }
1682 \f
1683 /* Extract a bit field using shifts and boolean operations.
1684 Returns an rtx to represent the value.
1685 OP0 addresses a register (word) or memory (byte).
1686 BITPOS says which bit within the word or byte the bit field starts in.
1687 OFFSET says how many bytes farther the bit field starts;
1688 it is 0 if OP0 is a register.
1689 BITSIZE says how many bits long the bit field is.
1690 (If OP0 is a register, it may be narrower than a full word,
1691 but BITPOS still counts within a full word,
1692 which is significant on big-endian machines.)
1693
1694 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1695 If TARGET is nonzero, attempts to store the value there
1696 and return TARGET, but this is not guaranteed.
1697 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1698
1699 static rtx
1700 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1701 unsigned HOST_WIDE_INT offset,
1702 unsigned HOST_WIDE_INT bitsize,
1703 unsigned HOST_WIDE_INT bitpos, rtx target,
1704 int unsignedp)
1705 {
1706 unsigned int total_bits = BITS_PER_WORD;
1707 enum machine_mode mode;
1708
1709 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1710 {
1711 /* Special treatment for a bit field split across two registers. */
1712 if (bitsize + bitpos > BITS_PER_WORD)
1713 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1714 }
1715 else
1716 {
1717 /* Get the proper mode to use for this field. We want a mode that
1718 includes the entire field. If such a mode would be larger than
1719 a word, we won't be doing the extraction the normal way. */
1720
1721 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1722 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1723
1724 if (mode == VOIDmode)
1725 /* The only way this should occur is if the field spans word
1726 boundaries. */
1727 return extract_split_bit_field (op0, bitsize,
1728 bitpos + offset * BITS_PER_UNIT,
1729 unsignedp);
1730
1731 total_bits = GET_MODE_BITSIZE (mode);
1732
1733 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1734 be in the range 0 to total_bits-1, and put any excess bytes in
1735 OFFSET. */
1736 if (bitpos >= total_bits)
1737 {
1738 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1739 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1740 * BITS_PER_UNIT);
1741 }
1742
1743 /* Get ref to an aligned byte, halfword, or word containing the field.
1744 Adjust BITPOS to be position within a word,
1745 and OFFSET to be the offset of that word.
1746 Then alter OP0 to refer to that word. */
1747 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1748 offset -= (offset % (total_bits / BITS_PER_UNIT));
1749 op0 = adjust_address (op0, mode, offset);
1750 }
1751
1752 mode = GET_MODE (op0);
1753
1754 if (BYTES_BIG_ENDIAN)
1755 /* BITPOS is the distance between our msb and that of OP0.
1756 Convert it to the distance from the lsb. */
1757 bitpos = total_bits - bitsize - bitpos;
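    /* For example, with TOTAL_BITS == 32, an 8-bit field that starts 4 bits
       from the most significant bit becomes BITPOS == 32 - 8 - 4 == 20,
       its distance from the least significant bit.  */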
1758
1759 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1760 We have reduced the big-endian case to the little-endian case. */
1761
1762 if (unsignedp)
1763 {
1764 if (bitpos)
1765 {
1766 /* If the field does not already start at the lsb,
1767 shift it so it does. */
1768 tree amount = build_int_cst (NULL_TREE, bitpos);
1769 /* Maybe propagate the target for the shift. */
1770 /* But not if we will return it--could confuse integrate.c. */
1771 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1772 if (tmode != mode) subtarget = 0;
1773 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1774 }
1775 /* Convert the value to the desired mode. */
1776 if (mode != tmode)
1777 op0 = convert_to_mode (tmode, op0, 1);
1778
1779 /* Unless the msb of the field used to be the msb when we shifted,
1780 mask out the upper bits. */
1781
1782 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1783 return expand_binop (GET_MODE (op0), and_optab, op0,
1784 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1785 target, 1, OPTAB_LIB_WIDEN);
1786 return op0;
1787 }
1788
1789 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1790 then arithmetic-shift its lsb to the lsb of the word. */
1791 op0 = force_reg (mode, op0);
1792 if (mode != tmode)
1793 target = 0;
1794
1795 /* Find the narrowest integer mode that contains the field. */
1796
1797 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1798 mode = GET_MODE_WIDER_MODE (mode))
1799 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1800 {
1801 op0 = convert_to_mode (mode, op0, 0);
1802 break;
1803 }
1804
1805 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1806 {
1807 tree amount
1808 = build_int_cst (NULL_TREE,
1809 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1810 /* Maybe propagate the target for the shift. */
1811 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1812 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1813 }
1814
1815 return expand_shift (RSHIFT_EXPR, mode, op0,
1816 build_int_cst (NULL_TREE,
1817 GET_MODE_BITSIZE (mode) - bitsize),
1818 target, 0);
1819 }
1820 \f
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1822 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1823 complement of that if COMPLEMENT. The mask is truncated if
1824 necessary to the width of mode MODE. The mask is zero-extended if
1825 BITSIZE+BITPOS is too small for MODE. */
1826
1827 static rtx
1828 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1829 {
1830 HOST_WIDE_INT masklow, maskhigh;
1831
1832 if (bitsize == 0)
1833 masklow = 0;
1834 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1835 masklow = (HOST_WIDE_INT) -1 << bitpos;
1836 else
1837 masklow = 0;
1838
1839 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1840 masklow &= ((unsigned HOST_WIDE_INT) -1
1841 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1842
1843 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1844 maskhigh = -1;
1845 else
1846 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1847
1848 if (bitsize == 0)
1849 maskhigh = 0;
1850 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1851 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1852 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1853 else
1854 maskhigh = 0;
1855
1856 if (complement)
1857 {
1858 maskhigh = ~maskhigh;
1859 masklow = ~masklow;
1860 }
1861
1862 return immed_double_const (masklow, maskhigh, mode);
1863 }
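/* As an illustration of the layout described above: in SImode, BITPOS == 4
   and BITSIZE == 8 give the mask 0x00000FF0 (eight ones followed by four
   zeros), and 0xFFFFF00F when COMPLEMENT is nonzero.  */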
1864
1865 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1866 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1867
1868 static rtx
1869 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1870 {
1871 unsigned HOST_WIDE_INT v = INTVAL (value);
1872 HOST_WIDE_INT low, high;
1873
1874 if (bitsize < HOST_BITS_PER_WIDE_INT)
1875 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1876
1877 if (bitpos < HOST_BITS_PER_WIDE_INT)
1878 {
1879 low = v << bitpos;
1880 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1881 }
1882 else
1883 {
1884 low = 0;
1885 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1886 }
1887
1888 return immed_double_const (low, high, mode);
1889 }
1890 \f
1891 /* Extract a bit field from a memory by forcing the alignment of the
1892 memory. This is efficient only if the field spans at least 4 boundaries.
1893
1894 OP0 is the MEM.
1895 BITSIZE is the field width; BITPOS is the position of the first bit.
1896 UNSIGNEDP is true if the result should be zero-extended. */
1897
1898 static rtx
1899 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1900 unsigned HOST_WIDE_INT bitpos,
1901 int unsignedp)
1902 {
1903 enum machine_mode mode, dmode;
1904 unsigned int m_bitsize, m_size;
1905 unsigned int sign_shift_up, sign_shift_dn;
1906 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1907
1908 /* Choose a mode that will fit BITSIZE. */
1909 mode = smallest_mode_for_size (bitsize, MODE_INT);
1910 m_size = GET_MODE_SIZE (mode);
1911 m_bitsize = GET_MODE_BITSIZE (mode);
1912
1913 /* Choose a mode twice as wide. Fail if no such mode exists. */
1914 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1915 if (dmode == BLKmode)
1916 return NULL;
1917
1918 do_pending_stack_adjust ();
1919 start = get_last_insn ();
1920
1921 /* At the end, we'll need an additional shift to deal with sign/zero
1922 extension. By default this will be a left+right shift of the
1923 appropriate size. But we may be able to eliminate one of them. */
1924 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1925
1926 if (STRICT_ALIGNMENT)
1927 {
1928 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1929 bitpos %= BITS_PER_UNIT;
1930
1931 /* We load two values to be concatenated. There's an edge condition
1932 that bears notice -- for an aligned value at the end of a page we can
1933 only load one value lest we segfault. So the two values we load
1934 are at "base & -size" and "(base + size - 1) & -size". If base
1935 is unaligned, the addresses will be aligned and sequential; if
1936 base is aligned, the addresses will both be equal to base. */
1937
1938 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1939 GEN_INT (-(HOST_WIDE_INT)m_size),
1940 NULL, true, OPTAB_LIB_WIDEN);
1941 mark_reg_pointer (a1, m_bitsize);
1942 v1 = gen_rtx_MEM (mode, a1);
1943 set_mem_align (v1, m_bitsize);
1944 v1 = force_reg (mode, validize_mem (v1));
1945
1946 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1947 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1948 GEN_INT (-(HOST_WIDE_INT)m_size),
1949 NULL, true, OPTAB_LIB_WIDEN);
1950 v2 = gen_rtx_MEM (mode, a2);
1951 set_mem_align (v2, m_bitsize);
1952 v2 = force_reg (mode, validize_mem (v2));
1953
1954 /* Combine these two values into a double-word value. */
1955 if (m_bitsize == BITS_PER_WORD)
1956 {
1957 comb = gen_reg_rtx (dmode);
1958 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1959 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1960 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1961 }
1962 else
1963 {
1964 if (BYTES_BIG_ENDIAN)
1965 comb = v1, v1 = v2, v2 = comb;
1966 v1 = convert_modes (dmode, mode, v1, true);
1967 if (v1 == NULL)
1968 goto fail;
1969 v2 = convert_modes (dmode, mode, v2, true);
1970 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1971 NULL, true, OPTAB_LIB_WIDEN);
1972 if (v2 == NULL)
1973 goto fail;
1974 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1975 true, OPTAB_LIB_WIDEN);
1976 if (comb == NULL)
1977 goto fail;
1978 }
1979
1980 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1981 NULL, true, OPTAB_LIB_WIDEN);
1982 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1983
1984 if (bitpos != 0)
1985 {
1986 if (sign_shift_up <= bitpos)
1987 bitpos -= sign_shift_up, sign_shift_up = 0;
1988 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1989 NULL, true, OPTAB_LIB_WIDEN);
1990 }
1991 }
1992 else
1993 {
1994 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1995 bitpos %= BITS_PER_UNIT;
1996
1997 /* When strict alignment is not required, we can just load directly
1998 from memory without masking. If the remaining BITPOS offset is
1999 small enough, we may be able to do all operations in MODE as
2000 opposed to DMODE. */
2001 if (bitpos + bitsize <= m_bitsize)
2002 dmode = mode;
2003 comb = adjust_address (op0, dmode, offset);
2004
2005 if (sign_shift_up <= bitpos)
2006 bitpos -= sign_shift_up, sign_shift_up = 0;
2007 shift = GEN_INT (bitpos);
2008 }
2009
2010 /* Shift down the double-word such that the requested value is at bit 0. */
2011 if (shift != const0_rtx)
2012 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2013 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2014 if (comb == NULL)
2015 goto fail;
2016
2017 /* If the field exactly matches MODE, then all we need to do is return the
2018 lowpart. Otherwise, shift to get the sign bits set properly. */
2019 result = force_reg (mode, gen_lowpart (mode, comb));
2020
2021 if (sign_shift_up)
2022 result = expand_simple_binop (mode, ASHIFT, result,
2023 GEN_INT (sign_shift_up),
2024 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2025 if (sign_shift_dn)
2026 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2027 result, GEN_INT (sign_shift_dn),
2028 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2029
2030 return result;
2031
2032 fail:
2033 delete_insns_since (start);
2034 return NULL;
2035 }
2036
2037 /* Extract a bit field that is split across two words
2038 and return an RTX for the result.
2039
2040 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2041 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2042 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
2043
2044 static rtx
2045 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2046 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2047 {
2048 unsigned int unit;
2049 unsigned int bitsdone = 0;
2050 rtx result = NULL_RTX;
2051 int first = 1;
2052
2053 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2054 much at a time. */
2055 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2056 unit = BITS_PER_WORD;
2057 else
2058 {
2059 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2060 if (0 && bitsize / unit > 2)
2061 {
2062 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2063 unsignedp);
2064 if (tmp)
2065 return tmp;
2066 }
2067 }
2068
2069 while (bitsdone < bitsize)
2070 {
2071 unsigned HOST_WIDE_INT thissize;
2072 rtx part, word;
2073 unsigned HOST_WIDE_INT thispos;
2074 unsigned HOST_WIDE_INT offset;
2075
2076 offset = (bitpos + bitsdone) / unit;
2077 thispos = (bitpos + bitsdone) % unit;
2078
2079 /* THISSIZE must not overrun a word boundary. Otherwise,
2080 extract_fixed_bit_field will call us again, and we will mutually
2081 recurse forever. */
2082 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2083 thissize = MIN (thissize, unit - thispos);
2084
2085 /* If OP0 is a register, then handle OFFSET here.
2086
2087 When handling multiword bitfields, extract_bit_field may pass
2088 down a word_mode SUBREG of a larger REG for a bitfield that actually
2089 crosses a word boundary. Thus, for a SUBREG, we must find
2090 the current word starting from the base register. */
2091 if (GET_CODE (op0) == SUBREG)
2092 {
2093 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2094 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2095 GET_MODE (SUBREG_REG (op0)));
2096 offset = 0;
2097 }
2098 else if (REG_P (op0))
2099 {
2100 word = operand_subword_force (op0, offset, GET_MODE (op0));
2101 offset = 0;
2102 }
2103 else
2104 word = op0;
2105
2106 /* Extract the parts in bit-counting order,
2107 whose meaning is determined by BYTES_PER_UNIT.
2108 OFFSET is in UNITs, and UNIT is in bits.
2109 extract_fixed_bit_field wants offset in bytes. */
2110 part = extract_fixed_bit_field (word_mode, word,
2111 offset * unit / BITS_PER_UNIT,
2112 thissize, thispos, 0, 1);
2113 bitsdone += thissize;
2114
2115 /* Shift this part into place for the result. */
2116 if (BYTES_BIG_ENDIAN)
2117 {
2118 if (bitsize != bitsdone)
2119 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2120 build_int_cst (NULL_TREE, bitsize - bitsdone),
2121 0, 1);
2122 }
2123 else
2124 {
2125 if (bitsdone != thissize)
2126 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2127 build_int_cst (NULL_TREE,
2128 bitsdone - thissize), 0, 1);
2129 }
2130
2131 if (first)
2132 result = part;
2133 else
2134 /* Combine the parts with bitwise or. This works
2135 because we extracted each part as an unsigned bit field. */
2136 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2137 OPTAB_LIB_WIDEN);
2138
2139 first = 0;
2140 }
2141
2142 /* Unsigned bit field: we are done. */
2143 if (unsignedp)
2144 return result;
2145 /* Signed bit field: sign-extend with two arithmetic shifts. */
2146 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2147 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2148 NULL_RTX, 0);
2149 return expand_shift (RSHIFT_EXPR, word_mode, result,
2150 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2151 NULL_RTX, 0);
2152 }
2153 \f
2154 /* Add INC into TARGET. */
2155
2156 void
2157 expand_inc (rtx target, rtx inc)
2158 {
2159 rtx value = expand_binop (GET_MODE (target), add_optab,
2160 target, inc,
2161 target, 0, OPTAB_LIB_WIDEN);
2162 if (value != target)
2163 emit_move_insn (target, value);
2164 }
2165
2166 /* Subtract DEC from TARGET. */
2167
2168 void
2169 expand_dec (rtx target, rtx dec)
2170 {
2171 rtx value = expand_binop (GET_MODE (target), sub_optab,
2172 target, dec,
2173 target, 0, OPTAB_LIB_WIDEN);
2174 if (value != target)
2175 emit_move_insn (target, value);
2176 }
2177 \f
2178 /* Output a shift instruction for expression code CODE,
2179 with SHIFTED being the rtx for the value to shift,
2180 and AMOUNT the tree for the amount to shift by.
2181 Store the result in the rtx TARGET, if that is convenient.
2182 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2183 Return the rtx for where the value is. */
2184
2185 rtx
2186 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2187 tree amount, rtx target, int unsignedp)
2188 {
2189 rtx op1, temp = 0;
2190 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2191 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2192 int try;
2193
2194 /* Previously, we detected shift-counts computed by NEGATE_EXPR
2195 and shifted in the other direction, but that does not work
2196 on all machines. */
2197
2198 op1 = expand_normal (amount);
2199
2200 if (SHIFT_COUNT_TRUNCATED)
2201 {
2202 if (GET_CODE (op1) == CONST_INT
2203 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2204 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2205 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2206 % GET_MODE_BITSIZE (mode));
2207 else if (GET_CODE (op1) == SUBREG
2208 && subreg_lowpart_p (op1))
2209 op1 = SUBREG_REG (op1);
2210 }
2211
2212 if (op1 == const0_rtx)
2213 return shifted;
2214
2215 /* Check whether it's cheaper to implement a left shift by a constant
2216 bit count as a sequence of additions. */
2217 if (code == LSHIFT_EXPR
2218 && GET_CODE (op1) == CONST_INT
2219 && INTVAL (op1) > 0
2220 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2221 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2222 {
2223 int i;
2224 for (i = 0; i < INTVAL (op1); i++)
2225 {
2226 temp = force_reg (mode, shifted);
2227 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2228 unsignedp, OPTAB_LIB_WIDEN);
2229 }
2230 return shifted;
2231 }
2232
2233 for (try = 0; temp == 0 && try < 3; try++)
2234 {
2235 enum optab_methods methods;
2236
2237 if (try == 0)
2238 methods = OPTAB_DIRECT;
2239 else if (try == 1)
2240 methods = OPTAB_WIDEN;
2241 else
2242 methods = OPTAB_LIB_WIDEN;
2243
2244 if (rotate)
2245 {
2246 /* Widening does not work for rotation. */
2247 if (methods == OPTAB_WIDEN)
2248 continue;
2249 else if (methods == OPTAB_LIB_WIDEN)
2250 {
2251 /* If we have been unable to open-code this by a rotation,
2252 do it as the IOR of two shifts. I.e., to rotate A
2253 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2254 where C is the bitsize of A.
2255
2256 It is theoretically possible that the target machine might
2257 not be able to perform either shift and hence we would
2258 be making two libcalls rather than just the one for the
2259 shift (similarly if IOR could not be done). We will allow
2260 this extremely unlikely lossage to avoid complicating the
2261 code below. */
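	      /* Concretely, in a 32-bit mode a left rotation of A by N is
		 emitted as (A << N) | ((unsigned) A >> (32 - N)); e.g.
		 rotating 0x12345678 left by 8 gives 0x34567812.  */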
2262
2263 rtx subtarget = target == shifted ? 0 : target;
2264 rtx temp1;
2265 tree type = TREE_TYPE (amount);
2266 tree new_amount = make_tree (type, op1);
2267 tree other_amount
2268 = fold_build2 (MINUS_EXPR, type,
2269 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2270 amount);
2271
2272 shifted = force_reg (mode, shifted);
2273
2274 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2275 mode, shifted, new_amount, 0, 1);
2276 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2277 mode, shifted, other_amount, subtarget, 1);
2278 return expand_binop (mode, ior_optab, temp, temp1, target,
2279 unsignedp, methods);
2280 }
2281
2282 temp = expand_binop (mode,
2283 left ? rotl_optab : rotr_optab,
2284 shifted, op1, target, unsignedp, methods);
2285 }
2286 else if (unsignedp)
2287 temp = expand_binop (mode,
2288 left ? ashl_optab : lshr_optab,
2289 shifted, op1, target, unsignedp, methods);
2290
2291 /* Do arithmetic shifts.
2292 Also, if we are going to widen the operand, we can just as well
2293 use an arithmetic right-shift instead of a logical one. */
2294 if (temp == 0 && ! rotate
2295 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2296 {
2297 enum optab_methods methods1 = methods;
2298
2299 /* If trying to widen a log shift to an arithmetic shift,
2300 don't accept an arithmetic shift of the same size. */
2301 if (unsignedp)
2302 methods1 = OPTAB_MUST_WIDEN;
2303
2304 /* Arithmetic shift */
2305
2306 temp = expand_binop (mode,
2307 left ? ashl_optab : ashr_optab,
2308 shifted, op1, target, unsignedp, methods1);
2309 }
2310
2311 /* We used to try extzv here for logical right shifts, but that was
2312 only useful for one machine, the VAX, and caused poor code
2313 generation there for lshrdi3, so the code was deleted and a
2314 define_expand for lshrsi3 was added to vax.md. */
2315 }
2316
2317 gcc_assert (temp);
2318 return temp;
2319 }
2320 \f
2321 enum alg_code {
2322 alg_unknown,
2323 alg_zero,
2324 alg_m, alg_shift,
2325 alg_add_t_m2,
2326 alg_sub_t_m2,
2327 alg_add_factor,
2328 alg_sub_factor,
2329 alg_add_t2_m,
2330 alg_sub_t2_m,
2331 alg_impossible
2332 };
2333
2334 /* This structure holds the "cost" of a multiply sequence. The
2335 "cost" field holds the total rtx_cost of every operator in the
2336 synthetic multiplication sequence, hence cost(a op b) is defined
2337 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2338 The "latency" field holds the minimum possible latency of the
2339 synthetic multiply, on a hypothetical infinitely parallel CPU.
2340 This is the critical path, or the maximum height, of the expression
2341 tree which is the sum of rtx_costs on the most expensive path from
2342 any leaf to the root. Hence latency(a op b) is defined as zero for
2343 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2344
2345 struct mult_cost {
2346 short cost; /* Total rtx_cost of the multiplication sequence. */
2347 short latency; /* The latency of the multiplication sequence. */
2348 };
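/* A small example of the distinction: if each addition has rtx_cost 1,
   then t1 = a + b; t2 = t1 + c has cost 2 and latency 2 (the second add
   waits for the first), while t1 = a + b; t2 = c + d; t3 = t1 + t2 has
   cost 3 but latency only 2 on a sufficiently parallel machine.  */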
2349
2350 /* This macro is used to compare a pointer to a mult_cost against a
2351 single integer "rtx_cost" value. This is equivalent to the macro
2352 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2353 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2354 || ((X)->cost == (Y) && (X)->latency < (Y)))
2355
2356 /* This macro is used to compare two pointers to mult_costs against
2357 each other. The macro returns true if X is cheaper than Y.
2358 Currently, the cheaper of two mult_costs is the one with the
2359 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2360 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2361 || ((X)->cost == (Y)->cost \
2362 && (X)->latency < (Y)->latency))
2363
2364 /* This structure records a sequence of operations.
2365 `ops' is the number of operations recorded.
2366 `cost' is their total cost.
2367 The operations are stored in `op' and the corresponding
2368 logarithms of the integer coefficients in `log'.
2369
2370 These are the operations:
2371 alg_zero total := 0;
2372 alg_m total := multiplicand;
2373 alg_shift total := total * coeff
2374 alg_add_t_m2 total := total + multiplicand * coeff;
2375 alg_sub_t_m2 total := total - multiplicand * coeff;
2376 alg_add_factor total := total * coeff + total;
2377 alg_sub_factor total := total * coeff - total;
2378 alg_add_t2_m total := total * coeff + multiplicand;
2379 alg_sub_t2_m total := total * coeff - multiplicand;
2380
2381 The first operand must be either alg_zero or alg_m. */
2382
2383 struct algorithm
2384 {
2385 struct mult_cost cost;
2386 short ops;
2387 /* The size of the OP and LOG fields are not directly related to the
2388 word size, but the worst-case algorithms will be if we have few
2389 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2390 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2391 in total wordsize operations. */
2392 enum alg_code op[MAX_BITS_PER_WORD];
2393 char log[MAX_BITS_PER_WORD];
2394 };
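/* For instance (purely illustrative, independent of any target's costs),
   a multiplication by 10 could be recorded as the three operations

     alg_m          total := multiplicand              (total = 1*x)
     alg_add_t2_m   total := total*4 + multiplicand    (log = 2, total = 5*x)
     alg_shift      total := total*2                   (log = 1, total = 10*x)

   with ops == 3, op[] holding the codes and log[] the shift counts.  */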
2395
2396 /* The entry for our multiplication cache/hash table. */
2397 struct alg_hash_entry {
2398 /* The number we are multiplying by. */
2399 unsigned int t;
2400
2401 /* The mode in which we are multiplying something by T. */
2402 enum machine_mode mode;
2403
2404 /* The best multiplication algorithm for t. */
2405 enum alg_code alg;
2406
2407 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2408 Otherwise, the cost within which multiplication by T is
2409 impossible. */
2410 struct mult_cost cost;
2411 };
2412
2413 /* The number of cache/hash entries. */
2414 #define NUM_ALG_HASH_ENTRIES 307
2415
2416 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2417 actually a hash table. If we have a collision, the older
2418 entry is kicked out. */
2419 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2420
2421 /* Indicates the type of fixup needed after a constant multiplication.
2422 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2423 the result should be negated, and ADD_VARIANT means that the
2424 multiplicand should be added to the result. */
2425 enum mult_variant {basic_variant, negate_variant, add_variant};
2426
2427 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2428 const struct mult_cost *, enum machine_mode mode);
2429 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2430 struct algorithm *, enum mult_variant *, int);
2431 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2432 const struct algorithm *, enum mult_variant);
2433 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2434 int, rtx *, int *, int *);
2435 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2436 static rtx extract_high_half (enum machine_mode, rtx);
2437 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2438 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2439 int, int);
2440 /* Compute and return the best algorithm for multiplying by T.
2441 The algorithm must cost less than COST_LIMIT.
2442 If retval.cost >= COST_LIMIT, no algorithm was found and all
2443 other fields of the returned struct are undefined.
2444 MODE is the machine mode of the multiplication. */
2445
2446 static void
2447 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2448 const struct mult_cost *cost_limit, enum machine_mode mode)
2449 {
2450 int m;
2451 struct algorithm *alg_in, *best_alg;
2452 struct mult_cost best_cost;
2453 struct mult_cost new_limit;
2454 int op_cost, op_latency;
2455 unsigned HOST_WIDE_INT q;
2456 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2457 int hash_index;
2458 bool cache_hit = false;
2459 enum alg_code cache_alg = alg_zero;
2460
2461 /* Indicate that no algorithm is yet found. If no algorithm
2462 is found, this value will be returned and indicate failure. */
2463 alg_out->cost.cost = cost_limit->cost + 1;
2464 alg_out->cost.latency = cost_limit->latency + 1;
2465
2466 if (cost_limit->cost < 0
2467 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2468 return;
2469
2470 /* Restrict the bits of "t" to the multiplication's mode. */
2471 t &= GET_MODE_MASK (mode);
2472
2473 /* t == 1 can be done in zero cost. */
2474 if (t == 1)
2475 {
2476 alg_out->ops = 1;
2477 alg_out->cost.cost = 0;
2478 alg_out->cost.latency = 0;
2479 alg_out->op[0] = alg_m;
2480 return;
2481 }
2482
2483 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2484 fail now. */
2485 if (t == 0)
2486 {
2487 if (MULT_COST_LESS (cost_limit, zero_cost))
2488 return;
2489 else
2490 {
2491 alg_out->ops = 1;
2492 alg_out->cost.cost = zero_cost;
2493 alg_out->cost.latency = zero_cost;
2494 alg_out->op[0] = alg_zero;
2495 return;
2496 }
2497 }
2498
2499 /* We'll be needing a couple extra algorithm structures now. */
2500
2501 alg_in = alloca (sizeof (struct algorithm));
2502 best_alg = alloca (sizeof (struct algorithm));
2503 best_cost = *cost_limit;
2504
2505 /* Compute the hash index. */
2506 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2507
2508 /* See if we already know what to do for T. */
2509 if (alg_hash[hash_index].t == t
2510 && alg_hash[hash_index].mode == mode
2511 && alg_hash[hash_index].alg != alg_unknown)
2512 {
2513 cache_alg = alg_hash[hash_index].alg;
2514
2515 if (cache_alg == alg_impossible)
2516 {
2517 /* The cache tells us that it's impossible to synthesize
2518 multiplication by T within alg_hash[hash_index].cost. */
2519 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2520 /* COST_LIMIT is at least as restrictive as the one
2521 recorded in the hash table, in which case we have no
2522 hope of synthesizing a multiplication. Just
2523 return. */
2524 return;
2525
2526 /* If we get here, COST_LIMIT is less restrictive than the
2527 one recorded in the hash table, so we may be able to
2528 synthesize a multiplication. Proceed as if we didn't
2529 have the cache entry. */
2530 }
2531 else
2532 {
2533 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2534 /* The cached algorithm shows that this multiplication
2535 requires more cost than COST_LIMIT. Just return. This
2536 way, we don't clobber this cache entry with
2537 alg_impossible but retain useful information. */
2538 return;
2539
2540 cache_hit = true;
2541
2542 switch (cache_alg)
2543 {
2544 case alg_shift:
2545 goto do_alg_shift;
2546
2547 case alg_add_t_m2:
2548 case alg_sub_t_m2:
2549 goto do_alg_addsub_t_m2;
2550
2551 case alg_add_factor:
2552 case alg_sub_factor:
2553 goto do_alg_addsub_factor;
2554
2555 case alg_add_t2_m:
2556 goto do_alg_add_t2_m;
2557
2558 case alg_sub_t2_m:
2559 goto do_alg_sub_t2_m;
2560
2561 default:
2562 gcc_unreachable ();
2563 }
2564 }
2565 }
2566
2567 /* If we have a group of zero bits at the low-order part of T, try
2568 multiplying by the remaining bits and then doing a shift. */
2569
2570 if ((t & 1) == 0)
2571 {
2572 do_alg_shift:
2573 m = floor_log2 (t & -t); /* m = number of low zero bits */
2574 if (m < maxm)
2575 {
2576 q = t >> m;
2577 /* The function expand_shift will choose between a shift and
2578 a sequence of additions, so the observed cost is given as
2579 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2580 op_cost = m * add_cost[mode];
2581 if (shift_cost[mode][m] < op_cost)
2582 op_cost = shift_cost[mode][m];
2583 new_limit.cost = best_cost.cost - op_cost;
2584 new_limit.latency = best_cost.latency - op_cost;
2585 synth_mult (alg_in, q, &new_limit, mode);
2586
2587 alg_in->cost.cost += op_cost;
2588 alg_in->cost.latency += op_cost;
2589 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590 {
2591 struct algorithm *x;
2592 best_cost = alg_in->cost;
2593 x = alg_in, alg_in = best_alg, best_alg = x;
2594 best_alg->log[best_alg->ops] = m;
2595 best_alg->op[best_alg->ops] = alg_shift;
2596 }
2597 }
2598 if (cache_hit)
2599 goto done;
2600 }
2601
2602 /* If we have an odd number, add or subtract one. */
2603 if ((t & 1) != 0)
2604 {
2605 unsigned HOST_WIDE_INT w;
2606
2607 do_alg_addsub_t_m2:
2608 for (w = 1; (w & t) != 0; w <<= 1)
2609 ;
2610 /* If T was -1, then W will be zero after the loop. This is another
2611 case where T ends with ...111. Handling this with (T + 1) and
2612 subtracting 1 produces slightly better code and results in much
2613 faster algorithm selection than treating it like the ...0111 case
2614 below. */
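      /* For example, T == 7 (...111) takes the T + 1 branch below and is
	 synthesized as 8 - 1, i.e. one shift and one subtract of the
	 multiplicand, while T == 5 (...101) takes the T - 1 branch and
	 becomes 4 + 1, a shift and an add.  */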
2615 if (w == 0
2616 || (w > 2
2617 /* Reject the case where t is 3.
2618 Thus we prefer addition in that case. */
2619 && t != 3))
2620 {
2621 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2622
2623 op_cost = add_cost[mode];
2624 new_limit.cost = best_cost.cost - op_cost;
2625 new_limit.latency = best_cost.latency - op_cost;
2626 synth_mult (alg_in, t + 1, &new_limit, mode);
2627
2628 alg_in->cost.cost += op_cost;
2629 alg_in->cost.latency += op_cost;
2630 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631 {
2632 struct algorithm *x;
2633 best_cost = alg_in->cost;
2634 x = alg_in, alg_in = best_alg, best_alg = x;
2635 best_alg->log[best_alg->ops] = 0;
2636 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2637 }
2638 }
2639 else
2640 {
2641 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2642
2643 op_cost = add_cost[mode];
2644 new_limit.cost = best_cost.cost - op_cost;
2645 new_limit.latency = best_cost.latency - op_cost;
2646 synth_mult (alg_in, t - 1, &new_limit, mode);
2647
2648 alg_in->cost.cost += op_cost;
2649 alg_in->cost.latency += op_cost;
2650 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2651 {
2652 struct algorithm *x;
2653 best_cost = alg_in->cost;
2654 x = alg_in, alg_in = best_alg, best_alg = x;
2655 best_alg->log[best_alg->ops] = 0;
2656 best_alg->op[best_alg->ops] = alg_add_t_m2;
2657 }
2658 }
2659 if (cache_hit)
2660 goto done;
2661 }
2662
2663 /* Look for factors of t of the form
2664 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2665 If we find such a factor, we can multiply by t using an algorithm that
2666 multiplies by q, shift the result by m and add/subtract it to itself.
2667
2668 We search for large factors first and loop down, even if large factors
2669 are less probable than small; if we find a large factor we will find a
2670 good sequence quickly, and therefore be able to prune (by decreasing
2671 COST_LIMIT) the search. */
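  /* For example, t == 45 factors as (2**3 + 1) * 5, so one candidate found
     here is: synthesize the multiplication by 5, then add that result,
     shifted left by 3, to itself (alg_add_factor with log == 3).  Whether
     this candidate wins depends entirely on the target's costs.  */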
2672
2673 do_alg_addsub_factor:
2674 for (m = floor_log2 (t - 1); m >= 2; m--)
2675 {
2676 unsigned HOST_WIDE_INT d;
2677
2678 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2679 if (t % d == 0 && t > d && m < maxm
2680 && (!cache_hit || cache_alg == alg_add_factor))
2681 {
2682 /* If the target has a cheap shift-and-add instruction use
2683 that in preference to a shift insn followed by an add insn.
2684 Assume that the shift-and-add is "atomic" with a latency
2685 equal to its cost, otherwise assume that on superscalar
2686 hardware the shift may be executed concurrently with the
2687 earlier steps in the algorithm. */
2688 op_cost = add_cost[mode] + shift_cost[mode][m];
2689 if (shiftadd_cost[mode][m] < op_cost)
2690 {
2691 op_cost = shiftadd_cost[mode][m];
2692 op_latency = op_cost;
2693 }
2694 else
2695 op_latency = add_cost[mode];
2696
2697 new_limit.cost = best_cost.cost - op_cost;
2698 new_limit.latency = best_cost.latency - op_latency;
2699 synth_mult (alg_in, t / d, &new_limit, mode);
2700
2701 alg_in->cost.cost += op_cost;
2702 alg_in->cost.latency += op_latency;
2703 if (alg_in->cost.latency < op_cost)
2704 alg_in->cost.latency = op_cost;
2705 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2706 {
2707 struct algorithm *x;
2708 best_cost = alg_in->cost;
2709 x = alg_in, alg_in = best_alg, best_alg = x;
2710 best_alg->log[best_alg->ops] = m;
2711 best_alg->op[best_alg->ops] = alg_add_factor;
2712 }
2713 /* Other factors will have been taken care of in the recursion. */
2714 break;
2715 }
2716
2717 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2718 if (t % d == 0 && t > d && m < maxm
2719 && (!cache_hit || cache_alg == alg_sub_factor))
2720 {
2721 /* If the target has a cheap shift-and-subtract insn use
2722 that in preference to a shift insn followed by a sub insn.
2723 Assume that the shift-and-sub is "atomic" with a latency
2724 equal to its cost, otherwise assume that on superscalar
2725 hardware the shift may be executed concurrently with the
2726 earlier steps in the algorithm. */
2727 op_cost = add_cost[mode] + shift_cost[mode][m];
2728 if (shiftsub_cost[mode][m] < op_cost)
2729 {
2730 op_cost = shiftsub_cost[mode][m];
2731 op_latency = op_cost;
2732 }
2733 else
2734 op_latency = add_cost[mode];
2735
2736 new_limit.cost = best_cost.cost - op_cost;
2737 new_limit.latency = best_cost.latency - op_latency;
2738 synth_mult (alg_in, t / d, &new_limit, mode);
2739
2740 alg_in->cost.cost += op_cost;
2741 alg_in->cost.latency += op_latency;
2742 if (alg_in->cost.latency < op_cost)
2743 alg_in->cost.latency = op_cost;
2744 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745 {
2746 struct algorithm *x;
2747 best_cost = alg_in->cost;
2748 x = alg_in, alg_in = best_alg, best_alg = x;
2749 best_alg->log[best_alg->ops] = m;
2750 best_alg->op[best_alg->ops] = alg_sub_factor;
2751 }
2752 break;
2753 }
2754 }
2755 if (cache_hit)
2756 goto done;
2757
2758 /* Try shift-and-add (load effective address) instructions,
2759 i.e. do a*3, a*5, a*9. */
2760 if ((t & 1) != 0)
2761 {
2762 do_alg_add_t2_m:
2763 q = t - 1;
2764 q = q & -q;
2765 m = exact_log2 (q);
2766 if (m >= 0 && m < maxm)
2767 {
2768 op_cost = shiftadd_cost[mode][m];
2769 new_limit.cost = best_cost.cost - op_cost;
2770 new_limit.latency = best_cost.latency - op_cost;
2771 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2772
2773 alg_in->cost.cost += op_cost;
2774 alg_in->cost.latency += op_cost;
2775 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2776 {
2777 struct algorithm *x;
2778 best_cost = alg_in->cost;
2779 x = alg_in, alg_in = best_alg, best_alg = x;
2780 best_alg->log[best_alg->ops] = m;
2781 best_alg->op[best_alg->ops] = alg_add_t2_m;
2782 }
2783 }
2784 if (cache_hit)
2785 goto done;
2786
2787 do_alg_sub_t2_m:
2788 q = t + 1;
2789 q = q & -q;
2790 m = exact_log2 (q);
2791 if (m >= 0 && m < maxm)
2792 {
2793 op_cost = shiftsub_cost[mode][m];
2794 new_limit.cost = best_cost.cost - op_cost;
2795 new_limit.latency = best_cost.latency - op_cost;
2796 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2797
2798 alg_in->cost.cost += op_cost;
2799 alg_in->cost.latency += op_cost;
2800 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2801 {
2802 struct algorithm *x;
2803 best_cost = alg_in->cost;
2804 x = alg_in, alg_in = best_alg, best_alg = x;
2805 best_alg->log[best_alg->ops] = m;
2806 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2807 }
2808 }
2809 if (cache_hit)
2810 goto done;
2811 }
2812
2813 done:
2814 /* If best_cost has not decreased, we have not found any algorithm. */
2815 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2816 {
2817 /* We failed to find an algorithm. Record alg_impossible for
2818 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2819 we are asked to find an algorithm for T within the same or
2820 lower COST_LIMIT, we can immediately return to the
2821 caller. */
2822 alg_hash[hash_index].t = t;
2823 alg_hash[hash_index].mode = mode;
2824 alg_hash[hash_index].alg = alg_impossible;
2825 alg_hash[hash_index].cost = *cost_limit;
2826 return;
2827 }
2828
2829 /* Cache the result. */
2830 if (!cache_hit)
2831 {
2832 alg_hash[hash_index].t = t;
2833 alg_hash[hash_index].mode = mode;
2834 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2835 alg_hash[hash_index].cost.cost = best_cost.cost;
2836 alg_hash[hash_index].cost.latency = best_cost.latency;
2837 }
2838
2839 /* If we are getting a too long sequence for `struct algorithm'
2840 to record, make this search fail. */
2841 if (best_alg->ops == MAX_BITS_PER_WORD)
2842 return;
2843
2844 /* Copy the algorithm from temporary space to the space at alg_out.
2845 We avoid using structure assignment because the majority of
2846 best_alg is normally undefined, and this is a critical function. */
2847 alg_out->ops = best_alg->ops + 1;
2848 alg_out->cost = best_cost;
2849 memcpy (alg_out->op, best_alg->op,
2850 alg_out->ops * sizeof *alg_out->op);
2851 memcpy (alg_out->log, best_alg->log,
2852 alg_out->ops * sizeof *alg_out->log);
2853 }
2854 \f
2855 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2856 Try three variations:
2857
2858 - a shift/add sequence based on VAL itself
2859 - a shift/add sequence based on -VAL, followed by a negation
2860 - a shift/add sequence based on VAL - 1, followed by an addition.
2861
2862 Return true if the cheapest of these cost less than MULT_COST,
2863 describing the algorithm in *ALG and final fixup in *VARIANT. */
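/* For instance, multiplying by 9 is typically cheapest as the basic
   variant ((x << 3) + x directly), whereas multiplying by a negative
   value such as -7 may be cheaper as the negation variant: synthesize
   x*7 and negate the result.  The choice is made purely by the cost
   comparisons below; nothing here is target-specific.  */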
2864
2865 static bool
2866 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2867 struct algorithm *alg, enum mult_variant *variant,
2868 int mult_cost)
2869 {
2870 struct algorithm alg2;
2871 struct mult_cost limit;
2872 int op_cost;
2873
2874 /* Fail quickly for impossible bounds. */
2875 if (mult_cost < 0)
2876 return false;
2877
2878 /* Ensure that mult_cost provides a reasonable upper bound.
2879 Any constant multiplication can be performed with less
2880 than 2 * bits additions. */
2881 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2882 if (mult_cost > op_cost)
2883 mult_cost = op_cost;
2884
2885 *variant = basic_variant;
2886 limit.cost = mult_cost;
2887 limit.latency = mult_cost;
2888 synth_mult (alg, val, &limit, mode);
2889
2890 /* This works only if the inverted value actually fits in an
2891 `unsigned int'. */
2892 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2893 {
2894 op_cost = neg_cost[mode];
2895 if (MULT_COST_LESS (&alg->cost, mult_cost))
2896 {
2897 limit.cost = alg->cost.cost - op_cost;
2898 limit.latency = alg->cost.latency - op_cost;
2899 }
2900 else
2901 {
2902 limit.cost = mult_cost - op_cost;
2903 limit.latency = mult_cost - op_cost;
2904 }
2905
2906 synth_mult (&alg2, -val, &limit, mode);
2907 alg2.cost.cost += op_cost;
2908 alg2.cost.latency += op_cost;
2909 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2910 *alg = alg2, *variant = negate_variant;
2911 }
2912
2913 /* This proves very useful for division-by-constant. */
2914 op_cost = add_cost[mode];
2915 if (MULT_COST_LESS (&alg->cost, mult_cost))
2916 {
2917 limit.cost = alg->cost.cost - op_cost;
2918 limit.latency = alg->cost.latency - op_cost;
2919 }
2920 else
2921 {
2922 limit.cost = mult_cost - op_cost;
2923 limit.latency = mult_cost - op_cost;
2924 }
2925
2926 synth_mult (&alg2, val - 1, &limit, mode);
2927 alg2.cost.cost += op_cost;
2928 alg2.cost.latency += op_cost;
2929 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2930 *alg = alg2, *variant = add_variant;
2931
2932 return MULT_COST_LESS (&alg->cost, mult_cost);
2933 }
2934
2935 /* A subroutine of expand_mult, used for constant multiplications.
2936 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2937 convenient. Use the shift/add sequence described by ALG and apply
2938 the final fixup specified by VARIANT. */
2939
2940 static rtx
2941 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2942 rtx target, const struct algorithm *alg,
2943 enum mult_variant variant)
2944 {
2945 HOST_WIDE_INT val_so_far;
2946 rtx insn, accum, tem;
2947 int opno;
2948 enum machine_mode nmode;
2949
2950 /* Avoid referencing memory over and over.
2951 For speed, but also for correctness when mem is volatile. */
2952 if (MEM_P (op0))
2953 op0 = force_reg (mode, op0);
2954
2955 /* ACCUM starts out either as OP0 or as a zero, depending on
2956 the first operation. */
2957
2958 if (alg->op[0] == alg_zero)
2959 {
2960 accum = copy_to_mode_reg (mode, const0_rtx);
2961 val_so_far = 0;
2962 }
2963 else if (alg->op[0] == alg_m)
2964 {
2965 accum = copy_to_mode_reg (mode, op0);
2966 val_so_far = 1;
2967 }
2968 else
2969 gcc_unreachable ();
2970
2971 for (opno = 1; opno < alg->ops; opno++)
2972 {
2973 int log = alg->log[opno];
2974 rtx shift_subtarget = optimize ? 0 : accum;
2975 rtx add_target
2976 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2977 && !optimize)
2978 ? target : 0;
2979 rtx accum_target = optimize ? 0 : accum;
2980
2981 switch (alg->op[opno])
2982 {
2983 case alg_shift:
2984 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985 build_int_cst (NULL_TREE, log),
2986 NULL_RTX, 0);
2987 val_so_far <<= log;
2988 break;
2989
2990 case alg_add_t_m2:
2991 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2992 build_int_cst (NULL_TREE, log),
2993 NULL_RTX, 0);
2994 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2995 add_target ? add_target : accum_target);
2996 val_so_far += (HOST_WIDE_INT) 1 << log;
2997 break;
2998
2999 case alg_sub_t_m2:
3000 tem = expand_shift (LSHIFT_EXPR, mode, op0,
3001 build_int_cst (NULL_TREE, log),
3002 NULL_RTX, 0);
3003 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3004 add_target ? add_target : accum_target);
3005 val_so_far -= (HOST_WIDE_INT) 1 << log;
3006 break;
3007
3008 case alg_add_t2_m:
3009 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010 build_int_cst (NULL_TREE, log),
3011 shift_subtarget,
3012 0);
3013 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3014 add_target ? add_target : accum_target);
3015 val_so_far = (val_so_far << log) + 1;
3016 break;
3017
3018 case alg_sub_t2_m:
3019 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3020 build_int_cst (NULL_TREE, log),
3021 shift_subtarget, 0);
3022 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3023 add_target ? add_target : accum_target);
3024 val_so_far = (val_so_far << log) - 1;
3025 break;
3026
3027 case alg_add_factor:
3028 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3029 build_int_cst (NULL_TREE, log),
3030 NULL_RTX, 0);
3031 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3032 add_target ? add_target : accum_target);
3033 val_so_far += val_so_far << log;
3034 break;
3035
3036 case alg_sub_factor:
3037 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3038 build_int_cst (NULL_TREE, log),
3039 NULL_RTX, 0);
3040 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3041 (add_target
3042 ? add_target : (optimize ? 0 : tem)));
3043 val_so_far = (val_so_far << log) - val_so_far;
3044 break;
3045
3046 default:
3047 gcc_unreachable ();
3048 }
3049
3050 /* Write a REG_EQUAL note on the last insn so that we can cse
3051 multiplication sequences. Note that if ACCUM is a SUBREG,
3052 we've set the inner register and must properly indicate
3053 that. */
3054
3055 tem = op0, nmode = mode;
3056 if (GET_CODE (accum) == SUBREG)
3057 {
3058 nmode = GET_MODE (SUBREG_REG (accum));
3059 tem = gen_lowpart (nmode, op0);
3060 }
3061
3062 insn = get_last_insn ();
3063 set_unique_reg_note (insn, REG_EQUAL,
3064 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3065 }
3066
3067 if (variant == negate_variant)
3068 {
3069 val_so_far = -val_so_far;
3070 accum = expand_unop (mode, neg_optab, accum, target, 0);
3071 }
3072 else if (variant == add_variant)
3073 {
3074 val_so_far = val_so_far + 1;
3075 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3076 }
3077
3078 /* Compare only the bits of val and val_so_far that are significant
3079 in the result mode, to avoid sign-/zero-extension confusion. */
3080 val &= GET_MODE_MASK (mode);
3081 val_so_far &= GET_MODE_MASK (mode);
3082 gcc_assert (val == val_so_far);
3083
3084 return accum;
3085 }
3086
3087 /* Perform a multiplication and return an rtx for the result.
3088 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3089 TARGET is a suggestion for where to store the result (an rtx).
3090
3091 We check specially for a constant integer as OP1.
3092 If you want this check for OP0 as well, then before calling
3093 you should swap the two operands if OP0 would be constant. */
3094
3095 rtx
3096 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3097 int unsignedp)
3098 {
3099 enum mult_variant variant;
3100 struct algorithm algorithm;
3101 int max_cost;
3102
3103 /* Handling const0_rtx here allows us to use zero as a rogue value for
3104 coeff below. */
3105 if (op1 == const0_rtx)
3106 return const0_rtx;
3107 if (op1 == const1_rtx)
3108 return op0;
3109 if (op1 == constm1_rtx)
3110 return expand_unop (mode,
3111 GET_MODE_CLASS (mode) == MODE_INT
3112 && !unsignedp && flag_trapv
3113 ? negv_optab : neg_optab,
3114 op0, target, 0);
3115
3116 /* These are the operations that are potentially turned into a sequence
3117 of shifts and additions. */
3118 if (SCALAR_INT_MODE_P (mode)
3119 && (unsignedp || !flag_trapv))
3120 {
3121 HOST_WIDE_INT coeff = 0;
3122 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3123
3124 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3125 less than or equal in size to `unsigned int' this doesn't matter.
3126 If the mode is larger than `unsigned int', then synth_mult works
3127 only if the constant value exactly fits in an `unsigned int' without
3128 any truncation. This means that multiplying by negative values does
3129 not work; results are off by 2^32 on a 32 bit machine. */
3130
3131 if (GET_CODE (op1) == CONST_INT)
3132 {
3133 /* Attempt to handle multiplication of DImode values by negative
3134 coefficients, by performing the multiplication by a positive
3135 multiplier and then negating the result.
3136 if (INTVAL (op1) < 0
3137 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3138 {
3139 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3140 result is interpreted as an unsigned coefficient.
3141 Exclude cost of op0 from max_cost to match the cost
3142 calculation of the synth_mult. */
3143 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3144 - neg_cost[mode];
3145 if (max_cost > 0
3146 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3147 &variant, max_cost))
3148 {
3149 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3150 NULL_RTX, &algorithm,
3151 variant);
3152 return expand_unop (mode, neg_optab, temp, target, 0);
3153 }
3154 }
3155 else coeff = INTVAL (op1);
3156 }
3157 else if (GET_CODE (op1) == CONST_DOUBLE)
3158 {
3159 /* If we are multiplying in DImode, it may still be a win
3160 to try to work with shifts and adds. */
3161 if (CONST_DOUBLE_HIGH (op1) == 0)
3162 coeff = CONST_DOUBLE_LOW (op1);
3163 else if (CONST_DOUBLE_LOW (op1) == 0
3164 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3165 {
3166 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3167 + HOST_BITS_PER_WIDE_INT;
3168 return expand_shift (LSHIFT_EXPR, mode, op0,
3169 build_int_cst (NULL_TREE, shift),
3170 target, unsignedp);
3171 }
3172 }
3173
3174 /* We used to test optimize here, on the grounds that it's better to
3175 produce a smaller program when -O is not used. But this causes
3176 such a terrible slowdown sometimes that it seems better to always
3177 use synth_mult. */
3178 if (coeff != 0)
3179 {
3180 /* Special case powers of two. */
3181 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3182 return expand_shift (LSHIFT_EXPR, mode, op0,
3183 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3184 target, unsignedp);
3185
3186 /* Exclude cost of op0 from max_cost to match the cost
3187 calculation of the synth_mult. */
3188 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3189 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3190 max_cost))
3191 return expand_mult_const (mode, op0, coeff, target,
3192 &algorithm, variant);
3193 }
3194 }
3195
3196 if (GET_CODE (op0) == CONST_DOUBLE)
3197 {
3198 rtx temp = op0;
3199 op0 = op1;
3200 op1 = temp;
3201 }
3202
3203 /* Expand x*2.0 as x+x. */
3204 if (GET_CODE (op1) == CONST_DOUBLE
3205 && SCALAR_FLOAT_MODE_P (mode))
3206 {
3207 REAL_VALUE_TYPE d;
3208 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3209
3210 if (REAL_VALUES_EQUAL (d, dconst2))
3211 {
3212 op0 = force_reg (GET_MODE (op0), op0);
3213 return expand_binop (mode, add_optab, op0, op0,
3214 target, unsignedp, OPTAB_LIB_WIDEN);
3215 }
3216 }
3217
3218 /* This used to use umul_optab if unsigned, but for non-widening multiply
3219 there is no difference between signed and unsigned. */
3220 op0 = expand_binop (mode,
3221 ! unsignedp
3222 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3223 ? smulv_optab : smul_optab,
3224 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3225 gcc_assert (op0);
3226 return op0;
3227 }
3228 \f
3229 /* Return the smallest n such that 2**n >= X. */
3230
3231 int
3232 ceil_log2 (unsigned HOST_WIDE_INT x)
3233 {
3234 return floor_log2 (x - 1) + 1;
3235 }
3236
3237 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3238 replace division by D, and put the least significant N bits of the result
3239 in *MULTIPLIER_PTR and return the most significant bit.
3240
3241 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3242 needed precision is in PRECISION (should be <= N).
3243
3244 PRECISION should be as small as possible so this function can choose
3245 multiplier more freely.
3246
3247 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3248 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3249
3250 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3251 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3252
3253 static
3254 unsigned HOST_WIDE_INT
3255 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3256 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3257 {
3258 HOST_WIDE_INT mhigh_hi, mlow_hi;
3259 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3260 int lgup, post_shift;
3261 int pow, pow2;
3262 unsigned HOST_WIDE_INT nl, dummy1;
3263 HOST_WIDE_INT nh, dummy2;
3264
3265 /* lgup = ceil(log2(divisor)); */
3266 lgup = ceil_log2 (d);
3267
3268 gcc_assert (lgup <= n);
3269
3270 pow = n + lgup;
3271 pow2 = n + lgup - precision;
3272
3273 /* We could handle this with some effort, but this case is much
3274 better handled directly with a scc insn, so rely on caller using
3275 that. */
3276 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3277
3278 /* mlow = 2^(N + lgup)/d */
3279 if (pow >= HOST_BITS_PER_WIDE_INT)
3280 {
3281 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3282 nl = 0;
3283 }
3284 else
3285 {
3286 nh = 0;
3287 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3288 }
3289 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3290 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3291
3292 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3293 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3294 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3295 else
3296 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3297 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3298 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3299
3300 gcc_assert (!mhigh_hi || nh - d < d);
3301 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3302 /* Assert that mlow < mhigh. */
3303 gcc_assert (mlow_hi < mhigh_hi
3304 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3305
3306 /* If precision == N, then mlow, mhigh exceed 2^N
3307 (but they do not exceed 2^(N+1)). */
3308
3309 /* Reduce to lowest terms. */
3310 for (post_shift = lgup; post_shift > 0; post_shift--)
3311 {
3312 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3313 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3314 if (ml_lo >= mh_lo)
3315 break;
3316
3317 mlow_hi = 0;
3318 mlow_lo = ml_lo;
3319 mhigh_hi = 0;
3320 mhigh_lo = mh_lo;
3321 }
3322
3323 *post_shift_ptr = post_shift;
3324 *lgup_ptr = lgup;
3325 if (n < HOST_BITS_PER_WIDE_INT)
3326 {
3327 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3328 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3329 return mhigh_lo >= mask;
3330 }
3331 else
3332 {
3333 *multiplier_ptr = GEN_INT (mhigh_lo);
3334 return mhigh_hi;
3335 }
3336 }
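/* Worked example (assuming N == PRECISION == 32): for D == 5, lgup == 3,
   the initial pair is mlow = 2^35/5 and mhigh = (2^35 + 2^3)/5; the
   reduction loop brings POST_SHIFT down to 2 and leaves the multiplier
   0xCCCCCCCD, so an unsigned x/5 is the high 32 bits of x * 0xCCCCCCCD
   shifted right by 2 more, i.e. (x * 0xCCCCCCCD) >> 34.  */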
3337
3338 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3339 congruent to 1 (mod 2**N). */
3340
3341 static unsigned HOST_WIDE_INT
3342 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3343 {
3344 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3345
3346 /* The algorithm notes that the choice y = x satisfies
3347 x*y == 1 mod 2^3, since x is assumed odd.
3348 Each iteration doubles the number of bits of significance in y. */
3349
3350 unsigned HOST_WIDE_INT mask;
3351 unsigned HOST_WIDE_INT y = x;
3352 int nbit = 3;
3353
3354 mask = (n == HOST_BITS_PER_WIDE_INT
3355 ? ~(unsigned HOST_WIDE_INT) 0
3356 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3357
3358 while (nbit < n)
3359 {
3360 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3361 nbit *= 2;
3362 }
3363 return y;
3364 }
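/* Example (added for exposition): invert_mod2n (3, 32) == 0xAAAAAAAB,
   since 3 * 0xAAAAAAAB == 2 * 2^32 + 1 == 1 (mod 2^32).  The update step
   is Newton's iteration for a reciprocal: if x*y == 1 (mod 2^k) and
   y' = y * (2 - x*y), then x*y' == 1 - (x*y - 1)^2 == 1 (mod 2^(2k)),
   which is why the number of correct low-order bits doubles each pass.  */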
3365
3366 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3367 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3368 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3369 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3370 become signed.
3371
3372 The result is put in TARGET if that is convenient.
3373
3374 MODE is the mode of operation. */
3375
3376 rtx
3377 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3378 rtx op1, rtx target, int unsignedp)
3379 {
3380 rtx tem;
3381 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3382
3383 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3384 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3385 NULL_RTX, 0);
3386 tem = expand_and (mode, tem, op1, NULL_RTX);
3387 adj_operand
3388 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3389 adj_operand);
3390
3391 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3392 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3393 NULL_RTX, 0);
3394 tem = expand_and (mode, tem, op0, NULL_RTX);
3395 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3396 target);
3397
3398 return target;
3399 }
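/* Exposition note (not in the original source): the adjustment above uses
   the identity, valid modulo 2^N for N-bit operands,

     highpart_unsigned (a, b)
       == highpart_signed (a, b) + (a < 0 ? b : 0) + (b < 0 ? a : 0)

   because reinterpreting an N-bit value as unsigned adds 2^N exactly when
   the value is negative.  The arithmetic shift by N-1 bits followed by an
   AND produces the (x < 0 ? y : 0) terms without branching.  */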
3400
3401 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3402
3403 static rtx
3404 extract_high_half (enum machine_mode mode, rtx op)
3405 {
3406 enum machine_mode wider_mode;
3407
3408 if (mode == word_mode)
3409 return gen_highpart (mode, op);
3410
3411 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3412
3413 wider_mode = GET_MODE_WIDER_MODE (mode);
3414 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3415 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3416 return convert_modes (mode, wider_mode, op, 0);
3417 }
3418
3419 /* Like expand_mult_highpart, but only consider using a multiplication
3420 optab. OP1 is an rtx for the constant operand. */
3421
3422 static rtx
3423 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3424 rtx target, int unsignedp, int max_cost)
3425 {
3426 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3427 enum machine_mode wider_mode;
3428 optab moptab;
3429 rtx tem;
3430 int size;
3431
3432 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3433
3434 wider_mode = GET_MODE_WIDER_MODE (mode);
3435 size = GET_MODE_BITSIZE (mode);
3436
3437 /* Firstly, try using a multiplication insn that only generates the needed
3438 high part of the product, and in the sign flavor of unsignedp. */
3439 if (mul_highpart_cost[mode] < max_cost)
3440 {
3441 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3442 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3443 unsignedp, OPTAB_DIRECT);
3444 if (tem)
3445 return tem;
3446 }
3447
3448 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3449 Need to adjust the result after the multiplication. */
3450 if (size - 1 < BITS_PER_WORD
3451 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3452 + 4 * add_cost[mode] < max_cost))
3453 {
3454 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3455 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3456 unsignedp, OPTAB_DIRECT);
3457 if (tem)
3458 /* We used the wrong signedness. Adjust the result. */
3459 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3460 tem, unsignedp);
3461 }
3462
3463 /* Try widening multiplication. */
3464 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3465 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3466 && mul_widen_cost[wider_mode] < max_cost)
3467 {
3468 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3469 unsignedp, OPTAB_WIDEN);
3470 if (tem)
3471 return extract_high_half (mode, tem);
3472 }
3473
3474 /* Try widening the mode and performing a non-widening multiplication. */
3475 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3476 && size - 1 < BITS_PER_WORD
3477 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3478 {
3479 rtx insns, wop0, wop1;
3480
3481 /* We need to widen the operands, for example to ensure the
3482 constant multiplier is correctly sign or zero extended.
3483 Use a sequence to clean up any instructions emitted by
3484 the conversions if things don't work out. */
3485 start_sequence ();
3486 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3487 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3488 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3489 unsignedp, OPTAB_WIDEN);
3490 insns = get_insns ();
3491 end_sequence ();
3492
3493 if (tem)
3494 {
3495 emit_insn (insns);
3496 return extract_high_half (mode, tem);
3497 }
3498 }
3499
3500 /* Try widening multiplication of opposite signedness, and adjust. */
3501 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3502 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3503 && size - 1 < BITS_PER_WORD
3504 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3505 + 4 * add_cost[mode] < max_cost))
3506 {
3507 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3508 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3509 if (tem != 0)
3510 {
3511 tem = extract_high_half (mode, tem);
3512 /* We used the wrong signedness. Adjust the result. */
3513 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3514 target, unsignedp);
3515 }
3516 }
3517
3518 return 0;
3519 }
3520
3521 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3522 putting the high half of the result in TARGET if that is convenient,
3523 and return where the result is. If the operation cannot be performed,
3524 0 is returned.
3525
3526 MODE is the mode of operation and result.
3527
3528 UNSIGNEDP nonzero means unsigned multiply.
3529
3530 MAX_COST is the total allowed cost for the expanded RTL. */
3531
3532 static rtx
3533 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3534 rtx target, int unsignedp, int max_cost)
3535 {
3536 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3537 unsigned HOST_WIDE_INT cnst1;
3538 int extra_cost;
3539 bool sign_adjust = false;
3540 enum mult_variant variant;
3541 struct algorithm alg;
3542 rtx tem;
3543
3544 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3545 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3546 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3547
3548 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3549
3550 /* We can't optimize modes wider than BITS_PER_WORD.
3551 ??? We might be able to perform double-word arithmetic if
3552 mode == word_mode, however all the cost calculations in
3553 synth_mult etc. assume single-word operations. */
3554 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3555 return expand_mult_highpart_optab (mode, op0, op1, target,
3556 unsignedp, max_cost);
3557
3558 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3559
3560 /* Check whether we try to multiply by a negative constant. */
3561 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3562 {
3563 sign_adjust = true;
3564 extra_cost += add_cost[mode];
3565 }
3566
3567 /* See whether shift/add multiplication is cheap enough. */
3568 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3569 max_cost - extra_cost))
3570 {
3571 /* See whether the specialized multiplication optabs are
3572 cheaper than the shift/add version. */
3573 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3574 alg.cost.cost + extra_cost);
3575 if (tem)
3576 return tem;
3577
3578 tem = convert_to_mode (wider_mode, op0, unsignedp);
3579 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3580 tem = extract_high_half (mode, tem);
3581
3582 /* Adjust result for signedness. */
3583 if (sign_adjust)
3584 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3585
3586 return tem;
3587 }
3588 return expand_mult_highpart_optab (mode, op0, op1, target,
3589 unsignedp, max_cost);
3590 }
3591
3592
3593 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3594
3595 static rtx
3596 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3597 {
3598 unsigned HOST_WIDE_INT masklow, maskhigh;
3599 rtx result, temp, shift, label;
3600 int logd;
3601
3602 logd = floor_log2 (d);
3603 result = gen_reg_rtx (mode);
3604
3605 /* Avoid conditional branches when they're expensive. */
3606 if (BRANCH_COST >= 2
3607 && !optimize_size)
3608 {
3609 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3610 mode, 0, -1);
3611 if (signmask)
3612 {
3613 signmask = force_reg (mode, signmask);
3614 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3615 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3616
3617 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3618 which instruction sequence to use. If logical right shifts
3619 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3620 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3621
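/* Exposition (added comment): with signmask == (op0 < 0 ? -1 : 0),
   the two alternatives below compute, in C terms,

     ((((x ^ s) - s) & (d - 1)) ^ s) - s

   and

     ((x + ((unsigned) s >> (bits - logd))) & (d - 1))
       - ((unsigned) s >> (bits - logd))

   where s is signmask; both yield the truncated remainder x % d.  */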
3622 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3623 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3624 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3625 {
3626 temp = expand_binop (mode, xor_optab, op0, signmask,
3627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628 temp = expand_binop (mode, sub_optab, temp, signmask,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3630 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3631 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3632 temp = expand_binop (mode, xor_optab, temp, signmask,
3633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3634 temp = expand_binop (mode, sub_optab, temp, signmask,
3635 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3636 }
3637 else
3638 {
3639 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 signmask = force_reg (mode, signmask);
3642
3643 temp = expand_binop (mode, add_optab, op0, signmask,
3644 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3646 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3647 temp = expand_binop (mode, sub_optab, temp, signmask,
3648 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649 }
3650 return temp;
3651 }
3652 }
3653
3654 /* Mask contains the mode's signbit and the significant bits of the
3655 modulus. By including the signbit in the operation, many targets
3656 can avoid an explicit compare operation in the following comparison
3657 against zero. */
3658
3659 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3660 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3661 {
3662 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3663 maskhigh = -1;
3664 }
3665 else
3666 maskhigh = (HOST_WIDE_INT) -1
3667 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3668
3669 temp = expand_binop (mode, and_optab, op0,
3670 immed_double_const (masklow, maskhigh, mode),
3671 result, 1, OPTAB_LIB_WIDEN);
3672 if (temp != result)
3673 emit_move_insn (result, temp);
3674
3675 label = gen_label_rtx ();
3676 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3677
3678 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3679 0, OPTAB_LIB_WIDEN);
3680 masklow = (HOST_WIDE_INT) -1 << logd;
3681 maskhigh = -1;
3682 temp = expand_binop (mode, ior_optab, temp,
3683 immed_double_const (masklow, maskhigh, mode),
3684 result, 1, OPTAB_LIB_WIDEN);
3685 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3686 0, OPTAB_LIB_WIDEN);
3687 if (temp != result)
3688 emit_move_insn (result, temp);
3689 emit_label (label);
3690 return result;
3691 }
3692
3693 /* Expand signed division of OP0 by a power of two D in mode MODE.
3694 This routine is only called for positive values of D. */
3695
3696 static rtx
3697 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3698 {
3699 rtx temp, label;
3700 tree shift;
3701 int logd;
3702
3703 logd = floor_log2 (d);
3704 shift = build_int_cst (NULL_TREE, logd);
3705
3706 if (d == 2 && BRANCH_COST >= 1)
3707 {
3708 temp = gen_reg_rtx (mode);
3709 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3710 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3711 0, OPTAB_LIB_WIDEN);
3712 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3713 }
3714
3715 #ifdef HAVE_conditional_move
3716 if (BRANCH_COST >= 2)
3717 {
3718 rtx temp2;
3719
3720 /* ??? emit_conditional_move forces a stack adjustment via
3721 compare_from_rtx so, if the sequence is discarded, it will
3722 be lost. Do it now instead. */
3723 do_pending_stack_adjust ();
3724
3725 start_sequence ();
3726 temp2 = copy_to_mode_reg (mode, op0);
3727 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3728 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3729 temp = force_reg (mode, temp);
3730
3731 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3732 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3733 mode, temp, temp2, mode, 0);
3734 if (temp2)
3735 {
3736 rtx seq = get_insns ();
3737 end_sequence ();
3738 emit_insn (seq);
3739 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3740 }
3741 end_sequence ();
3742 }
3743 #endif
3744
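/* Exposition (added comment): the branch-free sequence below computes

     (op0 + (op0 < 0 ? d - 1 : 0)) >> logd

   by materializing the mask (op0 < 0 ? -1 : 0) with a store-flag insn and
   turning it into d - 1 either with an AND or with a logical right shift
   by bitsize - logd, whichever the cost tables report as cheaper.  */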
3745 if (BRANCH_COST >= 2)
3746 {
3747 int ushift = GET_MODE_BITSIZE (mode) - logd;
3748
3749 temp = gen_reg_rtx (mode);
3750 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3751 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3752 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3753 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3754 else
3755 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3756 build_int_cst (NULL_TREE, ushift),
3757 NULL_RTX, 1);
3758 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3759 0, OPTAB_LIB_WIDEN);
3760 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3761 }
3762
3763 label = gen_label_rtx ();
3764 temp = copy_to_mode_reg (mode, op0);
3765 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3766 expand_inc (temp, GEN_INT (d - 1));
3767 emit_label (label);
3768 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3769 }
3770 \f
3771 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3772 if that is convenient, and returning where the result is.
3773 You may request either the quotient or the remainder as the result;
3774 specify REM_FLAG nonzero to get the remainder.
3775
3776 CODE is the expression code for which kind of division this is;
3777 it controls how rounding is done. MODE is the machine mode to use.
3778 UNSIGNEDP nonzero means do unsigned division. */
3779
3780 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3781 and then correct it by or'ing in missing high bits
3782 if result of ANDI is nonzero.
3783 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3784 This could optimize to a bfexts instruction.
3785 But C doesn't use these operations, so their optimizations are
3786 left for later. */
3787 /* ??? For modulo, we don't actually need the highpart of the first product,
3788 the low part will do nicely. And for small divisors, the second multiply
3789 can also be a low-part only multiply or even be completely left out.
3790 E.g. to calculate the remainder of a division by 3 with a 32 bit
3791 multiply, multiply with 0x55555556 and extract the upper two bits;
3792 the result is exact for inputs up to 0x1fffffff.
3793 The input range can be reduced by using cross-sum rules.
3794 For odd divisors >= 3, the following table gives right shift counts
3795 so that if a number is shifted by an integer multiple of the given
3796 amount, the remainder stays the same:
3797 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3798 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3799 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3800 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3801 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3802
3803 Cross-sum rules for even numbers can be derived by leaving as many bits
3804 to the right alone as the divisor has zeros to the right.
3805 E.g. if x is an unsigned 32 bit number:
3806 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3807 */
3808
3809 rtx
3810 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3811 rtx op0, rtx op1, rtx target, int unsignedp)
3812 {
3813 enum machine_mode compute_mode;
3814 rtx tquotient;
3815 rtx quotient = 0, remainder = 0;
3816 rtx last;
3817 int size;
3818 rtx insn, set;
3819 optab optab1, optab2;
3820 int op1_is_constant, op1_is_pow2 = 0;
3821 int max_cost, extra_cost;
3822 static HOST_WIDE_INT last_div_const = 0;
3823 static HOST_WIDE_INT ext_op1;
3824
3825 op1_is_constant = GET_CODE (op1) == CONST_INT;
3826 if (op1_is_constant)
3827 {
3828 ext_op1 = INTVAL (op1);
3829 if (unsignedp)
3830 ext_op1 &= GET_MODE_MASK (mode);
3831 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3832 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3833 }
3834
3835 /*
3836 This is the structure of expand_divmod:
3837
3838 First comes code to fix up the operands so we can perform the operations
3839 correctly and efficiently.
3840
3841 Second comes a switch statement with code specific for each rounding mode.
3842 For some special operands this code emits all RTL for the desired
3843 operation, for other cases, it generates only a quotient and stores it in
3844 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3845 to indicate that it has not done anything.
3846
3847 Last comes code that finishes the operation. If QUOTIENT is set and
3848 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3849 QUOTIENT is not set, it is computed using trunc rounding.
3850
3851 We try to generate special code for division and remainder when OP1 is a
3852 constant. If |OP1| = 2**n we can use shifts and some other fast
3853 operations. For other values of OP1, we compute a carefully selected
3854 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3855 by m.
3856
3857 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3858 half of the product. Different strategies for generating the product are
3859 implemented in expand_mult_highpart.
3860
3861 If what we actually want is the remainder, we generate that by another
3862 by-constant multiplication and a subtraction. */
3863
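/* For reference (exposition only, not part of the original source), the
   unsigned TRUNC_DIV_EXPR case below emits one of two shapes, where MULH
   denotes the SIZE-bit high-part multiply by the chosen constant m:

     when m fits in SIZE bits:
       q = MULH (x >> pre_shift, m) >> post_shift;
     when m needs SIZE + 1 bits:
       t = MULH (x, m);  q = (((x - t) >> 1) + t) >> (post_shift - 1);

   The signed variants are similar, but finish by subtracting the
   dividend's sign word op0 >> (size - 1), i.e. 0 or -1 (or the reverse,
   for a negative divisor).  */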
3864 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3865 code below will malfunction if we are, so check here and handle
3866 the special case if so. */
3867 if (op1 == const1_rtx)
3868 return rem_flag ? const0_rtx : op0;
3869
3870 /* When dividing by -1, we could get an overflow.
3871 negv_optab can handle overflows. */
3872 if (! unsignedp && op1 == constm1_rtx)
3873 {
3874 if (rem_flag)
3875 return const0_rtx;
3876 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3877 ? negv_optab : neg_optab, op0, target, 0);
3878 }
3879
3880 if (target
3881 /* Don't use the function value register as a target
3882 since we have to read it as well as write it,
3883 and function-inlining gets confused by this. */
3884 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3885 /* Don't clobber an operand while doing a multi-step calculation. */
3886 || ((rem_flag || op1_is_constant)
3887 && (reg_mentioned_p (target, op0)
3888 || (MEM_P (op0) && MEM_P (target))))
3889 || reg_mentioned_p (target, op1)
3890 || (MEM_P (op1) && MEM_P (target))))
3891 target = 0;
3892
3893 /* Get the mode in which to perform this computation. Normally it will
3894 be MODE, but sometimes we can't do the desired operation in MODE.
3895 If so, pick a wider mode in which we can do the operation. Convert
3896 to that mode at the start to avoid repeated conversions.
3897
3898 First see what operations we need. These depend on the expression
3899 we are evaluating. (We assume that divxx3 insns exist under the
3900 same conditions as modxx3 insns and that these insns don't normally
3901 fail. If these assumptions are not correct, we may generate less
3902 efficient code in some cases.)
3903
3904 Then see if we find a mode in which we can open-code that operation
3905 (either a division, modulus, or shift). Finally, check for the smallest
3906 mode for which we can do the operation with a library call. */
3907
3908 /* We might want to refine this now that we have division-by-constant
3909 optimization. Since expand_mult_highpart tries so many variants, it is
3910 not straightforward to generalize this. Maybe we should make an array
3911 of possible modes in init_expmed? Save this for GCC 2.7. */
3912
3913 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3914 ? (unsignedp ? lshr_optab : ashr_optab)
3915 : (unsignedp ? udiv_optab : sdiv_optab));
3916 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3917 ? optab1
3918 : (unsignedp ? udivmod_optab : sdivmod_optab));
3919
3920 for (compute_mode = mode; compute_mode != VOIDmode;
3921 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3922 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3923 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3924 break;
3925
3926 if (compute_mode == VOIDmode)
3927 for (compute_mode = mode; compute_mode != VOIDmode;
3928 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3929 if (optab1->handlers[compute_mode].libfunc
3930 || optab2->handlers[compute_mode].libfunc)
3931 break;
3932
3933 /* If we still couldn't find a mode, use MODE, but expand_binop will
3934 probably die. */
3935 if (compute_mode == VOIDmode)
3936 compute_mode = mode;
3937
3938 if (target && GET_MODE (target) == compute_mode)
3939 tquotient = target;
3940 else
3941 tquotient = gen_reg_rtx (compute_mode);
3942
3943 size = GET_MODE_BITSIZE (compute_mode);
3944 #if 0
3945 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3946 (mode), and thereby get better code when OP1 is a constant. Do that
3947 later. It will require going over all usages of SIZE below. */
3948 size = GET_MODE_BITSIZE (mode);
3949 #endif
3950
3951 /* Only deduct something for a REM if the last divide done was
3952 for a different constant. Then set the constant of the last
3953 divide. */
3954 max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3955 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3956 && INTVAL (op1) == last_div_const))
3957 max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3958
3959 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3960
3961 /* Now convert to the best mode to use. */
3962 if (compute_mode != mode)
3963 {
3964 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3965 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3966
3967 /* convert_modes may have placed op1 into a register, so we
3968 must recompute the following. */
3969 op1_is_constant = GET_CODE (op1) == CONST_INT;
3970 op1_is_pow2 = (op1_is_constant
3971 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3972 || (! unsignedp
3973 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3974 }
3975
3976 /* If one of the operands is a volatile MEM, copy it into a register. */
3977
3978 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3979 op0 = force_reg (compute_mode, op0);
3980 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3981 op1 = force_reg (compute_mode, op1);
3982
3983 /* If we need the remainder or if OP1 is constant, we need to
3984 put OP0 in a register in case it has any queued subexpressions. */
3985 if (rem_flag || op1_is_constant)
3986 op0 = force_reg (compute_mode, op0);
3987
3988 last = get_last_insn ();
3989
3990 /* Promote floor rounding to trunc rounding for unsigned operations. */
3991 if (unsignedp)
3992 {
3993 if (code == FLOOR_DIV_EXPR)
3994 code = TRUNC_DIV_EXPR;
3995 if (code == FLOOR_MOD_EXPR)
3996 code = TRUNC_MOD_EXPR;
3997 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3998 code = TRUNC_DIV_EXPR;
3999 }
4000
4001 if (op1 != const0_rtx)
4002 switch (code)
4003 {
4004 case TRUNC_MOD_EXPR:
4005 case TRUNC_DIV_EXPR:
4006 if (op1_is_constant)
4007 {
4008 if (unsignedp)
4009 {
4010 unsigned HOST_WIDE_INT mh;
4011 int pre_shift, post_shift;
4012 int dummy;
4013 rtx ml;
4014 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4015 & GET_MODE_MASK (compute_mode));
4016
4017 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4018 {
4019 pre_shift = floor_log2 (d);
4020 if (rem_flag)
4021 {
4022 remainder
4023 = expand_binop (compute_mode, and_optab, op0,
4024 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4025 remainder, 1,
4026 OPTAB_LIB_WIDEN);
4027 if (remainder)
4028 return gen_lowpart (mode, remainder);
4029 }
4030 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4031 build_int_cst (NULL_TREE,
4032 pre_shift),
4033 tquotient, 1);
4034 }
4035 else if (size <= HOST_BITS_PER_WIDE_INT)
4036 {
4037 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4038 {
4039 /* Most significant bit of divisor is set; emit an scc
4040 insn. */
4041 quotient = emit_store_flag (tquotient, GEU, op0, op1,
4042 compute_mode, 1, 1);
4043 if (quotient == 0)
4044 goto fail1;
4045 }
4046 else
4047 {
4048 /* Find a suitable multiplier and right shift count
4049 instead of multiplying with D. */
4050
4051 mh = choose_multiplier (d, size, size,
4052 &ml, &post_shift, &dummy);
4053
4054 /* If the suggested multiplier is more than SIZE bits,
4055 we can do better for even divisors, using an
4056 initial right shift. */
4057 if (mh != 0 && (d & 1) == 0)
4058 {
4059 pre_shift = floor_log2 (d & -d);
4060 mh = choose_multiplier (d >> pre_shift, size,
4061 size - pre_shift,
4062 &ml, &post_shift, &dummy);
4063 gcc_assert (!mh);
4064 }
4065 else
4066 pre_shift = 0;
4067
4068 if (mh != 0)
4069 {
4070 rtx t1, t2, t3, t4;
4071
4072 if (post_shift - 1 >= BITS_PER_WORD)
4073 goto fail1;
4074
4075 extra_cost
4076 = (shift_cost[compute_mode][post_shift - 1]
4077 + shift_cost[compute_mode][1]
4078 + 2 * add_cost[compute_mode]);
4079 t1 = expand_mult_highpart (compute_mode, op0, ml,
4080 NULL_RTX, 1,
4081 max_cost - extra_cost);
4082 if (t1 == 0)
4083 goto fail1;
4084 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4085 op0, t1),
4086 NULL_RTX);
4087 t3 = expand_shift
4088 (RSHIFT_EXPR, compute_mode, t2,
4089 build_int_cst (NULL_TREE, 1),
4090 NULL_RTX,1);
4091 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4092 t1, t3),
4093 NULL_RTX);
4094 quotient = expand_shift
4095 (RSHIFT_EXPR, compute_mode, t4,
4096 build_int_cst (NULL_TREE, post_shift - 1),
4097 tquotient, 1);
4098 }
4099 else
4100 {
4101 rtx t1, t2;
4102
4103 if (pre_shift >= BITS_PER_WORD
4104 || post_shift >= BITS_PER_WORD)
4105 goto fail1;
4106
4107 t1 = expand_shift
4108 (RSHIFT_EXPR, compute_mode, op0,
4109 build_int_cst (NULL_TREE, pre_shift),
4110 NULL_RTX, 1);
4111 extra_cost
4112 = (shift_cost[compute_mode][pre_shift]
4113 + shift_cost[compute_mode][post_shift]);
4114 t2 = expand_mult_highpart (compute_mode, t1, ml,
4115 NULL_RTX, 1,
4116 max_cost - extra_cost);
4117 if (t2 == 0)
4118 goto fail1;
4119 quotient = expand_shift
4120 (RSHIFT_EXPR, compute_mode, t2,
4121 build_int_cst (NULL_TREE, post_shift),
4122 tquotient, 1);
4123 }
4124 }
4125 }
4126 else /* Too wide mode to use tricky code */
4127 break;
4128
4129 insn = get_last_insn ();
4130 if (insn != last
4131 && (set = single_set (insn)) != 0
4132 && SET_DEST (set) == quotient)
4133 set_unique_reg_note (insn,
4134 REG_EQUAL,
4135 gen_rtx_UDIV (compute_mode, op0, op1));
4136 }
4137 else /* TRUNC_DIV, signed */
4138 {
4139 unsigned HOST_WIDE_INT ml;
4140 int lgup, post_shift;
4141 rtx mlr;
4142 HOST_WIDE_INT d = INTVAL (op1);
4143 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4144
4145 /* n rem d = n rem -d */
4146 if (rem_flag && d < 0)
4147 {
4148 d = abs_d;
4149 op1 = gen_int_mode (abs_d, compute_mode);
4150 }
4151
4152 if (d == 1)
4153 quotient = op0;
4154 else if (d == -1)
4155 quotient = expand_unop (compute_mode, neg_optab, op0,
4156 tquotient, 0);
4157 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4158 {
4159 /* This case is not handled correctly below. */
4160 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4161 compute_mode, 1, 1);
4162 if (quotient == 0)
4163 goto fail1;
4164 }
4165 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4166 && (rem_flag ? smod_pow2_cheap[compute_mode]
4167 : sdiv_pow2_cheap[compute_mode])
4168 /* We assume that cheap metric is true if the
4169 optab has an expander for this mode. */
4170 && (((rem_flag ? smod_optab : sdiv_optab)
4171 ->handlers[compute_mode].insn_code
4172 != CODE_FOR_nothing)
4173 || (sdivmod_optab->handlers[compute_mode]
4174 .insn_code != CODE_FOR_nothing)))
4175 ;
4176 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4177 {
4178 if (rem_flag)
4179 {
4180 remainder = expand_smod_pow2 (compute_mode, op0, d);
4181 if (remainder)
4182 return gen_lowpart (mode, remainder);
4183 }
4184
4185 if (sdiv_pow2_cheap[compute_mode]
4186 && ((sdiv_optab->handlers[compute_mode].insn_code
4187 != CODE_FOR_nothing)
4188 || (sdivmod_optab->handlers[compute_mode].insn_code
4189 != CODE_FOR_nothing)))
4190 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4191 compute_mode, op0,
4192 gen_int_mode (abs_d,
4193 compute_mode),
4194 NULL_RTX, 0);
4195 else
4196 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4197
4198 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4199 negate the quotient. */
4200 if (d < 0)
4201 {
4202 insn = get_last_insn ();
4203 if (insn != last
4204 && (set = single_set (insn)) != 0
4205 && SET_DEST (set) == quotient
4206 && abs_d < ((unsigned HOST_WIDE_INT) 1
4207 << (HOST_BITS_PER_WIDE_INT - 1)))
4208 set_unique_reg_note (insn,
4209 REG_EQUAL,
4210 gen_rtx_DIV (compute_mode,
4211 op0,
4212 GEN_INT
4213 (trunc_int_for_mode
4214 (abs_d,
4215 compute_mode))));
4216
4217 quotient = expand_unop (compute_mode, neg_optab,
4218 quotient, quotient, 0);
4219 }
4220 }
4221 else if (size <= HOST_BITS_PER_WIDE_INT)
4222 {
4223 choose_multiplier (abs_d, size, size - 1,
4224 &mlr, &post_shift, &lgup);
4225 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4226 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4227 {
4228 rtx t1, t2, t3;
4229
4230 if (post_shift >= BITS_PER_WORD
4231 || size - 1 >= BITS_PER_WORD)
4232 goto fail1;
4233
4234 extra_cost = (shift_cost[compute_mode][post_shift]
4235 + shift_cost[compute_mode][size - 1]
4236 + add_cost[compute_mode]);
4237 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4238 NULL_RTX, 0,
4239 max_cost - extra_cost);
4240 if (t1 == 0)
4241 goto fail1;
4242 t2 = expand_shift
4243 (RSHIFT_EXPR, compute_mode, t1,
4244 build_int_cst (NULL_TREE, post_shift),
4245 NULL_RTX, 0);
4246 t3 = expand_shift
4247 (RSHIFT_EXPR, compute_mode, op0,
4248 build_int_cst (NULL_TREE, size - 1),
4249 NULL_RTX, 0);
4250 if (d < 0)
4251 quotient
4252 = force_operand (gen_rtx_MINUS (compute_mode,
4253 t3, t2),
4254 tquotient);
4255 else
4256 quotient
4257 = force_operand (gen_rtx_MINUS (compute_mode,
4258 t2, t3),
4259 tquotient);
4260 }
4261 else
4262 {
4263 rtx t1, t2, t3, t4;
4264
4265 if (post_shift >= BITS_PER_WORD
4266 || size - 1 >= BITS_PER_WORD)
4267 goto fail1;
4268
4269 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4270 mlr = gen_int_mode (ml, compute_mode);
4271 extra_cost = (shift_cost[compute_mode][post_shift]
4272 + shift_cost[compute_mode][size - 1]
4273 + 2 * add_cost[compute_mode]);
4274 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4275 NULL_RTX, 0,
4276 max_cost - extra_cost);
4277 if (t1 == 0)
4278 goto fail1;
4279 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4280 t1, op0),
4281 NULL_RTX);
4282 t3 = expand_shift
4283 (RSHIFT_EXPR, compute_mode, t2,
4284 build_int_cst (NULL_TREE, post_shift),
4285 NULL_RTX, 0);
4286 t4 = expand_shift
4287 (RSHIFT_EXPR, compute_mode, op0,
4288 build_int_cst (NULL_TREE, size - 1),
4289 NULL_RTX, 0);
4290 if (d < 0)
4291 quotient
4292 = force_operand (gen_rtx_MINUS (compute_mode,
4293 t4, t3),
4294 tquotient);
4295 else
4296 quotient
4297 = force_operand (gen_rtx_MINUS (compute_mode,
4298 t3, t4),
4299 tquotient);
4300 }
4301 }
4302 else /* Too wide mode to use tricky code */
4303 break;
4304
4305 insn = get_last_insn ();
4306 if (insn != last
4307 && (set = single_set (insn)) != 0
4308 && SET_DEST (set) == quotient)
4309 set_unique_reg_note (insn,
4310 REG_EQUAL,
4311 gen_rtx_DIV (compute_mode, op0, op1));
4312 }
4313 break;
4314 }
4315 fail1:
4316 delete_insns_since (last);
4317 break;
4318
4319 case FLOOR_DIV_EXPR:
4320 case FLOOR_MOD_EXPR:
4321 /* We will come here only for signed operations. */
4322 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4323 {
4324 unsigned HOST_WIDE_INT mh;
4325 int pre_shift, lgup, post_shift;
4326 HOST_WIDE_INT d = INTVAL (op1);
4327 rtx ml;
4328
4329 if (d > 0)
4330 {
4331 /* We could just as easily deal with negative constants here,
4332 but it does not seem worth the trouble for GCC 2.6. */
4333 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4334 {
4335 pre_shift = floor_log2 (d);
4336 if (rem_flag)
4337 {
4338 remainder = expand_binop (compute_mode, and_optab, op0,
4339 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4340 remainder, 0, OPTAB_LIB_WIDEN);
4341 if (remainder)
4342 return gen_lowpart (mode, remainder);
4343 }
4344 quotient = expand_shift
4345 (RSHIFT_EXPR, compute_mode, op0,
4346 build_int_cst (NULL_TREE, pre_shift),
4347 tquotient, 0);
4348 }
4349 else
4350 {
4351 rtx t1, t2, t3, t4;
4352
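/* Exposition (added comment): for a positive divisor this computes
   floor (op0 / d) as ((op0 ^ s) udiv d) ^ s, where s is the sign mask
   op0 >> (size - 1).  For negative op0 that is -((-op0 - 1) / d) - 1,
   which is exactly the floor; the unsigned division itself is the usual
   high-part multiply followed by a logical shift.  */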
4353 mh = choose_multiplier (d, size, size - 1,
4354 &ml, &post_shift, &lgup);
4355 gcc_assert (!mh);
4356
4357 if (post_shift < BITS_PER_WORD
4358 && size - 1 < BITS_PER_WORD)
4359 {
4360 t1 = expand_shift
4361 (RSHIFT_EXPR, compute_mode, op0,
4362 build_int_cst (NULL_TREE, size - 1),
4363 NULL_RTX, 0);
4364 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4365 NULL_RTX, 0, OPTAB_WIDEN);
4366 extra_cost = (shift_cost[compute_mode][post_shift]
4367 + shift_cost[compute_mode][size - 1]
4368 + 2 * add_cost[compute_mode]);
4369 t3 = expand_mult_highpart (compute_mode, t2, ml,
4370 NULL_RTX, 1,
4371 max_cost - extra_cost);
4372 if (t3 != 0)
4373 {
4374 t4 = expand_shift
4375 (RSHIFT_EXPR, compute_mode, t3,
4376 build_int_cst (NULL_TREE, post_shift),
4377 NULL_RTX, 1);
4378 quotient = expand_binop (compute_mode, xor_optab,
4379 t4, t1, tquotient, 0,
4380 OPTAB_WIDEN);
4381 }
4382 }
4383 }
4384 }
4385 else
4386 {
4387 rtx nsign, t1, t2, t3, t4;
4388 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4389 op0, constm1_rtx), NULL_RTX);
4390 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4391 0, OPTAB_WIDEN);
4392 nsign = expand_shift
4393 (RSHIFT_EXPR, compute_mode, t2,
4394 build_int_cst (NULL_TREE, size - 1),
4395 NULL_RTX, 0);
4396 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4397 NULL_RTX);
4398 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4399 NULL_RTX, 0);
4400 if (t4)
4401 {
4402 rtx t5;
4403 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4404 NULL_RTX, 0);
4405 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4406 t4, t5),
4407 tquotient);
4408 }
4409 }
4410 }
4411
4412 if (quotient != 0)
4413 break;
4414 delete_insns_since (last);
4415
4416 /* Try using an instruction that produces both the quotient and
4417 remainder, using truncation. We can easily compensate the quotient
4418 or remainder to get floor rounding, once we have the remainder.
4419 Notice that we also compute the final remainder value here,
4420 and return the result right away. */
4421 if (target == 0 || GET_MODE (target) != compute_mode)
4422 target = gen_reg_rtx (compute_mode);
4423
4424 if (rem_flag)
4425 {
4426 remainder
4427 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4428 quotient = gen_reg_rtx (compute_mode);
4429 }
4430 else
4431 {
4432 quotient
4433 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4434 remainder = gen_reg_rtx (compute_mode);
4435 }
4436
4437 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4438 quotient, remainder, 0))
4439 {
4440 /* This could be computed with a branch-less sequence.
4441 Save that for later. */
4442 rtx tem;
4443 rtx label = gen_label_rtx ();
4444 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4445 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4446 NULL_RTX, 0, OPTAB_WIDEN);
4447 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4448 expand_dec (quotient, const1_rtx);
4449 expand_inc (remainder, op1);
4450 emit_label (label);
4451 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4452 }
4453
4454 /* No luck with division elimination or divmod. Have to do it
4455 by conditionally adjusting op0 *and* the result. */
4456 {
4457 rtx label1, label2, label3, label4, label5;
4458 rtx adjusted_op0;
4459 rtx tem;
4460
4461 quotient = gen_reg_rtx (compute_mode);
4462 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4463 label1 = gen_label_rtx ();
4464 label2 = gen_label_rtx ();
4465 label3 = gen_label_rtx ();
4466 label4 = gen_label_rtx ();
4467 label5 = gen_label_rtx ();
4468 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4469 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4470 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4471 quotient, 0, OPTAB_LIB_WIDEN);
4472 if (tem != quotient)
4473 emit_move_insn (quotient, tem);
4474 emit_jump_insn (gen_jump (label5));
4475 emit_barrier ();
4476 emit_label (label1);
4477 expand_inc (adjusted_op0, const1_rtx);
4478 emit_jump_insn (gen_jump (label4));
4479 emit_barrier ();
4480 emit_label (label2);
4481 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4482 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4483 quotient, 0, OPTAB_LIB_WIDEN);
4484 if (tem != quotient)
4485 emit_move_insn (quotient, tem);
4486 emit_jump_insn (gen_jump (label5));
4487 emit_barrier ();
4488 emit_label (label3);
4489 expand_dec (adjusted_op0, const1_rtx);
4490 emit_label (label4);
4491 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4492 quotient, 0, OPTAB_LIB_WIDEN);
4493 if (tem != quotient)
4494 emit_move_insn (quotient, tem);
4495 expand_dec (quotient, const1_rtx);
4496 emit_label (label5);
4497 }
4498 break;
4499
4500 case CEIL_DIV_EXPR:
4501 case CEIL_MOD_EXPR:
4502 if (unsignedp)
4503 {
4504 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4505 {
4506 rtx t1, t2, t3;
4507 unsigned HOST_WIDE_INT d = INTVAL (op1);
4508 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4509 build_int_cst (NULL_TREE, floor_log2 (d)),
4510 tquotient, 1);
4511 t2 = expand_binop (compute_mode, and_optab, op0,
4512 GEN_INT (d - 1),
4513 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514 t3 = gen_reg_rtx (compute_mode);
4515 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516 compute_mode, 1, 1);
4517 if (t3 == 0)
4518 {
4519 rtx lab;
4520 lab = gen_label_rtx ();
4521 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522 expand_inc (t1, const1_rtx);
4523 emit_label (lab);
4524 quotient = t1;
4525 }
4526 else
4527 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4528 t1, t3),
4529 tquotient);
4530 break;
4531 }
4532
4533 /* Try using an instruction that produces both the quotient and
4534 remainder, using truncation. We can easily compensate the
4535 quotient or remainder to get ceiling rounding, once we have the
4536 remainder. Notice that we also compute the final remainder
4537 value here, and return the result right away. */
4538 if (target == 0 || GET_MODE (target) != compute_mode)
4539 target = gen_reg_rtx (compute_mode);
4540
4541 if (rem_flag)
4542 {
4543 remainder = (REG_P (target)
4544 ? target : gen_reg_rtx (compute_mode));
4545 quotient = gen_reg_rtx (compute_mode);
4546 }
4547 else
4548 {
4549 quotient = (REG_P (target)
4550 ? target : gen_reg_rtx (compute_mode));
4551 remainder = gen_reg_rtx (compute_mode);
4552 }
4553
4554 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4555 remainder, 1))
4556 {
4557 /* This could be computed with a branch-less sequence.
4558 Save that for later. */
4559 rtx label = gen_label_rtx ();
4560 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561 compute_mode, label);
4562 expand_inc (quotient, const1_rtx);
4563 expand_dec (remainder, op1);
4564 emit_label (label);
4565 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4566 }
4567
4568 /* No luck with division elimination or divmod. Have to do it
4569 by conditionally adjusting op0 *and* the result. */
4570 {
4571 rtx label1, label2;
4572 rtx adjusted_op0, tem;
4573
4574 quotient = gen_reg_rtx (compute_mode);
4575 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4576 label1 = gen_label_rtx ();
4577 label2 = gen_label_rtx ();
4578 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4579 compute_mode, label1);
4580 emit_move_insn (quotient, const0_rtx);
4581 emit_jump_insn (gen_jump (label2));
4582 emit_barrier ();
4583 emit_label (label1);
4584 expand_dec (adjusted_op0, const1_rtx);
4585 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4586 quotient, 1, OPTAB_LIB_WIDEN);
4587 if (tem != quotient)
4588 emit_move_insn (quotient, tem);
4589 expand_inc (quotient, const1_rtx);
4590 emit_label (label2);
4591 }
4592 }
4593 else /* signed */
4594 {
4595 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4596 && INTVAL (op1) >= 0)
4597 {
4598 /* This is extremely similar to the code for the unsigned case
4599 above. For 2.7 we should merge these variants, but for
4600 2.6.1 I don't want to touch the code for unsigned since that
4601 gets used in C. The signed case will only be used by other
4602 languages (Ada). */
4603
4604 rtx t1, t2, t3;
4605 unsigned HOST_WIDE_INT d = INTVAL (op1);
4606 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4607 build_int_cst (NULL_TREE, floor_log2 (d)),
4608 tquotient, 0);
4609 t2 = expand_binop (compute_mode, and_optab, op0,
4610 GEN_INT (d - 1),
4611 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4612 t3 = gen_reg_rtx (compute_mode);
4613 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4614 compute_mode, 1, 1);
4615 if (t3 == 0)
4616 {
4617 rtx lab;
4618 lab = gen_label_rtx ();
4619 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4620 expand_inc (t1, const1_rtx);
4621 emit_label (lab);
4622 quotient = t1;
4623 }
4624 else
4625 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4626 t1, t3),
4627 tquotient);
4628 break;
4629 }
4630
4631 /* Try using an instruction that produces both the quotient and
4632 remainder, using truncation. We can easily compensate the
4633 quotient or remainder to get ceiling rounding, once we have the
4634 remainder. Notice that we also compute the final remainder
4635 value here, and return the result right away. */
4636 if (target == 0 || GET_MODE (target) != compute_mode)
4637 target = gen_reg_rtx (compute_mode);
4638 if (rem_flag)
4639 {
4640 remainder = (REG_P (target)
4641 ? target : gen_reg_rtx (compute_mode));
4642 quotient = gen_reg_rtx (compute_mode);
4643 }
4644 else
4645 {
4646 quotient = (REG_P (target)
4647 ? target : gen_reg_rtx (compute_mode));
4648 remainder = gen_reg_rtx (compute_mode);
4649 }
4650
4651 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4652 remainder, 0))
4653 {
4654 /* This could be computed with a branch-less sequence.
4655 Save that for later. */
4656 rtx tem;
4657 rtx label = gen_label_rtx ();
4658 do_cmp_and_jump (remainder, const0_rtx, EQ,
4659 compute_mode, label);
4660 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661 NULL_RTX, 0, OPTAB_WIDEN);
4662 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4663 expand_inc (quotient, const1_rtx);
4664 expand_dec (remainder, op1);
4665 emit_label (label);
4666 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4667 }
4668
4669 /* No luck with division elimination or divmod. Have to do it
4670 by conditionally adjusting op0 *and* the result. */
4671 {
4672 rtx label1, label2, label3, label4, label5;
4673 rtx adjusted_op0;
4674 rtx tem;
4675
4676 quotient = gen_reg_rtx (compute_mode);
4677 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4678 label1 = gen_label_rtx ();
4679 label2 = gen_label_rtx ();
4680 label3 = gen_label_rtx ();
4681 label4 = gen_label_rtx ();
4682 label5 = gen_label_rtx ();
4683 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4685 compute_mode, label1);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 emit_jump_insn (gen_jump (label5));
4691 emit_barrier ();
4692 emit_label (label1);
4693 expand_dec (adjusted_op0, const1_rtx);
4694 emit_jump_insn (gen_jump (label4));
4695 emit_barrier ();
4696 emit_label (label2);
4697 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4698 compute_mode, label3);
4699 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4700 quotient, 0, OPTAB_LIB_WIDEN);
4701 if (tem != quotient)
4702 emit_move_insn (quotient, tem);
4703 emit_jump_insn (gen_jump (label5));
4704 emit_barrier ();
4705 emit_label (label3);
4706 expand_inc (adjusted_op0, const1_rtx);
4707 emit_label (label4);
4708 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4709 quotient, 0, OPTAB_LIB_WIDEN);
4710 if (tem != quotient)
4711 emit_move_insn (quotient, tem);
4712 expand_inc (quotient, const1_rtx);
4713 emit_label (label5);
4714 }
4715 }
4716 break;
4717
4718 case EXACT_DIV_EXPR:
4719 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4720 {
4721 HOST_WIDE_INT d = INTVAL (op1);
4722 unsigned HOST_WIDE_INT ml;
4723 int pre_shift;
4724 rtx t1;
4725
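/* Exposition (added comment): an exact division first shifts out the
   power-of-two factor of D, then multiplies by the inverse of the
   remaining odd factor modulo 2^SIZE; since the division has no
   remainder, the low SIZE bits of that product are the quotient.
   E.g. an exact division by 6 in a 32-bit mode becomes
   (op0 >> 1) * 0xAAAAAAAB.  */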
4726 pre_shift = floor_log2 (d & -d);
4727 ml = invert_mod2n (d >> pre_shift, size);
4728 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4729 build_int_cst (NULL_TREE, pre_shift),
4730 NULL_RTX, unsignedp);
4731 quotient = expand_mult (compute_mode, t1,
4732 gen_int_mode (ml, compute_mode),
4733 NULL_RTX, 1);
4734
4735 insn = get_last_insn ();
4736 set_unique_reg_note (insn,
4737 REG_EQUAL,
4738 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4739 compute_mode,
4740 op0, op1));
4741 }
4742 break;
4743
4744 case ROUND_DIV_EXPR:
4745 case ROUND_MOD_EXPR:
4746 if (unsignedp)
4747 {
4748 rtx tem;
4749 rtx label;
4750 label = gen_label_rtx ();
4751 quotient = gen_reg_rtx (compute_mode);
4752 remainder = gen_reg_rtx (compute_mode);
4753 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4754 {
4755 rtx tem;
4756 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4757 quotient, 1, OPTAB_LIB_WIDEN);
4758 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4759 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4760 remainder, 1, OPTAB_LIB_WIDEN);
4761 }
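/* Exposition (added comment): round to nearest.  If the remainder
   exceeds (op1 - 1) / 2, the discarded fraction is at least one half,
   so add 1 to the quotient and subtract op1 from the remainder.  */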
4762 tem = plus_constant (op1, -1);
4763 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4764 build_int_cst (NULL_TREE, 1),
4765 NULL_RTX, 1);
4766 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4767 expand_inc (quotient, const1_rtx);
4768 expand_dec (remainder, op1);
4769 emit_label (label);
4770 }
4771 else
4772 {
4773 rtx abs_rem, abs_op1, tem, mask;
4774 rtx label;
4775 label = gen_label_rtx ();
4776 quotient = gen_reg_rtx (compute_mode);
4777 remainder = gen_reg_rtx (compute_mode);
4778 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4779 {
4780 rtx tem;
4781 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4782 quotient, 0, OPTAB_LIB_WIDEN);
4783 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4784 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4785 remainder, 0, OPTAB_LIB_WIDEN);
4786 }
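/* Exposition (added comment): the branch-free adjustment below forms
   mask = (op0 ^ op1) >> (size - 1), which is -1 when the operands have
   opposite signs and 0 otherwise.  Then (mask ^ 1) - mask is +1 or -1
   and (mask ^ op1) - mask is op1 or -op1, so whenever
   2 * |remainder| >= |op1| the quotient is moved one step away from zero
   and |op1| is removed from the remainder's magnitude.  */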
4787 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4788 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4789 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4790 build_int_cst (NULL_TREE, 1),
4791 NULL_RTX, 1);
4792 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4793 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4794 NULL_RTX, 0, OPTAB_WIDEN);
4795 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4796 build_int_cst (NULL_TREE, size - 1),
4797 NULL_RTX, 0);
4798 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4799 NULL_RTX, 0, OPTAB_WIDEN);
4800 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4801 NULL_RTX, 0, OPTAB_WIDEN);
4802 expand_inc (quotient, tem);
4803 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4804 NULL_RTX, 0, OPTAB_WIDEN);
4805 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806 NULL_RTX, 0, OPTAB_WIDEN);
4807 expand_dec (remainder, tem);
4808 emit_label (label);
4809 }
4810 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4811
4812 default:
4813 gcc_unreachable ();
4814 }
4815
4816 if (quotient == 0)
4817 {
4818 if (target && GET_MODE (target) != compute_mode)
4819 target = 0;
4820
4821 if (rem_flag)
4822 {
4823 /* Try to produce the remainder without producing the quotient.
4824 If we seem to have a divmod pattern that does not require widening,
4825 don't try widening here. We should really have a WIDEN argument
4826 to expand_twoval_binop, since what we'd really like to do here is
4827 1) try a mod insn in compute_mode
4828 2) try a divmod insn in compute_mode
4829 3) try a div insn in compute_mode and multiply-subtract to get
4830 remainder
4831 4) try the same things with widening allowed. */
4832 remainder
4833 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4834 op0, op1, target,
4835 unsignedp,
4836 ((optab2->handlers[compute_mode].insn_code
4837 != CODE_FOR_nothing)
4838 ? OPTAB_DIRECT : OPTAB_WIDEN));
4839 if (remainder == 0)
4840 {
4841 /* No luck there. Can we do remainder and divide at once
4842 without a library call? */
4843 remainder = gen_reg_rtx (compute_mode);
4844 if (! expand_twoval_binop ((unsignedp
4845 ? udivmod_optab
4846 : sdivmod_optab),
4847 op0, op1,
4848 NULL_RTX, remainder, unsignedp))
4849 remainder = 0;
4850 }
4851
4852 if (remainder)
4853 return gen_lowpart (mode, remainder);
4854 }
4855
4856 /* Produce the quotient. Try a quotient insn, but not a library call.
4857 If we have a divmod in this mode, use it in preference to widening
4858 the div (for this test we assume it will not fail). Note that optab2
4859 is set to the one of the two optabs that the call below will use. */
4860 quotient
4861 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4862 op0, op1, rem_flag ? NULL_RTX : target,
4863 unsignedp,
4864 ((optab2->handlers[compute_mode].insn_code
4865 != CODE_FOR_nothing)
4866 ? OPTAB_DIRECT : OPTAB_WIDEN));
4867
4868 if (quotient == 0)
4869 {
4870 /* No luck there. Try a quotient-and-remainder insn,
4871 keeping the quotient alone. */
4872 quotient = gen_reg_rtx (compute_mode);
4873 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4874 op0, op1,
4875 quotient, NULL_RTX, unsignedp))
4876 {
4877 quotient = 0;
4878 if (! rem_flag)
4879 /* Still no luck. If we are not computing the remainder,
4880 use a library call for the quotient. */
4881 quotient = sign_expand_binop (compute_mode,
4882 udiv_optab, sdiv_optab,
4883 op0, op1, target,
4884 unsignedp, OPTAB_LIB_WIDEN);
4885 }
4886 }
4887 }
4888
4889 if (rem_flag)
4890 {
4891 if (target && GET_MODE (target) != compute_mode)
4892 target = 0;
4893
4894 if (quotient == 0)
4895 {
4896 /* No divide instruction either. Use library for remainder. */
4897 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4898 op0, op1, target,
4899 unsignedp, OPTAB_LIB_WIDEN);
4900 /* No remainder function. Try a quotient-and-remainder
4901 function, keeping the remainder. */
4902 if (!remainder)
4903 {
4904 remainder = gen_reg_rtx (compute_mode);
4905 if (!expand_twoval_binop_libfunc
4906 (unsignedp ? udivmod_optab : sdivmod_optab,
4907 op0, op1,
4908 NULL_RTX, remainder,
4909 unsignedp ? UMOD : MOD))
4910 remainder = NULL_RTX;
4911 }
4912 }
4913 else
4914 {
4915 /* We divided. Now finish doing X - Y * (X / Y). */
4916 remainder = expand_mult (compute_mode, quotient, op1,
4917 NULL_RTX, unsignedp);
4918 remainder = expand_binop (compute_mode, sub_optab, op0,
4919 remainder, target, unsignedp,
4920 OPTAB_LIB_WIDEN);
4921 }
4922 }
4923
4924 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4925 }
4926 \f
4927 /* Return a tree node with data type TYPE, describing the value of X.
4928 Usually this is a VAR_DECL, if there is no obvious better choice.
4929 X may be an expression; however, we only support those expressions
4930 generated by loop.c. */
4931
4932 tree
4933 make_tree (tree type, rtx x)
4934 {
4935 tree t;
4936
4937 switch (GET_CODE (x))
4938 {
4939 case CONST_INT:
4940 {
4941 HOST_WIDE_INT hi = 0;
4942
4943 if (INTVAL (x) < 0
4944 && !(TYPE_UNSIGNED (type)
4945 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4946 < HOST_BITS_PER_WIDE_INT)))
4947 hi = -1;
4948
4949 t = build_int_cst_wide (type, INTVAL (x), hi);
4950
4951 return t;
4952 }
4953
4954 case CONST_DOUBLE:
4955 if (GET_MODE (x) == VOIDmode)
4956 t = build_int_cst_wide (type,
4957 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4958 else
4959 {
4960 REAL_VALUE_TYPE d;
4961
4962 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4963 t = build_real (type, d);
4964 }
4965
4966 return t;
4967
4968 case CONST_VECTOR:
4969 {
4970 int i, units;
4971 rtx elt;
4972 tree t = NULL_TREE;
4973
4974 units = CONST_VECTOR_NUNITS (x);
4975
4976 /* Build a tree with vector elements. */
4977 for (i = units - 1; i >= 0; --i)
4978 {
4979 elt = CONST_VECTOR_ELT (x, i);
4980 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4981 }
4982
4983 return build_vector (type, t);
4984 }
4985
4986 case PLUS:
4987 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4988 make_tree (type, XEXP (x, 1)));
4989
4990 case MINUS:
4991 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4992 make_tree (type, XEXP (x, 1)));
4993
4994 case NEG:
4995 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4996
4997 case MULT:
4998 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4999 make_tree (type, XEXP (x, 1)));
5000
5001 case ASHIFT:
5002 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5003 make_tree (type, XEXP (x, 1)));
5004
5005 case LSHIFTRT:
5006 t = lang_hooks.types.unsigned_type (type);
5007 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5008 make_tree (t, XEXP (x, 0)),
5009 make_tree (type, XEXP (x, 1))));
5010
5011 case ASHIFTRT:
5012 t = lang_hooks.types.signed_type (type);
5013 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5014 make_tree (t, XEXP (x, 0)),
5015 make_tree (type, XEXP (x, 1))));
5016
5017 case DIV:
5018 if (TREE_CODE (type) != REAL_TYPE)
5019 t = lang_hooks.types.signed_type (type);
5020 else
5021 t = type;
5022
5023 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5024 make_tree (t, XEXP (x, 0)),
5025 make_tree (t, XEXP (x, 1))));
5026 case UDIV:
5027 t = lang_hooks.types.unsigned_type (type);
5028 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029 make_tree (t, XEXP (x, 0)),
5030 make_tree (t, XEXP (x, 1))));
5031
5032 case SIGN_EXTEND:
5033 case ZERO_EXTEND:
5034 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5035 GET_CODE (x) == ZERO_EXTEND);
5036 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5037
5038 default:
5039 t = build_decl (VAR_DECL, NULL_TREE, type);
5040
5041 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5042 ptr_mode. So convert. */
5043 if (POINTER_TYPE_P (type))
5044 x = convert_memory_address (TYPE_MODE (type), x);
5045
5046 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5047 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5048 t->decl_with_rtl.rtl = x;
5049
5050 return t;
5051 }
5052 }
5053 \f
5054 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5055 and returning TARGET.
5056
5057 If TARGET is 0, a pseudo-register or constant is returned. */
5058
5059 rtx
5060 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5061 {
5062 rtx tem = 0;
5063
5064 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5065 tem = simplify_binary_operation (AND, mode, op0, op1);
5066 if (tem == 0)
5067 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5068
5069 if (target == 0)
5070 target = tem;
5071 else if (tem != target)
5072 emit_move_insn (target, tem);
5073 return target;
5074 }
5075 \f
5076 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5077 and storing in TARGET. Normally return TARGET.
5078 Return 0 if that cannot be done.
5079
5080 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5081 it is VOIDmode, they cannot both be CONST_INT.
5082
5083 UNSIGNEDP is for the case where we have to widen the operands
5084 to perform the operation. It says to use zero-extension.
5085
5086 NORMALIZEP is 1 if we should convert the result to be either zero
5087 or one. NORMALIZEP is -1 if we should convert the result to be
5088 either zero or -1. If NORMALIZEP is zero, the result will be left
5089 "raw" out of the scc insn. */
5090
5091 rtx
5092 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5093 enum machine_mode mode, int unsignedp, int normalizep)
5094 {
5095 rtx subtarget;
5096 enum insn_code icode;
5097 enum machine_mode compare_mode;
5098 enum machine_mode target_mode = GET_MODE (target);
5099 rtx tem;
5100 rtx last = get_last_insn ();
5101 rtx pattern, comparison;
5102
5103 if (unsignedp)
5104 code = unsigned_condition (code);
5105
5106 /* If one operand is constant, make it the second one. Only do this
5107 if the other operand is not constant as well. */
5108
5109 if (swap_commutative_operands_p (op0, op1))
5110 {
5111 tem = op0;
5112 op0 = op1;
5113 op1 = tem;
5114 code = swap_condition (code);
5115 }
5116
5117 if (mode == VOIDmode)
5118 mode = GET_MODE (op0);
5119
5120 /* For some comparisons with 1 and -1, we can convert this to
5121 comparisons with zero. This will often produce more opportunities for
5122 store-flag insns. */
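/* In particular, for integer operands: X < 1 is X <= 0, X <= -1 is
   X < 0, X >= 1 is X > 0, X > -1 is X >= 0, and for unsigned
   comparisons X >= 1 is X != 0 and X < 1 is X == 0, which is exactly
   what the switch below implements.  */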
5123
5124 switch (code)
5125 {
5126 case LT:
5127 if (op1 == const1_rtx)
5128 op1 = const0_rtx, code = LE;
5129 break;
5130 case LE:
5131 if (op1 == constm1_rtx)
5132 op1 = const0_rtx, code = LT;
5133 break;
5134 case GE:
5135 if (op1 == const1_rtx)
5136 op1 = const0_rtx, code = GT;
5137 break;
5138 case GT:
5139 if (op1 == constm1_rtx)
5140 op1 = const0_rtx, code = GE;
5141 break;
5142 case GEU:
5143 if (op1 == const1_rtx)
5144 op1 = const0_rtx, code = NE;
5145 break;
5146 case LTU:
5147 if (op1 == const1_rtx)
5148 op1 = const0_rtx, code = EQ;
5149 break;
5150 default:
5151 break;
5152 }
5153
5154 /* If we are comparing a double-word integer with zero or -1, we can
5155 convert the comparison into one involving a single word. */
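/* For example, with 32-bit words a 64-bit X made of words HI and LO
   satisfies X == 0 iff (LO | HI) == 0 and X == -1 iff (LO & HI) == -1,
   while X < 0 and X >= 0 depend only on the sign bit of HI.  */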
5156 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5157 && GET_MODE_CLASS (mode) == MODE_INT
5158 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5159 {
5160 if ((code == EQ || code == NE)
5161 && (op1 == const0_rtx || op1 == constm1_rtx))
5162 {
5163 rtx op00, op01, op0both;
5164
5165 /* Do a logical OR or AND of the two words and compare the result. */
5166 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5167 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5168 op0both = expand_binop (word_mode,
5169 op1 == const0_rtx ? ior_optab : and_optab,
5170 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5171
5172 if (op0both != 0)
5173 return emit_store_flag (target, code, op0both, op1, word_mode,
5174 unsignedp, normalizep);
5175 }
5176 else if ((code == LT || code == GE) && op1 == const0_rtx)
5177 {
5178 rtx op0h;
5179
5180 /* If testing the sign bit, can just test on high word. */
5181 op0h = simplify_gen_subreg (word_mode, op0, mode,
5182 subreg_highpart_offset (word_mode, mode));
5183 return emit_store_flag (target, code, op0h, op1, word_mode,
5184 unsignedp, normalizep);
5185 }
5186 }
5187
5188 /* From now on, we won't change CODE, so set ICODE now. */
5189 icode = setcc_gen_code[(int) code];
5190
5191 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5192 complement of A (for GE) and shifting the sign bit to the low bit. */
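/* For a 32-bit A, say, this computes A < 0 as (unsigned) A >> 31 and
   A >= 0 as (unsigned) ~A >> 31; when a 0/-1 result is wanted the
   shift is arithmetic rather than logical.  */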
5193 if (op1 == const0_rtx && (code == LT || code == GE)
5194 && GET_MODE_CLASS (mode) == MODE_INT
5195 && (normalizep || STORE_FLAG_VALUE == 1
5196 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5197 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5198 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5199 {
5200 subtarget = target;
5201
5202 /* If the result is to be wider than OP0, it is best to convert it
5203 first. If it is to be narrower, it is *incorrect* to convert it
5204 first. */
5205 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5206 {
5207 op0 = convert_modes (target_mode, mode, op0, 0);
5208 mode = target_mode;
5209 }
5210
5211 if (target_mode != mode)
5212 subtarget = 0;
5213
5214 if (code == GE)
5215 op0 = expand_unop (mode, one_cmpl_optab, op0,
5216 ((STORE_FLAG_VALUE == 1 || normalizep)
5217 ? 0 : subtarget), 0);
5218
5219 if (STORE_FLAG_VALUE == 1 || normalizep)
5220 /* If we are supposed to produce a 0/1 value, we want to do
5221 a logical shift from the sign bit to the low-order bit; for
5222 a -1/0 value, we do an arithmetic shift. */
5223 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5224 size_int (GET_MODE_BITSIZE (mode) - 1),
5225 subtarget, normalizep != -1);
5226
5227 if (mode != target_mode)
5228 op0 = convert_modes (target_mode, mode, op0, 0);
5229
5230 return op0;
5231 }
5232
5233 if (icode != CODE_FOR_nothing)
5234 {
5235 insn_operand_predicate_fn pred;
5236
5237 /* We think we may be able to do this with a scc insn. Emit the
5238 comparison and then the scc insn. */
5239
5240 do_pending_stack_adjust ();
5241 last = get_last_insn ();
5242
5243 comparison
5244 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5245 if (CONSTANT_P (comparison))
5246 {
5247 switch (GET_CODE (comparison))
5248 {
5249 case CONST_INT:
5250 if (comparison == const0_rtx)
5251 return const0_rtx;
5252 break;
5253
5254 #ifdef FLOAT_STORE_FLAG_VALUE
5255 case CONST_DOUBLE:
5256 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5257 return const0_rtx;
5258 break;
5259 #endif
5260 default:
5261 gcc_unreachable ();
5262 }
5263
5264 if (normalizep == 1)
5265 return const1_rtx;
5266 if (normalizep == -1)
5267 return constm1_rtx;
5268 return const_true_rtx;
5269 }
5270
5271 /* The code of COMPARISON may not match CODE if compare_from_rtx
5272 decided to swap its operands and reverse the original code.
5273
5274 We know that compare_from_rtx returns either a CONST_INT or
5275 a new comparison code, so it is safe to just extract the
5276 code from COMPARISON. */
5277 code = GET_CODE (comparison);
5278
5279 /* Get a reference to the target in the proper mode for this insn. */
5280 compare_mode = insn_data[(int) icode].operand[0].mode;
5281 subtarget = target;
5282 pred = insn_data[(int) icode].operand[0].predicate;
5283 if (optimize || ! (*pred) (subtarget, compare_mode))
5284 subtarget = gen_reg_rtx (compare_mode);
5285
5286 pattern = GEN_FCN (icode) (subtarget);
5287 if (pattern)
5288 {
5289 emit_insn (pattern);
5290
5291 /* If we are converting to a wider mode, first convert to
5292 TARGET_MODE, then normalize. This produces better combining
5293 opportunities on machines that have a SIGN_EXTRACT when we are
5294 testing a single bit. This mostly benefits the 68k.
5295
5296 If STORE_FLAG_VALUE does not have the sign bit set when
5297 interpreted in COMPARE_MODE, we can do this conversion as
5298 unsigned, which is usually more efficient. */
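/* For example, a STORE_FLAG_VALUE of 1 in QImode has bit 7 clear, so
   zero-extension preserves it; a STORE_FLAG_VALUE of -1 has the sign
   bit set and must be sign-extended instead.  */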
5299 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5300 {
5301 convert_move (target, subtarget,
5302 (GET_MODE_BITSIZE (compare_mode)
5303 <= HOST_BITS_PER_WIDE_INT)
5304 && 0 == (STORE_FLAG_VALUE
5305 & ((HOST_WIDE_INT) 1
5306 << (GET_MODE_BITSIZE (compare_mode) - 1))));
5307 op0 = target;
5308 compare_mode = target_mode;
5309 }
5310 else
5311 op0 = subtarget;
5312
5313 /* If we want to keep subexpressions around, don't reuse our
5314 last target. */
5315
5316 if (optimize)
5317 subtarget = 0;
5318
5319 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
5320 we don't have to do anything. */
5321 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5322 ;
5323 /* STORE_FLAG_VALUE might be the most negative number, so write
5324 the comparison this way to avoid a compile-time warning. */
5325 else if (- normalizep == STORE_FLAG_VALUE)
5326 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5327
5328 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5329 makes it hard to use a value of just the sign bit due to
5330 ANSI integer constant typing rules. */
5331 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5332 && (STORE_FLAG_VALUE
5333 & ((HOST_WIDE_INT) 1
5334 << (GET_MODE_BITSIZE (compare_mode) - 1))))
5335 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5336 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5337 subtarget, normalizep == 1);
5338 else
5339 {
5340 gcc_assert (STORE_FLAG_VALUE & 1);
5341
5342 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5343 if (normalizep == -1)
5344 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5345 }
5346
5347 /* If we were converting to a smaller mode, do the
5348 conversion now. */
5349 if (target_mode != compare_mode)
5350 {
5351 convert_move (target, op0, 0);
5352 return target;
5353 }
5354 else
5355 return op0;
5356 }
5357 }
5358
5359 delete_insns_since (last);
5360
5361 /* If optimizing, use different pseudo registers for each insn, instead
5362 of reusing the same pseudo. This leads to better CSE, but slows
5363 down the compiler, since there are more pseudos. */
5364 subtarget = (!optimize
5365 && (target_mode == mode)) ? target : NULL_RTX;
5366
5367 /* If we reached here, we can't do this with a scc insn. However, there
5368 are some comparisons that can be done directly. For example, if
5369 this is an equality comparison of integers, we can try to exclusive-or
5370 (or subtract) the two operands and use a recursive call to try the
5371 comparison with zero. Don't do any of these cases if branches are
5372 very cheap. */
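/* This relies on A == B being equivalent to (A ^ B) == 0 and to
   (A - B) == 0, and likewise for A != B.  */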
5373
5374 if (BRANCH_COST > 0
5375 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5376 && op1 != const0_rtx)
5377 {
5378 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5379 OPTAB_WIDEN);
5380
5381 if (tem == 0)
5382 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5383 OPTAB_WIDEN);
5384 if (tem != 0)
5385 tem = emit_store_flag (target, code, tem, const0_rtx,
5386 mode, unsignedp, normalizep);
5387 if (tem == 0)
5388 delete_insns_since (last);
5389 return tem;
5390 }
5391
5392 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5393 the constant zero. Reject all other comparisons at this point. Only
5394 do LE and GT if branches are expensive since they are expensive on
5395 2-operand machines. */
5396
5397 if (BRANCH_COST == 0
5398 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5399 || (code != EQ && code != NE
5400 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5401 return 0;
5402
5403 /* See what we need to return. We can only return a 1, -1, or the
5404 sign bit. */
5405
5406 if (normalizep == 0)
5407 {
5408 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5409 normalizep = STORE_FLAG_VALUE;
5410
5411 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5412 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5413 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5414 ;
5415 else
5416 return 0;
5417 }
5418
5419 /* Try to put the result of the comparison in the sign bit. Assume we can't
5420 do the necessary operation below. */
5421
5422 tem = 0;
5423
5424 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5425 the sign bit set. */
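/* For example, with 32-bit A: A = 0 gives 0 | -1 = -1 (sign bit set),
   A = 5 gives 5 | 4 = 5 (clear), A = -3 gives -3 | -4 = -3 (set), and
   the wrap-around for A = INT_MIN still leaves the sign bit set.  */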
5426
5427 if (code == LE)
5428 {
5429 /* This is destructive, so SUBTARGET can't be OP0. */
5430 if (rtx_equal_p (subtarget, op0))
5431 subtarget = 0;
5432
5433 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5434 OPTAB_WIDEN);
5435 if (tem)
5436 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5437 OPTAB_WIDEN);
5438 }
5439
5440 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5441 number of bits in the mode of OP0, minus one. */
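/* For example, with 32-bit A: A = 7 gives (7 >> 31) - 7 = -7 (sign bit
   set), A = 0 gives 0 - 0 = 0 (clear), and A = -7 gives (-1) - (-7) = 6
   (clear), so the sign bit of the result is set exactly when A > 0.  */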
5442
5443 if (code == GT)
5444 {
5445 if (rtx_equal_p (subtarget, op0))
5446 subtarget = 0;
5447
5448 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5449 size_int (GET_MODE_BITSIZE (mode) - 1),
5450 subtarget, 0);
5451 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5452 OPTAB_WIDEN);
5453 }
5454
5455 if (code == EQ || code == NE)
5456 {
5457 /* For EQ or NE, one way to do the comparison is to apply an operation
5458 that converts the operand into a positive number if it is nonzero
5459 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5460 for NE we negate. This puts the result in the sign bit. Then we
5461 normalize with a shift, if needed.
5462
5463 Two operations that can do the above actions are ABS and FFS, so try
5464 them. If that doesn't work, and MODE is smaller than a full word,
5465 we can use zero-extension to the wider mode (an unsigned conversion)
5466 as the operation. */
5467
5468 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5469 that is compensated by the subsequent overflow when subtracting
5470 one / negating. */
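/* For example, using ABS for NE: A = 5 gives -|5| = -5 (sign bit set)
   and A = 0 gives 0 (clear); for EQ: |5| - 1 = 4 (clear) and
   |0| - 1 = -1 (set).  For A = INT_MIN, ABS yields INT_MIN, and the
   wrap-around of the subtraction or negation still produces the
   correct sign bit.  */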
5471
5472 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5473 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5474 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5475 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5476 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5477 {
5478 tem = convert_modes (word_mode, mode, op0, 1);
5479 mode = word_mode;
5480 }
5481
5482 if (tem != 0)
5483 {
5484 if (code == EQ)
5485 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5486 0, OPTAB_WIDEN);
5487 else
5488 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5489 }
5490
5491 /* If we couldn't do it that way, for NE we can "or" the two's complement
5492 of the value with itself. For EQ, we take the one's complement of
5493 that "or", which is an extra insn, so we only handle EQ if branches
5494 are expensive. */
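/* For example, with 32-bit A: A = 0 gives 0 | 0 = 0 (sign bit clear),
   while A = 5 gives -5 | 5 = -1 and A = -5 gives 5 | -5 = -1 (both
   set), so -A | A has the sign bit set exactly when A is nonzero; for
   EQ the one's complement flips it.  */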
5495
5496 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5497 {
5498 if (rtx_equal_p (subtarget, op0))
5499 subtarget = 0;
5500
5501 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5502 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5503 OPTAB_WIDEN);
5504
5505 if (tem && code == EQ)
5506 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5507 }
5508 }
5509
5510 if (tem && normalizep)
5511 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5512 size_int (GET_MODE_BITSIZE (mode) - 1),
5513 subtarget, normalizep == 1);
5514
5515 if (tem)
5516 {
5517 if (GET_MODE (tem) != target_mode)
5518 {
5519 convert_move (target, tem, 0);
5520 tem = target;
5521 }
5522 else if (!subtarget)
5523 {
5524 emit_move_insn (target, tem);
5525 tem = target;
5526 }
5527 }
5528 else
5529 delete_insns_since (last);
5530
5531 return tem;
5532 }
5533
5534 /* Like emit_store_flag, but always succeeds. */
5535
5536 rtx
5537 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5538 enum machine_mode mode, int unsignedp, int normalizep)
5539 {
5540 rtx tem, label;
5541
5542 /* First see if emit_store_flag can do the job. */
5543 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5544 if (tem != 0)
5545 return tem;
5546
5547 if (normalizep == 0)
5548 normalizep = 1;
5549
5550 /* If that failed, we have to do it with set/compare/jump/set code. */
5551
5552 if (!REG_P (target)
5553 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5554 target = gen_reg_rtx (GET_MODE (target));
5555
5556 emit_move_insn (target, const1_rtx);
5557 label = gen_label_rtx ();
5558 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5559 NULL_RTX, label);
5560
5561 emit_move_insn (target, const0_rtx);
5562 emit_label (label);
5563
5564 return target;
5565 }
5566 \f
5567 /* Perform possibly multi-word comparison and conditional jump to LABEL
5568 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5569 now a thin wrapper around do_compare_rtx_and_jump. */
5570
5571 static void
5572 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5573 rtx label)
5574 {
5575 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5576 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5577 NULL_RTX, NULL_RTX, label);
5578 }