/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
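
/* For example, x == 8 (binary 1000) gives 1000 & 0111 == 0, so it
   passes; x == 6 (binary 0110) gives 0110 & 0101 == 0100 != 0, so it
   fails.  Clearing the lowest set bit of X leaves zero exactly when X
   is zero or has a single bit set.  */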

/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv 0
#define CODE_FOR_insv CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv 0
#define CODE_FOR_extv CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv 0
#define CODE_FOR_extzv CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int sdiv_cost[NUM_MACHINE_MODES];
static int udiv_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;  rtunion reg_fld[2];
    struct rtx_def plus;  rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;  rtunion mult_fld1;
    struct rtx_def sdiv;  rtunion sdiv_fld1;
    struct rtx_def udiv;  rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;  rtunion sdiv_32_fld1;
    struct rtx_def smod_32;  rtunion smod_32_fld1;
    struct rtx_def wide_mult;  rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;  rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;  rtunion shift_fld1;
    struct rtx_def shift_mult;  rtunion shift_mult_fld1;
    struct rtx_def shift_add;  rtunion shift_add_fld1;
    struct rtx_def shift_sub;  rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.sdiv, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.sdiv_32, mode);
      PUT_MODE (&all.smod_32, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);
      sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
      udiv_cost[mode] = rtx_cost (&all.udiv, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
                               <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
                               <= 4 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
        {
          PUT_MODE (&all.zext, wider_mode);
          PUT_MODE (&all.wide_mult, wider_mode);
          PUT_MODE (&all.wide_lshr, wider_mode);
          XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

          mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
          mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
        }

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
        {
          XEXP (&all.shift, 1) = cint[m];
          XEXP (&all.shift_mult, 1) = pow2[m];

          shift_cost[mode][m] = rtx_cost (&all.shift, SET);
          shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
          shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
        }
    }
}
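
/* As a rough illustration of how these tables are consumed: when the
   multiply-by-constant code later in this file weighs rewriting x * 10
   as ((x << 2) + x) << 1, it compares mul_cost[mode] against
   shiftadd_cost[mode][2] + shift_cost[mode][1] for the candidate
   shift-and-add sequence.  */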

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
        {
          data = &insn_data[CODE_FOR_insv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
        {
          data = &insn_data[CODE_FOR_extv];
          break;
        }
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
        {
          data = &insn_data[CODE_FOR_extzv];
          break;
        }
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
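
/* For example, mode_for_extraction (EP_insv, 3) yields the mode that
   the insv pattern wants for its value operand (falling back to
   word_mode when the pattern leaves it VOIDmode), and MAX_MACHINE_MODE
   when the target has no insv pattern at all.  */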

\f
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use
   BITS_PER_WORD; else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
                 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
         but I think that is a mistake.  WORDS_BIG_ENDIAN is
         meaningful at a much higher level; when structures are copied
         between memory and regs, the higher-numbered regs
         always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
        {
          int difference = inner_mode_size - outer_mode_size;

          if (WORDS_BIG_ENDIAN)
            byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
          if (BYTES_BIG_ENDIAN)
            byte_offset += difference % UNITS_PER_WORD;
        }
      else
        byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return value;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          return dest;
        }
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
          ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
          : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
             || (offset * BITS_PER_UNIT % bitsize == 0
                 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
        op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
        op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                   byte_offset);
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
          != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
        value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
                 || GET_CODE (value) == LABEL_REF
                 || GET_CODE (value) == CONST))
        value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
        value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          op0 = SUBREG_REG (op0);
        }

      emit_insn (GEN_FCN (icode)
                 (gen_rtx_SUBREG (fieldmode, op0,
                                  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                                  + (offset * UNITS_PER_WORD)),
                  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
        fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards ? nwords - i - 1 : i);
          unsigned int bit_offset = (backwards
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);

          store_bit_field (op0, MIN (BITS_PER_WORD,
                                     bitsize - i * BITS_PER_WORD),
                           bitnum + bit_offset, word_mode,
                           operand_subword_force (value, wordnum, fieldmode));
        }
      return value;
    }

  /* From here on we can assume that the field to be stored fits within
     a single word, since wider fields were handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            {
              /* Since this is a destination (lvalue), we can't copy
                 it to a pseudo.  We can remove a SUBREG that does not
                 change the size of the operand.  Such a SUBREG may
                 have been added above.  */
              gcc_assert (GET_CODE (op0) == SUBREG
                          && (GET_MODE_SIZE (GET_MODE (op0))
                              == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
              op0 = SUBREG_REG (op0);
            }
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
            && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
                                                        VOIDmode))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
         into a register and save it back later.  */
      if (MEM_P (op0)
          && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
                (op0, VOIDmode)))
        {
          rtx tempreg;
          enum machine_mode bestmode;

          /* Get the mode to use for inserting into this field.  If OP0 is
             BLKmode, get the smallest mode consistent with the alignment.  If
             OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
             mode.  Otherwise, use the smallest mode containing the field.  */

          if (GET_MODE (op0) == BLKmode
              || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
            bestmode
              = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
                               MEM_VOLATILE_P (op0));
          else
            bestmode = GET_MODE (op0);

          if (bestmode == VOIDmode
              || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
              || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
                  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
            goto insv_loses;

          /* Adjust address to point to the containing unit of that mode.
             Compute offset as multiple of this unit, counting in bytes.  */
          unit = GET_MODE_BITSIZE (bestmode);
          offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
          bitpos = bitnum % unit;
          op0 = adjust_address (op0, bestmode, offset);

          /* Fetch that unit, store the bitfield in it, then store
             the unit.  */
          tempreg = copy_to_reg (op0);
          store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
          emit_move_insn (op0, tempreg);
          return value;
        }
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
        xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        /* We can't just change the mode, because this might clobber op0,
           and we will need the original value of op0 if insv fails.  */
        xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
        xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
         If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
        xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
         Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
        xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
        {
          if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
            {
              /* Optimization: Don't bother really extending VALUE
                 if it has all the bits we will actually use.  However,
                 if we must narrow it, be sure we do it correctly.  */

              if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
                {
                  rtx tmp;

                  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
                  if (! tmp)
                    tmp = simplify_gen_subreg (maxmode,
                                               force_reg (GET_MODE (value),
                                                          value1),
                                               GET_MODE (value), 0);
                  value1 = tmp;
                }
              else
                value1 = gen_lowpart (maxmode, value1);
            }
          else if (GET_CODE (value) == CONST_INT)
            value1 = gen_int_mode (INTVAL (value), maxmode);
          else
            /* Parse phase is supposed to make VALUE's data type
               match that of the component reference, which is a type
               at least as wide as the field; so VALUE should have
               a mode that corresponds to that type.  */
            gcc_assert (CONSTANT_P (value));
        }

      /* If this machine's insv insists on a register,
         get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
             (value1, maxmode)))
        value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
        emit_insn (pat);
      else
        {
          delete_insns_since (last);
          store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
        }
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}
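
/* For illustration: a call such as store_bit_field (x, 6, 10, QImode, v)
   stores a 6-bit field at bit position 10 of X (numbered according to
   the target's endianness conventions), using the insv pattern when the
   target provides one and the shift-and-mask fallback in
   store_fixed_bit_field otherwise.  */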
\f
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        {
          store_split_bit_field (op0, bitsize, bitpos, value);
          return;
        }
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.
         We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
          || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
        mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
                                 value);
          return;
        }

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;
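  /* E.g., with TOTAL_BITS == 32, BITSIZE == 8 and a big-endian BITPOS
     of 4 (counted from the msb), the field occupies bits 20..27 from
     the lsb, so BITPOS becomes 32 - 8 - 4 == 20.  */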

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
        all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        {
          if ((REG_P (value) || GET_CODE (value) == SUBREG)
              && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
            value = gen_lowpart (mode, value);
          else
            value = convert_to_mode (mode, value, 1);
        }

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
                           mask_rtx (mode, bitpos, bitsize, 1),
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    emit_move_insn (op0, temp);
}
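
/* In C terms, the read-modify-write sequence built above is roughly

     op0 = (op0 & ~(MASK << BITPOS)) | ((value & MASK) << BITPOS);

   where MASK has the low BITSIZE bits set; the AND of VALUE and the
   clearing of OP0 are each skipped when they are known to be no-ops.  */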
\f
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode,
                                    force_reg (GET_MODE (value) != VOIDmode
                                               ? GET_MODE (value)
                                               : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
        {
          int total_bits;

          /* We must do an endian conversion exactly the same way as it is
             done in extract_bit_field, so that the two calls to
             extract_fixed_bit_field will have comparable arguments.  */
          if (!MEM_P (value) || GET_MODE (value) == BLKmode)
            total_bits = BITS_PER_WORD;
          else
            total_bits = GET_MODE_BITSIZE (GET_MODE (value));

          /* Fetch successively less significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (GET_CODE (value) == CONST_INT)
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & (((HOST_WIDE_INT) 1 << thissize) - 1));
          else
            part = extract_fixed_bit_field (word_mode, value, 0, thissize,
                                            bitsdone, NULL_RTX, 1);
        }

      /* If OP0 is a register, then handle OFFSET here.

         When handling multiword bitfields, extract_bit_field may pass
         down a word_mode SUBREG of a larger REG for a bitfield that actually
         crosses a word boundary.  Thus, for a SUBREG, we must find
         the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
        {
          int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
          word = operand_subword_force (SUBREG_REG (op0), word_offset,
                                        GET_MODE (SUBREG_REG (op0)));
          offset = 0;
        }
      else if (REG_P (op0))
        {
          word = operand_subword_force (op0, offset, GET_MODE (op0));
          offset = 0;
        }
      else
        word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
                             thispos, part);
      bitsdone += thissize;
    }
}
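
/* E.g., storing a 16-bit field at bit position 24 of a little-endian
   register pair with 32-bit words takes two pieces: bits 0..7 of the
   value go into bits 24..31 of the first word, then bits 8..15 go into
   bits 0..7 of the second word.  */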
\f
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
                   enum machine_mode mode, enum machine_mode tmode)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;
      int nunits = GET_MODE_NUNITS (GET_MODE (op0));

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
        new_mode = MIN_MODE_VECTOR_FLOAT;
      else
        new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode; new_mode = GET_MODE_WIDER_MODE (new_mode))
        if (GET_MODE_NUNITS (new_mode) == nunits
            && GET_MODE_INNER (new_mode) == tmode
            && targetm.vector_mode_supported_p (new_mode))
          break;
      if (new_mode != VOIDmode)
        op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
          != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
          == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
        dest = target;

      if (!dest)
        dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
        dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
        src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
         for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
                  && (*insn_data[icode].operand[1].predicate) (src, mode1)
                  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
        {
          emit_insn (seq);
          emit_insn (pat);
          if (mode0 != mode)
            return gen_lowpart (tmode, dest);
          return dest;
        }
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
        if (MEM_P (op0))
          op0 = adjust_address (op0, imode, 0);
        else
          {
            gcc_assert (imode != BLKmode);
            op0 = gen_lowpart (imode, op0);

            /* If we got a SUBREG, force it into a register since we
               aren't going to be able to do another SUBREG on it.  */
            if (GET_CODE (op0) == SUBREG)
              op0 = force_reg (imode, op0);
          }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
           ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
           : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
        && bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
           /* ??? The big endian test here is wrong.  This is correct
              if the value is in a register, and if mode_for_size is not
              the same mode as op0.  This causes us to get unnecessarily
              inefficient code from the Thumb port when -mbig-endian.  */
           && (BYTES_BIG_ENDIAN
               ? bitpos + bitsize == BITS_PER_WORD
               : bitpos == 0)))
      && ((!MEM_P (op0)
           && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
                                     GET_MODE_BITSIZE (GET_MODE (op0)))
           && GET_MODE_SIZE (mode1) != 0
           && byte_offset % GET_MODE_SIZE (mode1) == 0)
          || (MEM_P (op0)
              && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
                  || (offset * BITS_PER_UNIT % bitsize == 0
                      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
        {
          if (MEM_P (op0))
            op0 = adjust_address (op0, mode1, offset);
          else
            {
              rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
                                             byte_offset);
              if (sub == NULL)
                goto no_subreg_mode_swap;
              op0 = sub;
            }
        }
      if (mode1 != mode)
        return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
        target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          /* Word number in TARGET to use.  */
          unsigned int wordnum
            = (WORDS_BIG_ENDIAN
               ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
               : i);
          /* Offset from start of field in OP0.  */
          unsigned int bit_offset = (WORDS_BIG_ENDIAN
                                     ? MAX (0, ((int) bitsize - ((int) i + 1)
                                                * (int) BITS_PER_WORD))
                                     : (int) i * BITS_PER_WORD);
          rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
          rtx result_part
            = extract_bit_field (op0, MIN (BITS_PER_WORD,
                                           bitsize - i * BITS_PER_WORD),
                                 bitnum + bit_offset, 1, target_part, mode,
                                 word_mode);

          gcc_assert (target_part);

          if (result_part != target_part)
            emit_move_insn (target_part, result_part);
        }

      if (unsignedp)
        {
          /* Unless we've filled TARGET, the upper regs in a multi-reg value
             need to be zero'd out.  */
          if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
            {
              unsigned int i, total_words;

              total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
              for (i = nwords; i < total_words; i++)
                emit_move_insn
                  (operand_subword (target,
                                    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
                                    1, VOIDmode),
                   const0_rtx);
            }
          return target;
        }

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
                             build_int_cst (NULL_TREE,
                                            GET_MODE_BITSIZE (mode) - bitsize),
                             NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
                           build_int_cst (NULL_TREE,
                                          GET_MODE_BITSIZE (mode) - bitsize),
                           NULL_RTX, 0);
    }
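  /* E.g., sign-extending a 48-bit field in a 64-bit mode shifts left by
     64 - 48 == 16 to bring the field's sign bit to the msb, then shifts
     arithmetically right by 16 to replicate it through the upper bits.  */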

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
          || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
        {
          if (!REG_P (op0))
            op0 = copy_to_reg (op0);
          op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
                                op0, (offset * UNITS_PER_WORD));
        }
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
          && bitsize > 0
          && GET_MODE_BITSIZE (extzv_mode) >= bitsize
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
        {
          unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0;
          rtx xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

          if (MEM_P (xop0))
            {
              int save_volatile_ok = volatile_ok;
              volatile_ok = 1;

              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for inserting into this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extzv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Make sure register is big enough for the whole field.  */
                  if (xoffset * BITS_PER_UNIT + unit
                      < offset * BITS_PER_UNIT + bitsize)
                    goto extzv_loses;

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);

              volatile_ok = save_volatile_ok;
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extzv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extzv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* Now convert from counting within UNIT to counting in MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0)
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extzv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 1);
            }
        }
      else
        extzv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
          && bitsize > 0
          && GET_MODE_BITSIZE (extv_mode) >= bitsize
          && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
                && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
        {
          int xbitpos = bitpos, xoffset = offset;
          rtx bitsize_rtx, bitpos_rtx;
          rtx last = get_last_insn ();
          rtx xop0 = op0, xtarget = target;
          rtx xspec_target = spec_target;
          rtx xspec_target_subreg = spec_target_subreg;
          rtx pat;
          enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

          if (MEM_P (xop0))
            {
              /* Is the memory operand acceptable?  */
              if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
                     (xop0, GET_MODE (xop0))))
                {
                  /* No, load into a reg and extract from there.  */
                  enum machine_mode bestmode;

                  /* Get the mode to use for inserting into this field.  If
                     OP0 is BLKmode, get the smallest mode consistent with the
                     alignment.  If OP0 is a non-BLKmode object that is no
                     wider than MAXMODE, use its mode.  Otherwise, use the
                     smallest mode containing the field.  */

                  if (GET_MODE (xop0) == BLKmode
                      || (GET_MODE_SIZE (GET_MODE (op0))
                          > GET_MODE_SIZE (maxmode)))
                    bestmode = get_best_mode (bitsize, bitnum,
                                              MEM_ALIGN (xop0), maxmode,
                                              MEM_VOLATILE_P (xop0));
                  else
                    bestmode = GET_MODE (xop0);

                  if (bestmode == VOIDmode
                      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
                          && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
                    goto extv_loses;

                  /* Compute offset as multiple of this unit,
                     counting in bytes.  */
                  unit = GET_MODE_BITSIZE (bestmode);
                  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
                  xbitpos = bitnum % unit;
                  xop0 = adjust_address (xop0, bestmode, xoffset);

                  /* Make sure register is big enough for the whole field.  */
                  if (xoffset * BITS_PER_UNIT + unit
                      < offset * BITS_PER_UNIT + bitsize)
                    goto extv_loses;

                  /* Fetch it to a register in that size.  */
                  xop0 = force_reg (bestmode, xop0);

                  /* XBITPOS counts within UNIT, which is what is expected.  */
                }
              else
                /* Get ref to first byte containing part of the field.  */
                xop0 = adjust_address (xop0, byte_mode, xoffset);
            }

          /* If op0 is a register, we need it in MAXMODE (which is usually
             SImode) to make it acceptable to the format of extv.  */
          if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
            goto extv_loses;
          if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
            xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

          /* On big-endian machines, we count bits from the most significant.
             If the bit field insn does not, we must invert.  */
          if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
            xbitpos = unit - bitsize - xbitpos;

          /* XBITPOS counts within a size of UNIT.
             Adjust to count within a size of MAXMODE.  */
          if (BITS_BIG_ENDIAN && !MEM_P (xop0))
            xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

          unit = GET_MODE_BITSIZE (maxmode);

          if (xtarget == 0)
            xtarget = xspec_target = gen_reg_rtx (tmode);

          if (GET_MODE (xtarget) != maxmode)
            {
              if (REG_P (xtarget))
                {
                  int wider = (GET_MODE_SIZE (maxmode)
                               > GET_MODE_SIZE (GET_MODE (xtarget)));
                  xtarget = gen_lowpart (maxmode, xtarget);
                  if (wider)
                    xspec_target_subreg = xtarget;
                }
              else
                xtarget = gen_reg_rtx (maxmode);
            }

          /* If this machine's extv insists on a register target,
             make sure we have one.  */
          if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
                 (xtarget, maxmode)))
            xtarget = gen_reg_rtx (maxmode);

          bitsize_rtx = GEN_INT (bitsize);
          bitpos_rtx = GEN_INT (xbitpos);

          pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
          if (pat)
            {
              emit_insn (pat);
              target = xtarget;
              spec_target = xspec_target;
              spec_target_subreg = xspec_target_subreg;
            }
          else
            {
              delete_insns_since (last);
              target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                                bitpos, target, 0);
            }
        }
      else
        extv_loses:
        target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
                                          bitpos, target, 0);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is not a scalar integral, first convert to the
         integer mode of that size and then access it as a floating-point
         value via a SUBREG.  */
      if (!SCALAR_INT_MODE_P (tmode))
        {
          enum machine_mode smode
            = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
          target = convert_to_mode (smode, target, unsignedp);
          target = force_reg (smode, target);
          return gen_lowpart (tmode, target);
        }

      return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
1707 \f
1708 /* Extract a bit field using shifts and boolean operations.
1709 Returns an rtx to represent the value.
1710 OP0 addresses a register (word) or memory (byte).
1711 BITPOS says which bit within the word or byte the bit field starts in.
1712 OFFSET says how many bytes farther the bit field starts;
1713 it is 0 if OP0 is a register.
1714 BITSIZE says how many bits long the bit field is.
1715 (If OP0 is a register, it may be narrower than a full word,
1716 but BITPOS still counts within a full word,
1717 which is significant on big-endian machines.)
1718
1719 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1720 If TARGET is nonzero, attempts to store the value there
1721 and return TARGET, but this is not guaranteed.
1722 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
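/* Illustrative example (editor's sketch, not from the original sources):
   with OP0 a little-endian SImode value, BITPOS == 5, BITSIZE == 8 and
   UNSIGNEDP nonzero, the code below reduces to a logical right shift by
   5 followed by an AND with mask_rtx (SImode, 0, 8, 0), i.e.
   (op0 >> 5) & 0xff.  */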
1723
1724 static rtx
1725 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1726 unsigned HOST_WIDE_INT offset,
1727 unsigned HOST_WIDE_INT bitsize,
1728 unsigned HOST_WIDE_INT bitpos, rtx target,
1729 int unsignedp)
1730 {
1731 unsigned int total_bits = BITS_PER_WORD;
1732 enum machine_mode mode;
1733
1734 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1735 {
1736 /* Special treatment for a bit field split across two registers. */
1737 if (bitsize + bitpos > BITS_PER_WORD)
1738 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1739 }
1740 else
1741 {
1742 /* Get the proper mode to use for this field. We want a mode that
1743 includes the entire field. If such a mode would be larger than
1744 a word, we won't be doing the extraction the normal way. */
1745
1746 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1747 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1748
1749 if (mode == VOIDmode)
1750 /* The only way this should occur is if the field spans word
1751 boundaries. */
1752 return extract_split_bit_field (op0, bitsize,
1753 bitpos + offset * BITS_PER_UNIT,
1754 unsignedp);
1755
1756 total_bits = GET_MODE_BITSIZE (mode);
1757
1758 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1759 be in the range 0 to total_bits-1, and put any excess bytes in
1760 OFFSET. */
1761 if (bitpos >= total_bits)
1762 {
1763 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1764 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1765 * BITS_PER_UNIT);
1766 }
1767
1768 /* Get ref to an aligned byte, halfword, or word containing the field.
1769 Adjust BITPOS to be position within a word,
1770 and OFFSET to be the offset of that word.
1771 Then alter OP0 to refer to that word. */
1772 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1773 offset -= (offset % (total_bits / BITS_PER_UNIT));
1774 op0 = adjust_address (op0, mode, offset);
1775 }
1776
1777 mode = GET_MODE (op0);
1778
1779 if (BYTES_BIG_ENDIAN)
1780 /* BITPOS is the distance between our msb and that of OP0.
1781 Convert it to the distance from the lsb. */
1782 bitpos = total_bits - bitsize - bitpos;
1783
1784 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1785 We have reduced the big-endian case to the little-endian case. */
1786
1787 if (unsignedp)
1788 {
1789 if (bitpos)
1790 {
1791 /* If the field does not already start at the lsb,
1792 shift it so it does. */
1793 tree amount = build_int_cst (NULL_TREE, bitpos);
1794 /* Maybe propagate the target for the shift. */
1795 /* But not if we will return it--could confuse integrate.c. */
1796 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1797 if (tmode != mode) subtarget = 0;
1798 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1799 }
1800 /* Convert the value to the desired mode. */
1801 if (mode != tmode)
1802 op0 = convert_to_mode (tmode, op0, 1);
1803
1804 /* Unless the msb of the field used to be the msb when we shifted,
1805 mask out the upper bits. */
1806
1807 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1808 return expand_binop (GET_MODE (op0), and_optab, op0,
1809 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1810 target, 1, OPTAB_LIB_WIDEN);
1811 return op0;
1812 }
1813
1814 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1815 then arithmetic-shift its lsb to the lsb of the word. */
1816 op0 = force_reg (mode, op0);
1817 if (mode != tmode)
1818 target = 0;
1819
1820 /* Find the narrowest integer mode that contains the field. */
1821
1822 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1823 mode = GET_MODE_WIDER_MODE (mode))
1824 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1825 {
1826 op0 = convert_to_mode (mode, op0, 0);
1827 break;
1828 }
1829
1830 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1831 {
1832 tree amount
1833 = build_int_cst (NULL_TREE,
1834 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1835 /* Maybe propagate the target for the shift. */
1836 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1837 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1838 }
1839
1840 return expand_shift (RSHIFT_EXPR, mode, op0,
1841 build_int_cst (NULL_TREE,
1842 GET_MODE_BITSIZE (mode) - bitsize),
1843 target, 0);
1844 }
1845 \f
1846 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1847 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1848 complement of that if COMPLEMENT. The mask is truncated if
1849 necessary to the width of mode MODE. The mask is zero-extended if
1850 BITSIZE+BITPOS is too small for MODE. */
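/* Worked example (editor's illustration): for MODE == SImode,
   BITPOS == 4 and BITSIZE == 8 the mask is eight ones followed by four
   zeros, i.e. 0x00000ff0; with COMPLEMENT nonzero it is 0xfffff00f.  */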
1851
1852 static rtx
1853 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1854 {
1855 HOST_WIDE_INT masklow, maskhigh;
1856
1857 if (bitsize == 0)
1858 masklow = 0;
1859 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1860 masklow = (HOST_WIDE_INT) -1 << bitpos;
1861 else
1862 masklow = 0;
1863
1864 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1865 masklow &= ((unsigned HOST_WIDE_INT) -1
1866 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1867
1868 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1869 maskhigh = -1;
1870 else
1871 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1872
1873 if (bitsize == 0)
1874 maskhigh = 0;
1875 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1876 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1877 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1878 else
1879 maskhigh = 0;
1880
1881 if (complement)
1882 {
1883 maskhigh = ~maskhigh;
1884 masklow = ~masklow;
1885 }
1886
1887 return immed_double_const (masklow, maskhigh, mode);
1888 }
1889
1890 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1891 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
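/* Worked example (editor's illustration): for VALUE == 5, BITPOS == 4
   and BITSIZE == 3 the result is (5 & 7) << 4 == 0x50.  */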
1892
1893 static rtx
1894 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1895 {
1896 unsigned HOST_WIDE_INT v = INTVAL (value);
1897 HOST_WIDE_INT low, high;
1898
1899 if (bitsize < HOST_BITS_PER_WIDE_INT)
1900 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1901
1902 if (bitpos < HOST_BITS_PER_WIDE_INT)
1903 {
1904 low = v << bitpos;
1905 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1906 }
1907 else
1908 {
1909 low = 0;
1910 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1911 }
1912
1913 return immed_double_const (low, high, mode);
1914 }
1915 \f
1916 /* Extract a bit field from a memory by forcing the alignment of the
1917 memory. This is efficient only if the field spans at least 4 boundaries.
1918
1919 OP0 is the MEM.
1920 BITSIZE is the field width; BITPOS is the position of the first bit.
1921 UNSIGNEDP is true if the result should be zero-extended. */
1922
1923 static rtx
1924 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1925 unsigned HOST_WIDE_INT bitpos,
1926 int unsignedp)
1927 {
1928 enum machine_mode mode, dmode;
1929 unsigned int m_bitsize, m_size;
1930 unsigned int sign_shift_up, sign_shift_dn;
1931 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1932
1933 /* Choose a mode that will fit BITSIZE. */
1934 mode = smallest_mode_for_size (bitsize, MODE_INT);
1935 m_size = GET_MODE_SIZE (mode);
1936 m_bitsize = GET_MODE_BITSIZE (mode);
1937
1938 /* Choose a mode twice as wide. Fail if no such mode exists. */
1939 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1940 if (dmode == BLKmode)
1941 return NULL;
1942
1943 do_pending_stack_adjust ();
1944 start = get_last_insn ();
1945
1946 /* At the end, we'll need an additional shift to deal with sign/zero
1947 extension. By default this will be a left+right shift of the
1948 appropriate size. But we may be able to eliminate one of them. */
1949 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1950
1951 if (STRICT_ALIGNMENT)
1952 {
1953 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1954 bitpos %= BITS_PER_UNIT;
1955
1956 /* We load two values to be concatenated. There's an edge condition
1957 that bears notice -- an aligned value at the end of a page can
1958 only load one value lest we segfault. So the two values we load
1959 are at "base & -size" and "(base + size - 1) & -size". If base
1960 is unaligned, the addresses will be aligned and sequential; if
1961 base is aligned, the addresses will both be equal to base. */
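      /* Numeric illustration (editor's note): with m_size == 4 and
	 base == 0x1003, a1 is 0x1000 and a2 is 0x1004; with
	 base == 0x1000, both a1 and a2 are 0x1000.  */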
1962
1963 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1964 GEN_INT (-(HOST_WIDE_INT)m_size),
1965 NULL, true, OPTAB_LIB_WIDEN);
1966 mark_reg_pointer (a1, m_bitsize);
1967 v1 = gen_rtx_MEM (mode, a1);
1968 set_mem_align (v1, m_bitsize);
1969 v1 = force_reg (mode, validize_mem (v1));
1970
1971 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1972 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1973 GEN_INT (-(HOST_WIDE_INT)m_size),
1974 NULL, true, OPTAB_LIB_WIDEN);
1975 v2 = gen_rtx_MEM (mode, a2);
1976 set_mem_align (v2, m_bitsize);
1977 v2 = force_reg (mode, validize_mem (v2));
1978
1979 /* Combine these two values into a double-word value. */
1980 if (m_bitsize == BITS_PER_WORD)
1981 {
1982 comb = gen_reg_rtx (dmode);
1983 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1984 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1985 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1986 }
1987 else
1988 {
1989 if (BYTES_BIG_ENDIAN)
1990 comb = v1, v1 = v2, v2 = comb;
1991 v1 = convert_modes (dmode, mode, v1, true);
1992 if (v1 == NULL)
1993 goto fail;
1994 v2 = convert_modes (dmode, mode, v2, true);
1995 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1996 NULL, true, OPTAB_LIB_WIDEN);
1997 if (v2 == NULL)
1998 goto fail;
1999 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
2000 true, OPTAB_LIB_WIDEN);
2001 if (comb == NULL)
2002 goto fail;
2003 }
2004
2005 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
2006 NULL, true, OPTAB_LIB_WIDEN);
2007 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
2008
2009 if (bitpos != 0)
2010 {
2011 if (sign_shift_up <= bitpos)
2012 bitpos -= sign_shift_up, sign_shift_up = 0;
2013 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
2014 NULL, true, OPTAB_LIB_WIDEN);
2015 }
2016 }
2017 else
2018 {
2019 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
2020 bitpos %= BITS_PER_UNIT;
2021
2022 /* When strict alignment is not required, we can just load directly
2023 from memory without masking. If the remaining BITPOS offset is
2024 small enough, we may be able to do all operations in MODE as
2025 opposed to DMODE. */
2026 if (bitpos + bitsize <= m_bitsize)
2027 dmode = mode;
2028 comb = adjust_address (op0, dmode, offset);
2029
2030 if (sign_shift_up <= bitpos)
2031 bitpos -= sign_shift_up, sign_shift_up = 0;
2032 shift = GEN_INT (bitpos);
2033 }
2034
2035 /* Shift down the double-word such that the requested value is at bit 0. */
2036 if (shift != const0_rtx)
2037 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2038 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2039 if (comb == NULL)
2040 goto fail;
2041
2042 /* If the field exactly matches MODE, then all we need to do is return the
2043 lowpart. Otherwise, shift to get the sign bits set properly. */
2044 result = force_reg (mode, gen_lowpart (mode, comb));
2045
2046 if (sign_shift_up)
2047 result = expand_simple_binop (mode, ASHIFT, result,
2048 GEN_INT (sign_shift_up),
2049 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2050 if (sign_shift_dn)
2051 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2052 result, GEN_INT (sign_shift_dn),
2053 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2054
2055 return result;
2056
2057 fail:
2058 delete_insns_since (start);
2059 return NULL;
2060 }
2061
2062 /* Extract a bit field that is split across two words
2063 and return an RTX for the result.
2064
2065 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2066 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2067 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
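/* Illustrative example (editor's sketch, assuming little-endian 32-bit
   words): for BITSIZE == 16 and BITPOS == 24 the loop below extracts
   8 bits at position 24 of the first word and 8 bits at position 0 of
   the second, shifts the second part left by 8, and combines the two
   with IOR; a signed field is then sign-extended with two shifts.  */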
2068
2069 static rtx
2070 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2071 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2072 {
2073 unsigned int unit;
2074 unsigned int bitsdone = 0;
2075 rtx result = NULL_RTX;
2076 int first = 1;
2077
2078 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2079 much at a time. */
2080 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2081 unit = BITS_PER_WORD;
2082 else
2083 {
2084 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
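      /* Note (editor's comment): the "0 &&" below short-circuits the
	 test, so this force-alignment path is currently disabled.  */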
2085 if (0 && bitsize / unit > 2)
2086 {
2087 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2088 unsignedp);
2089 if (tmp)
2090 return tmp;
2091 }
2092 }
2093
2094 while (bitsdone < bitsize)
2095 {
2096 unsigned HOST_WIDE_INT thissize;
2097 rtx part, word;
2098 unsigned HOST_WIDE_INT thispos;
2099 unsigned HOST_WIDE_INT offset;
2100
2101 offset = (bitpos + bitsdone) / unit;
2102 thispos = (bitpos + bitsdone) % unit;
2103
2104 /* THISSIZE must not overrun a word boundary. Otherwise,
2105 extract_fixed_bit_field will call us again, and we will mutually
2106 recurse forever. */
2107 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2108 thissize = MIN (thissize, unit - thispos);
2109
2110 /* If OP0 is a register, then handle OFFSET here.
2111
2112 When handling multiword bitfields, extract_bit_field may pass
2113 down a word_mode SUBREG of a larger REG for a bitfield that actually
2114 crosses a word boundary. Thus, for a SUBREG, we must find
2115 the current word starting from the base register. */
2116 if (GET_CODE (op0) == SUBREG)
2117 {
2118 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2119 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2120 GET_MODE (SUBREG_REG (op0)));
2121 offset = 0;
2122 }
2123 else if (REG_P (op0))
2124 {
2125 word = operand_subword_force (op0, offset, GET_MODE (op0));
2126 offset = 0;
2127 }
2128 else
2129 word = op0;
2130
2131 /* Extract the parts in bit-counting order,
2132 whose meaning is determined by BYTES_BIG_ENDIAN.
2133 OFFSET is in UNITs, and UNIT is in bits.
2134 extract_fixed_bit_field wants offset in bytes. */
2135 part = extract_fixed_bit_field (word_mode, word,
2136 offset * unit / BITS_PER_UNIT,
2137 thissize, thispos, 0, 1);
2138 bitsdone += thissize;
2139
2140 /* Shift this part into place for the result. */
2141 if (BYTES_BIG_ENDIAN)
2142 {
2143 if (bitsize != bitsdone)
2144 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2145 build_int_cst (NULL_TREE, bitsize - bitsdone),
2146 0, 1);
2147 }
2148 else
2149 {
2150 if (bitsdone != thissize)
2151 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2152 build_int_cst (NULL_TREE,
2153 bitsdone - thissize), 0, 1);
2154 }
2155
2156 if (first)
2157 result = part;
2158 else
2159 /* Combine the parts with bitwise or. This works
2160 because we extracted each part as an unsigned bit field. */
2161 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2162 OPTAB_LIB_WIDEN);
2163
2164 first = 0;
2165 }
2166
2167 /* Unsigned bit field: we are done. */
2168 if (unsignedp)
2169 return result;
2170 /* Signed bit field: sign-extend with two arithmetic shifts. */
2171 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2172 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2173 NULL_RTX, 0);
2174 return expand_shift (RSHIFT_EXPR, word_mode, result,
2175 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2176 NULL_RTX, 0);
2177 }
2178 \f
2179 /* Add INC into TARGET. */
2180
2181 void
2182 expand_inc (rtx target, rtx inc)
2183 {
2184 rtx value = expand_binop (GET_MODE (target), add_optab,
2185 target, inc,
2186 target, 0, OPTAB_LIB_WIDEN);
2187 if (value != target)
2188 emit_move_insn (target, value);
2189 }
2190
2191 /* Subtract DEC from TARGET. */
2192
2193 void
2194 expand_dec (rtx target, rtx dec)
2195 {
2196 rtx value = expand_binop (GET_MODE (target), sub_optab,
2197 target, dec,
2198 target, 0, OPTAB_LIB_WIDEN);
2199 if (value != target)
2200 emit_move_insn (target, value);
2201 }
2202 \f
2203 /* Output a shift instruction for expression code CODE,
2204 with SHIFTED being the rtx for the value to shift,
2205 and AMOUNT the tree for the amount to shift by.
2206 Store the result in the rtx TARGET, if that is convenient.
2207 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2208 Return the rtx for where the value is. */
2209
2210 rtx
2211 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2212 tree amount, rtx target, int unsignedp)
2213 {
2214 rtx op1, temp = 0;
2215 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2216 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2217 int try;
2218
2219 /* Previously we detected shift counts computed by NEGATE_EXPR
2220 and shifted in the other direction, but that does not work
2221 on all machines. */
2222
2223 op1 = expand_normal (amount);
2224
2225 if (SHIFT_COUNT_TRUNCATED)
2226 {
2227 if (GET_CODE (op1) == CONST_INT
2228 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2229 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2230 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2231 % GET_MODE_BITSIZE (mode));
2232 else if (GET_CODE (op1) == SUBREG
2233 && subreg_lowpart_p (op1))
2234 op1 = SUBREG_REG (op1);
2235 }
2236
2237 if (op1 == const0_rtx)
2238 return shifted;
2239
2240 /* Check whether it's cheaper to implement a left shift by a constant
2241 bit count by a sequence of additions. */
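  /* For instance (editor's illustration), a shift left by 2 becomes
     t = x + x; t = t + t; when two additions are cheaper than
     shift_cost[mode][2].  */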
2242 if (code == LSHIFT_EXPR
2243 && GET_CODE (op1) == CONST_INT
2244 && INTVAL (op1) > 0
2245 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2246 && INTVAL (op1) < MAX_BITS_PER_WORD
2247 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2248 && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2249 {
2250 int i;
2251 for (i = 0; i < INTVAL (op1); i++)
2252 {
2253 temp = force_reg (mode, shifted);
2254 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2255 unsignedp, OPTAB_LIB_WIDEN);
2256 }
2257 return shifted;
2258 }
2259
2260 for (try = 0; temp == 0 && try < 3; try++)
2261 {
2262 enum optab_methods methods;
2263
2264 if (try == 0)
2265 methods = OPTAB_DIRECT;
2266 else if (try == 1)
2267 methods = OPTAB_WIDEN;
2268 else
2269 methods = OPTAB_LIB_WIDEN;
2270
2271 if (rotate)
2272 {
2273 /* Widening does not work for rotation. */
2274 if (methods == OPTAB_WIDEN)
2275 continue;
2276 else if (methods == OPTAB_LIB_WIDEN)
2277 {
2278 /* If we have been unable to open-code this by a rotation,
2279 do it as the IOR of two shifts. I.e., to rotate A
2280 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2281 where C is the bitsize of A.
2282
2283 It is theoretically possible that the target machine might
2284 not be able to perform either shift and hence we would
2285 be making two libcalls rather than just the one for the
2286 shift (similarly if IOR could not be done). We will allow
2287 this extremely unlikely lossage to avoid complicating the
2288 code below. */
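	      /* E.g. (editor's illustration) for SImode, where C == 32,
		 a rotate left by 3 is computed as
		 (A << 3) | ((unsigned) A >> 29).  */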
2289
2290 rtx subtarget = target == shifted ? 0 : target;
2291 tree new_amount, other_amount;
2292 rtx temp1;
2293 tree type = TREE_TYPE (amount);
2294 if (GET_MODE (op1) != TYPE_MODE (type)
2295 && GET_MODE (op1) != VOIDmode)
2296 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2297 new_amount = make_tree (type, op1);
2298 other_amount
2299 = fold_build2 (MINUS_EXPR, type,
2300 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2301 new_amount);
2302
2303 shifted = force_reg (mode, shifted);
2304
2305 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2306 mode, shifted, new_amount, 0, 1);
2307 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2308 mode, shifted, other_amount, subtarget, 1);
2309 return expand_binop (mode, ior_optab, temp, temp1, target,
2310 unsignedp, methods);
2311 }
2312
2313 temp = expand_binop (mode,
2314 left ? rotl_optab : rotr_optab,
2315 shifted, op1, target, unsignedp, methods);
2316 }
2317 else if (unsignedp)
2318 temp = expand_binop (mode,
2319 left ? ashl_optab : lshr_optab,
2320 shifted, op1, target, unsignedp, methods);
2321
2322 /* Do arithmetic shifts.
2323 Also, if we are going to widen the operand, we can just as well
2324 use an arithmetic right-shift instead of a logical one. */
2325 if (temp == 0 && ! rotate
2326 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2327 {
2328 enum optab_methods methods1 = methods;
2329
2330 /* If trying to widen a log shift to an arithmetic shift,
2331 don't accept an arithmetic shift of the same size. */
2332 if (unsignedp)
2333 methods1 = OPTAB_MUST_WIDEN;
2334
2335 /* Arithmetic shift */
2336
2337 temp = expand_binop (mode,
2338 left ? ashl_optab : ashr_optab,
2339 shifted, op1, target, unsignedp, methods1);
2340 }
2341
2342 /* We used to try extzv here for logical right shifts, but that was
2343 only useful for one machine, the VAX, and caused poor code
2344 generation there for lshrdi3, so the code was deleted and a
2345 define_expand for lshrsi3 was added to vax.md. */
2346 }
2347
2348 gcc_assert (temp);
2349 return temp;
2350 }
2351 \f
2352 enum alg_code {
2353 alg_unknown,
2354 alg_zero,
2355 alg_m, alg_shift,
2356 alg_add_t_m2,
2357 alg_sub_t_m2,
2358 alg_add_factor,
2359 alg_sub_factor,
2360 alg_add_t2_m,
2361 alg_sub_t2_m,
2362 alg_impossible
2363 };
2364
2365 /* This structure holds the "cost" of a multiply sequence. The
2366 "cost" field holds the total rtx_cost of every operator in the
2367 synthetic multiplication sequence, hence cost(a op b) is defined
2368 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2369 The "latency" field holds the minimum possible latency of the
2370 synthetic multiply, on a hypothetical infinitely parallel CPU.
2371 This is the critical path, or the maximum height, of the expression
2372 tree which is the sum of rtx_costs on the most expensive path from
2373 any leaf to the root. Hence latency(a op b) is defined as zero for
2374 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
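/* For example (editor's illustration): if shifts and adds each have
   rtx_cost 1, then (x << 2) + (x << 1) has cost 3 but latency 2,
   because the two independent shifts can execute in parallel.  */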
2375
2376 struct mult_cost {
2377 short cost; /* Total rtx_cost of the multiplication sequence. */
2378 short latency; /* The latency of the multiplication sequence. */
2379 };
2380
2381 /* This macro is used to compare a pointer to a mult_cost against a
2382 single integer "rtx_cost" value. This is equivalent to the macro
2383 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2384 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2385 || ((X)->cost == (Y) && (X)->latency < (Y)))
2386
2387 /* This macro is used to compare two pointers to mult_costs against
2388 each other. The macro returns true if X is cheaper than Y.
2389 Currently, the cheaper of two mult_costs is the one with the
2390 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2391 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2392 || ((X)->cost == (Y)->cost \
2393 && (X)->latency < (Y)->latency))
2394
2395 /* This structure records a sequence of operations.
2396 `ops' is the number of operations recorded.
2397 `cost' is their total cost.
2398 The operations are stored in `op' and the corresponding
2399 logarithms of the integer coefficients in `log'.
2400
2401 These are the operations:
2402 alg_zero total := 0;
2403 alg_m total := multiplicand;
2404 alg_shift total := total * coeff;
2405 alg_add_t_m2 total := total + multiplicand * coeff;
2406 alg_sub_t_m2 total := total - multiplicand * coeff;
2407 alg_add_factor total := total * coeff + total;
2408 alg_sub_factor total := total * coeff - total;
2409 alg_add_t2_m total := total * coeff + multiplicand;
2410 alg_sub_t2_m total := total * coeff - multiplicand;
2411
2412 The first operand must be either alg_zero or alg_m. */
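/* One possible recording (editor's illustration), for a multiplication
   by 10: alg_m, then alg_add_t2_m with log 2, then alg_shift with
   log 1, i.e. total := x; total := total*4 + x; total := total*2.  */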
2413
2414 struct algorithm
2415 {
2416 struct mult_cost cost;
2417 short ops;
2418 /* The sizes of the OP and LOG fields are not directly related to the
2419 word size, but the worst case arises if we have few
2420 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2421 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2422 in total wordsize operations. */
2423 enum alg_code op[MAX_BITS_PER_WORD];
2424 char log[MAX_BITS_PER_WORD];
2425 };
2426
2427 /* The entry for our multiplication cache/hash table. */
2428 struct alg_hash_entry {
2429 /* The number we are multiplying by. */
2430 unsigned HOST_WIDE_INT t;
2431
2432 /* The mode in which we are multiplying something by T. */
2433 enum machine_mode mode;
2434
2435 /* The best multiplication algorithm for t. */
2436 enum alg_code alg;
2437
2438 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2439 Otherwise, the cost within which multiplication by T is
2440 impossible. */
2441 struct mult_cost cost;
2442 };
2443
2444 /* The number of cache/hash entries. */
2445 #if HOST_BITS_PER_WIDE_INT == 64
2446 #define NUM_ALG_HASH_ENTRIES 1031
2447 #else
2448 #define NUM_ALG_HASH_ENTRIES 307
2449 #endif
2450
2451 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2452 actually a hash table. If we have a collision, the older
2453 entry is kicked out. */
2454 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2455
2456 /* Indicates the type of fixup needed after a constant multiplication.
2457 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2458 the result should be negated, and ADD_VARIANT means that the
2459 multiplicand should be added to the result. */
2460 enum mult_variant {basic_variant, negate_variant, add_variant};
2461
2462 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2463 const struct mult_cost *, enum machine_mode mode);
2464 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2465 struct algorithm *, enum mult_variant *, int);
2466 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2467 const struct algorithm *, enum mult_variant);
2468 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2469 int, rtx *, int *, int *);
2470 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2471 static rtx extract_high_half (enum machine_mode, rtx);
2472 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2473 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2474 int, int);
2475 /* Compute and return the best algorithm for multiplying by T.
2476 The algorithm must cost less than COST_LIMIT.
2477 If retval.cost >= COST_LIMIT, no algorithm was found and all
2478 other fields of the returned struct are undefined.
2479 MODE is the machine mode of the multiplication. */
2480
2481 static void
2482 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2483 const struct mult_cost *cost_limit, enum machine_mode mode)
2484 {
2485 int m;
2486 struct algorithm *alg_in, *best_alg;
2487 struct mult_cost best_cost;
2488 struct mult_cost new_limit;
2489 int op_cost, op_latency;
2490 unsigned HOST_WIDE_INT q;
2491 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2492 int hash_index;
2493 bool cache_hit = false;
2494 enum alg_code cache_alg = alg_zero;
2495
2496 /* Indicate that no algorithm is yet found. If no algorithm
2497 is found, this value will be returned and indicate failure. */
2498 alg_out->cost.cost = cost_limit->cost + 1;
2499 alg_out->cost.latency = cost_limit->latency + 1;
2500
2501 if (cost_limit->cost < 0
2502 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2503 return;
2504
2505 /* Restrict the bits of "t" to the multiplication's mode. */
2506 t &= GET_MODE_MASK (mode);
2507
2508 /* t == 1 can be done in zero cost. */
2509 if (t == 1)
2510 {
2511 alg_out->ops = 1;
2512 alg_out->cost.cost = 0;
2513 alg_out->cost.latency = 0;
2514 alg_out->op[0] = alg_m;
2515 return;
2516 }
2517
2518 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2519 fail now. */
2520 if (t == 0)
2521 {
2522 if (MULT_COST_LESS (cost_limit, zero_cost))
2523 return;
2524 else
2525 {
2526 alg_out->ops = 1;
2527 alg_out->cost.cost = zero_cost;
2528 alg_out->cost.latency = zero_cost;
2529 alg_out->op[0] = alg_zero;
2530 return;
2531 }
2532 }
2533
2534 /* We'll be needing a couple of extra algorithm structures now. */
2535
2536 alg_in = alloca (sizeof (struct algorithm));
2537 best_alg = alloca (sizeof (struct algorithm));
2538 best_cost = *cost_limit;
2539
2540 /* Compute the hash index. */
2541 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2542
2543 /* See if we already know what to do for T. */
2544 if (alg_hash[hash_index].t == t
2545 && alg_hash[hash_index].mode == mode
2546 && alg_hash[hash_index].alg != alg_unknown)
2547 {
2548 cache_alg = alg_hash[hash_index].alg;
2549
2550 if (cache_alg == alg_impossible)
2551 {
2552 /* The cache tells us that it's impossible to synthesize
2553 multiplication by T within alg_hash[hash_index].cost. */
2554 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2555 /* COST_LIMIT is at least as restrictive as the one
2556 recorded in the hash table, in which case we have no
2557 hope of synthesizing a multiplication. Just
2558 return. */
2559 return;
2560
2561 /* If we get here, COST_LIMIT is less restrictive than the
2562 one recorded in the hash table, so we may be able to
2563 synthesize a multiplication. Proceed as if we didn't
2564 have the cache entry. */
2565 }
2566 else
2567 {
2568 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2569 /* The cached algorithm shows that this multiplication
2570 requires more cost than COST_LIMIT. Just return. This
2571 way, we don't clobber this cache entry with
2572 alg_impossible but retain useful information. */
2573 return;
2574
2575 cache_hit = true;
2576
2577 switch (cache_alg)
2578 {
2579 case alg_shift:
2580 goto do_alg_shift;
2581
2582 case alg_add_t_m2:
2583 case alg_sub_t_m2:
2584 goto do_alg_addsub_t_m2;
2585
2586 case alg_add_factor:
2587 case alg_sub_factor:
2588 goto do_alg_addsub_factor;
2589
2590 case alg_add_t2_m:
2591 goto do_alg_add_t2_m;
2592
2593 case alg_sub_t2_m:
2594 goto do_alg_sub_t2_m;
2595
2596 default:
2597 gcc_unreachable ();
2598 }
2599 }
2600 }
2601
2602 /* If we have a group of zero bits at the low-order part of T, try
2603 multiplying by the remaining bits and then doing a shift. */
2604
2605 if ((t & 1) == 0)
2606 {
2607 do_alg_shift:
2608 m = floor_log2 (t & -t); /* m = number of low zero bits */
2609 if (m < maxm)
2610 {
2611 q = t >> m;
2612 /* The function expand_shift will choose between a shift and
2613 a sequence of additions, so the observed cost is given as
2614 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2615 op_cost = m * add_cost[mode];
2616 if (shift_cost[mode][m] < op_cost)
2617 op_cost = shift_cost[mode][m];
2618 new_limit.cost = best_cost.cost - op_cost;
2619 new_limit.latency = best_cost.latency - op_cost;
2620 synth_mult (alg_in, q, &new_limit, mode);
2621
2622 alg_in->cost.cost += op_cost;
2623 alg_in->cost.latency += op_cost;
2624 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625 {
2626 struct algorithm *x;
2627 best_cost = alg_in->cost;
2628 x = alg_in, alg_in = best_alg, best_alg = x;
2629 best_alg->log[best_alg->ops] = m;
2630 best_alg->op[best_alg->ops] = alg_shift;
2631 }
2632 }
2633 if (cache_hit)
2634 goto done;
2635 }
2636
2637 /* If we have an odd number, add or subtract one. */
2638 if ((t & 1) != 0)
2639 {
2640 unsigned HOST_WIDE_INT w;
2641
2642 do_alg_addsub_t_m2:
2643 for (w = 1; (w & t) != 0; w <<= 1)
2644 ;
2645 /* If T was -1, then W will be zero after the loop. This is another
2646 case where T ends with ...111. Handling this with (T + 1) and
2647 subtracting 1 produces slightly better code and results in much
2648 faster algorithm selection than treating it like the ...0111 case
2649 below. */
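      /* E.g. (editor's illustration) T == 15 is synthesized as
	 16 * x - x via alg_sub_t_m2, while T == 5 goes through
	 T - 1 == 4 and alg_add_t_m2, giving 4 * x + x.  */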
2650 if (w == 0
2651 || (w > 2
2652 /* Reject the case where t is 3.
2653 Thus we prefer addition in that case. */
2654 && t != 3))
2655 {
2656 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2657
2658 op_cost = add_cost[mode];
2659 new_limit.cost = best_cost.cost - op_cost;
2660 new_limit.latency = best_cost.latency - op_cost;
2661 synth_mult (alg_in, t + 1, &new_limit, mode);
2662
2663 alg_in->cost.cost += op_cost;
2664 alg_in->cost.latency += op_cost;
2665 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2666 {
2667 struct algorithm *x;
2668 best_cost = alg_in->cost;
2669 x = alg_in, alg_in = best_alg, best_alg = x;
2670 best_alg->log[best_alg->ops] = 0;
2671 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2672 }
2673 }
2674 else
2675 {
2676 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2677
2678 op_cost = add_cost[mode];
2679 new_limit.cost = best_cost.cost - op_cost;
2680 new_limit.latency = best_cost.latency - op_cost;
2681 synth_mult (alg_in, t - 1, &new_limit, mode);
2682
2683 alg_in->cost.cost += op_cost;
2684 alg_in->cost.latency += op_cost;
2685 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2686 {
2687 struct algorithm *x;
2688 best_cost = alg_in->cost;
2689 x = alg_in, alg_in = best_alg, best_alg = x;
2690 best_alg->log[best_alg->ops] = 0;
2691 best_alg->op[best_alg->ops] = alg_add_t_m2;
2692 }
2693 }
2694 if (cache_hit)
2695 goto done;
2696 }
2697
2698 /* Look for factors of t of the form
2699 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2700 If we find such a factor, we can multiply by t using an algorithm that
2701 multiplies by q, shift the result by m and add/subtract it to itself.
2702
2703 We search for large factors first and loop down, even if large factors
2704 are less probable than small; if we find a large factor we will find a
2705 good sequence quickly, and therefore be able to prune (by decreasing
2706 COST_LIMIT) the search. */
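  /* E.g. (editor's illustration) t == 25 factors as 5 * (2**2 + 1):
     synthesize the multiplication by 5, then alg_add_factor with m == 2
     computes total := total*4 + total.  */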
2707
2708 do_alg_addsub_factor:
2709 for (m = floor_log2 (t - 1); m >= 2; m--)
2710 {
2711 unsigned HOST_WIDE_INT d;
2712
2713 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2714 if (t % d == 0 && t > d && m < maxm
2715 && (!cache_hit || cache_alg == alg_add_factor))
2716 {
2717 /* If the target has a cheap shift-and-add instruction use
2718 that in preference to a shift insn followed by an add insn.
2719 Assume that the shift-and-add is "atomic" with a latency
2720 equal to its cost, otherwise assume that on superscalar
2721 hardware the shift may be executed concurrently with the
2722 earlier steps in the algorithm. */
2723 op_cost = add_cost[mode] + shift_cost[mode][m];
2724 if (shiftadd_cost[mode][m] < op_cost)
2725 {
2726 op_cost = shiftadd_cost[mode][m];
2727 op_latency = op_cost;
2728 }
2729 else
2730 op_latency = add_cost[mode];
2731
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_latency;
2734 synth_mult (alg_in, t / d, &new_limit, mode);
2735
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_latency;
2738 if (alg_in->cost.latency < op_cost)
2739 alg_in->cost.latency = op_cost;
2740 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2741 {
2742 struct algorithm *x;
2743 best_cost = alg_in->cost;
2744 x = alg_in, alg_in = best_alg, best_alg = x;
2745 best_alg->log[best_alg->ops] = m;
2746 best_alg->op[best_alg->ops] = alg_add_factor;
2747 }
2748 /* Other factors will have been taken care of in the recursion. */
2749 break;
2750 }
2751
2752 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2753 if (t % d == 0 && t > d && m < maxm
2754 && (!cache_hit || cache_alg == alg_sub_factor))
2755 {
2756 /* If the target has a cheap shift-and-subtract insn use
2757 that in preference to a shift insn followed by a sub insn.
2758 Assume that the shift-and-sub is "atomic" with a latency
2759 equal to its cost, otherwise assume that on superscalar
2760 hardware the shift may be executed concurrently with the
2761 earlier steps in the algorithm. */
2762 op_cost = add_cost[mode] + shift_cost[mode][m];
2763 if (shiftsub_cost[mode][m] < op_cost)
2764 {
2765 op_cost = shiftsub_cost[mode][m];
2766 op_latency = op_cost;
2767 }
2768 else
2769 op_latency = add_cost[mode];
2770
2771 new_limit.cost = best_cost.cost - op_cost;
2772 new_limit.latency = best_cost.latency - op_latency;
2773 synth_mult (alg_in, t / d, &new_limit, mode);
2774
2775 alg_in->cost.cost += op_cost;
2776 alg_in->cost.latency += op_latency;
2777 if (alg_in->cost.latency < op_cost)
2778 alg_in->cost.latency = op_cost;
2779 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2780 {
2781 struct algorithm *x;
2782 best_cost = alg_in->cost;
2783 x = alg_in, alg_in = best_alg, best_alg = x;
2784 best_alg->log[best_alg->ops] = m;
2785 best_alg->op[best_alg->ops] = alg_sub_factor;
2786 }
2787 break;
2788 }
2789 }
2790 if (cache_hit)
2791 goto done;
2792
2793 /* Try shift-and-add (load effective address) instructions,
2794 i.e. do a*3, a*5, a*9. */
2795 if ((t & 1) != 0)
2796 {
2797 do_alg_add_t2_m:
2798 q = t - 1;
2799 q = q & -q;
2800 m = exact_log2 (q);
2801 if (m >= 0 && m < maxm)
2802 {
2803 op_cost = shiftadd_cost[mode][m];
2804 new_limit.cost = best_cost.cost - op_cost;
2805 new_limit.latency = best_cost.latency - op_cost;
2806 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2807
2808 alg_in->cost.cost += op_cost;
2809 alg_in->cost.latency += op_cost;
2810 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2811 {
2812 struct algorithm *x;
2813 best_cost = alg_in->cost;
2814 x = alg_in, alg_in = best_alg, best_alg = x;
2815 best_alg->log[best_alg->ops] = m;
2816 best_alg->op[best_alg->ops] = alg_add_t2_m;
2817 }
2818 }
2819 if (cache_hit)
2820 goto done;
2821
2822 do_alg_sub_t2_m:
2823 q = t + 1;
2824 q = q & -q;
2825 m = exact_log2 (q);
2826 if (m >= 0 && m < maxm)
2827 {
2828 op_cost = shiftsub_cost[mode][m];
2829 new_limit.cost = best_cost.cost - op_cost;
2830 new_limit.latency = best_cost.latency - op_cost;
2831 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2832
2833 alg_in->cost.cost += op_cost;
2834 alg_in->cost.latency += op_cost;
2835 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2836 {
2837 struct algorithm *x;
2838 best_cost = alg_in->cost;
2839 x = alg_in, alg_in = best_alg, best_alg = x;
2840 best_alg->log[best_alg->ops] = m;
2841 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2842 }
2843 }
2844 if (cache_hit)
2845 goto done;
2846 }
2847
2848 done:
2849 /* If best_cost has not decreased, we have not found any algorithm. */
2850 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2851 {
2852 /* We failed to find an algorithm. Record alg_impossible for
2853 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2854 we are asked to find an algorithm for T within the same or
2855 lower COST_LIMIT, we can immediately return to the
2856 caller. */
2857 alg_hash[hash_index].t = t;
2858 alg_hash[hash_index].mode = mode;
2859 alg_hash[hash_index].alg = alg_impossible;
2860 alg_hash[hash_index].cost = *cost_limit;
2861 return;
2862 }
2863
2864 /* Cache the result. */
2865 if (!cache_hit)
2866 {
2867 alg_hash[hash_index].t = t;
2868 alg_hash[hash_index].mode = mode;
2869 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2870 alg_hash[hash_index].cost.cost = best_cost.cost;
2871 alg_hash[hash_index].cost.latency = best_cost.latency;
2872 }
2873
2874 /* If the sequence is too long for `struct algorithm'
2875 to record, make this search fail. */
2876 if (best_alg->ops == MAX_BITS_PER_WORD)
2877 return;
2878
2879 /* Copy the algorithm from temporary space to the space at alg_out.
2880 We avoid using structure assignment because the majority of
2881 best_alg is normally undefined, and this is a critical function. */
2882 alg_out->ops = best_alg->ops + 1;
2883 alg_out->cost = best_cost;
2884 memcpy (alg_out->op, best_alg->op,
2885 alg_out->ops * sizeof *alg_out->op);
2886 memcpy (alg_out->log, best_alg->log,
2887 alg_out->ops * sizeof *alg_out->log);
2888 }
2889 \f
2890 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2891 Try three variations:
2892
2893 - a shift/add sequence based on VAL itself
2894 - a shift/add sequence based on -VAL, followed by a negation
2895 - a shift/add sequence based on VAL - 1, followed by an addition.
2896
2897 Return true if the cheapest of these costs less than MULT_COST,
2898 describing the algorithm in *ALG and final fixup in *VARIANT. */
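/* For example (editor's illustration): VAL == 7 can be synthesized
   directly as (x << 3) - x, or via VAL - 1 == 6 as ((x << 1) + x) << 1
   followed by the add_variant fixup, which adds the multiplicand back
   in.  */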
2899
2900 static bool
2901 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2902 struct algorithm *alg, enum mult_variant *variant,
2903 int mult_cost)
2904 {
2905 struct algorithm alg2;
2906 struct mult_cost limit;
2907 int op_cost;
2908
2909 /* Fail quickly for impossible bounds. */
2910 if (mult_cost < 0)
2911 return false;
2912
2913 /* Ensure that mult_cost provides a reasonable upper bound.
2914 Any constant multiplication can be performed with fewer
2915 than 2 * bits additions. */
2916 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2917 if (mult_cost > op_cost)
2918 mult_cost = op_cost;
2919
2920 *variant = basic_variant;
2921 limit.cost = mult_cost;
2922 limit.latency = mult_cost;
2923 synth_mult (alg, val, &limit, mode);
2924
2925 /* This works only if the negated value actually fits in an
2926 `unsigned int'. */
2927 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2928 {
2929 op_cost = neg_cost[mode];
2930 if (MULT_COST_LESS (&alg->cost, mult_cost))
2931 {
2932 limit.cost = alg->cost.cost - op_cost;
2933 limit.latency = alg->cost.latency - op_cost;
2934 }
2935 else
2936 {
2937 limit.cost = mult_cost - op_cost;
2938 limit.latency = mult_cost - op_cost;
2939 }
2940
2941 synth_mult (&alg2, -val, &limit, mode);
2942 alg2.cost.cost += op_cost;
2943 alg2.cost.latency += op_cost;
2944 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2945 *alg = alg2, *variant = negate_variant;
2946 }
2947
2948 /* This proves very useful for division-by-constant. */
2949 op_cost = add_cost[mode];
2950 if (MULT_COST_LESS (&alg->cost, mult_cost))
2951 {
2952 limit.cost = alg->cost.cost - op_cost;
2953 limit.latency = alg->cost.latency - op_cost;
2954 }
2955 else
2956 {
2957 limit.cost = mult_cost - op_cost;
2958 limit.latency = mult_cost - op_cost;
2959 }
2960
2961 synth_mult (&alg2, val - 1, &limit, mode);
2962 alg2.cost.cost += op_cost;
2963 alg2.cost.latency += op_cost;
2964 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2965 *alg = alg2, *variant = add_variant;
2966
2967 return MULT_COST_LESS (&alg->cost, mult_cost);
2968 }
2969
2970 /* A subroutine of expand_mult, used for constant multiplications.
2971 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2972 convenient. Use the shift/add sequence described by ALG and apply
2973 the final fixup specified by VARIANT. */
2974
2975 static rtx
2976 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2977 rtx target, const struct algorithm *alg,
2978 enum mult_variant variant)
2979 {
2980 HOST_WIDE_INT val_so_far;
2981 rtx insn, accum, tem;
2982 int opno;
2983 enum machine_mode nmode;
2984
2985 /* Avoid referencing memory over and over, and avoid invalid sharing
2986 on SUBREGs. */
2987 op0 = force_reg (mode, op0);
2988
2989 /* ACCUM starts out either as OP0 or as a zero, depending on
2990 the first operation. */
2991
2992 if (alg->op[0] == alg_zero)
2993 {
2994 accum = copy_to_mode_reg (mode, const0_rtx);
2995 val_so_far = 0;
2996 }
2997 else if (alg->op[0] == alg_m)
2998 {
2999 accum = copy_to_mode_reg (mode, op0);
3000 val_so_far = 1;
3001 }
3002 else
3003 gcc_unreachable ();
3004
3005 for (opno = 1; opno < alg->ops; opno++)
3006 {
3007 int log = alg->log[opno];
3008 rtx shift_subtarget = optimize ? 0 : accum;
3009 rtx add_target
3010 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3011 && !optimize)
3012 ? target : 0;
3013 rtx accum_target = optimize ? 0 : accum;
3014
3015 switch (alg->op[opno])
3016 {
3017 case alg_shift:
3018 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3019 build_int_cst (NULL_TREE, log),
3020 NULL_RTX, 0);
3021 val_so_far <<= log;
3022 break;
3023
3024 case alg_add_t_m2:
3025 tem = expand_shift (LSHIFT_EXPR, mode, op0,
3026 build_int_cst (NULL_TREE, log),
3027 NULL_RTX, 0);
3028 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3029 add_target ? add_target : accum_target);
3030 val_so_far += (HOST_WIDE_INT) 1 << log;
3031 break;
3032
3033 case alg_sub_t_m2:
3034 tem = expand_shift (LSHIFT_EXPR, mode, op0,
3035 build_int_cst (NULL_TREE, log),
3036 NULL_RTX, 0);
3037 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3038 add_target ? add_target : accum_target);
3039 val_so_far -= (HOST_WIDE_INT) 1 << log;
3040 break;
3041
3042 case alg_add_t2_m:
3043 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3044 build_int_cst (NULL_TREE, log),
3045 shift_subtarget,
3046 0);
3047 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3048 add_target ? add_target : accum_target);
3049 val_so_far = (val_so_far << log) + 1;
3050 break;
3051
3052 case alg_sub_t2_m:
3053 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3054 build_int_cst (NULL_TREE, log),
3055 shift_subtarget, 0);
3056 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3057 add_target ? add_target : accum_target);
3058 val_so_far = (val_so_far << log) - 1;
3059 break;
3060
3061 case alg_add_factor:
3062 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3063 build_int_cst (NULL_TREE, log),
3064 NULL_RTX, 0);
3065 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3066 add_target ? add_target : accum_target);
3067 val_so_far += val_so_far << log;
3068 break;
3069
3070 case alg_sub_factor:
3071 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3072 build_int_cst (NULL_TREE, log),
3073 NULL_RTX, 0);
3074 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3075 (add_target
3076 ? add_target : (optimize ? 0 : tem)));
3077 val_so_far = (val_so_far << log) - val_so_far;
3078 break;
3079
3080 default:
3081 gcc_unreachable ();
3082 }
3083
3084 /* Write a REG_EQUAL note on the last insn so that we can cse
3085 multiplication sequences. Note that if ACCUM is a SUBREG,
3086 we've set the inner register and must properly indicate
3087 that. */
3088
3089 tem = op0, nmode = mode;
3090 if (GET_CODE (accum) == SUBREG)
3091 {
3092 nmode = GET_MODE (SUBREG_REG (accum));
3093 tem = gen_lowpart (nmode, op0);
3094 }
3095
3096 insn = get_last_insn ();
3097 set_unique_reg_note (insn, REG_EQUAL,
3098 gen_rtx_MULT (nmode, tem,
3099 GEN_INT (val_so_far)));
3100 }
3101
3102 if (variant == negate_variant)
3103 {
3104 val_so_far = -val_so_far;
3105 accum = expand_unop (mode, neg_optab, accum, target, 0);
3106 }
3107 else if (variant == add_variant)
3108 {
3109 val_so_far = val_so_far + 1;
3110 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3111 }
3112
3113 /* Compare only the bits of val and val_so_far that are significant
3114 in the result mode, to avoid sign-/zero-extension confusion. */
3115 val &= GET_MODE_MASK (mode);
3116 val_so_far &= GET_MODE_MASK (mode);
3117 gcc_assert (val == val_so_far);
3118
3119 return accum;
3120 }
3121
3122 /* Perform a multiplication and return an rtx for the result.
3123 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3124 TARGET is a suggestion for where to store the result (an rtx).
3125
3126 We check specially for a constant integer as OP1.
3127 If you want this check for OP0 as well, then before calling
3128 you should swap the two operands if OP0 would be constant. */
3129
3130 rtx
3131 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3132 int unsignedp)
3133 {
3134 enum mult_variant variant;
3135 struct algorithm algorithm;
3136 int max_cost;
3137
3138 /* Handling const0_rtx here allows us to use zero as a rogue value for
3139 coeff below. */
3140 if (op1 == const0_rtx)
3141 return const0_rtx;
3142 if (op1 == const1_rtx)
3143 return op0;
3144 if (op1 == constm1_rtx)
3145 return expand_unop (mode,
3146 GET_MODE_CLASS (mode) == MODE_INT
3147 && !unsignedp && flag_trapv
3148 ? negv_optab : neg_optab,
3149 op0, target, 0);
3150
3151 /* These are the operations that are potentially turned into a sequence
3152 of shifts and additions. */
3153 if (SCALAR_INT_MODE_P (mode)
3154 && (unsignedp || !flag_trapv))
3155 {
3156 HOST_WIDE_INT coeff = 0;
3157 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3158
3159 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3160 less than or equal in size to `unsigned int' this doesn't matter.
3161 If the mode is larger than `unsigned int', then synth_mult works
3162 only if the constant value exactly fits in an `unsigned int' without
3163 any truncation. This means that multiplying by negative values does
3164 not work; results are off by 2^32 on a 32 bit machine. */
3165
3166 if (GET_CODE (op1) == CONST_INT)
3167 {
3168 /* Attempt to handle multiplication of DImode values by negative
3169 coefficients, by performing the multiplication by a positive
3170 multiplier and then inverting the result. */
3171 if (INTVAL (op1) < 0
3172 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3173 {
3174 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3175 result is interpreted as an unsigned coefficient.
3176 Exclude cost of op0 from max_cost to match the cost
3177 calculation of the synth_mult. */
3178 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3179 - neg_cost[mode];
3180 if (max_cost > 0
3181 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3182 &variant, max_cost))
3183 {
3184 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3185 NULL_RTX, &algorithm,
3186 variant);
3187 return expand_unop (mode, neg_optab, temp, target, 0);
3188 }
3189 }
3190 else coeff = INTVAL (op1);
3191 }
3192 else if (GET_CODE (op1) == CONST_DOUBLE)
3193 {
3194 /* If we are multiplying in DImode, it may still be a win
3195 to try to work with shifts and adds. */
3196 if (CONST_DOUBLE_HIGH (op1) == 0)
3197 coeff = CONST_DOUBLE_LOW (op1);
3198 else if (CONST_DOUBLE_LOW (op1) == 0
3199 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3200 {
3201 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3202 + HOST_BITS_PER_WIDE_INT;
3203 return expand_shift (LSHIFT_EXPR, mode, op0,
3204 build_int_cst (NULL_TREE, shift),
3205 target, unsignedp);
3206 }
3207 }
3208
3209 /* We used to test optimize here, on the grounds that it's better to
3210 produce a smaller program when -O is not used. But this causes
3211 such a terrible slowdown sometimes that it seems better to always
3212 use synth_mult. */
3213 if (coeff != 0)
3214 {
3215 /* Special case powers of two. */
3216 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3217 return expand_shift (LSHIFT_EXPR, mode, op0,
3218 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3219 target, unsignedp);
3220
3221 /* Exclude cost of op0 from max_cost to match the cost
3222 calculation of the synth_mult. */
3223 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3224 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3225 max_cost))
3226 return expand_mult_const (mode, op0, coeff, target,
3227 &algorithm, variant);
3228 }
3229 }
3230
3231 if (GET_CODE (op0) == CONST_DOUBLE)
3232 {
3233 rtx temp = op0;
3234 op0 = op1;
3235 op1 = temp;
3236 }
3237
3238 /* Expand x*2.0 as x+x. */
3239 if (GET_CODE (op1) == CONST_DOUBLE
3240 && SCALAR_FLOAT_MODE_P (mode))
3241 {
3242 REAL_VALUE_TYPE d;
3243 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3244
3245 if (REAL_VALUES_EQUAL (d, dconst2))
3246 {
3247 op0 = force_reg (GET_MODE (op0), op0);
3248 return expand_binop (mode, add_optab, op0, op0,
3249 target, unsignedp, OPTAB_LIB_WIDEN);
3250 }
3251 }
3252
3253 /* This used to use umul_optab if unsigned, but for non-widening multiply
3254 there is no difference between signed and unsigned. */
3255 op0 = expand_binop (mode,
3256 ! unsignedp
3257 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3258 ? smulv_optab : smul_optab,
3259 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3260 gcc_assert (op0);
3261 return op0;
3262 }
3263 \f
3264 /* Return the smallest n such that 2**n >= X. */
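/* E.g. (editor's note) ceil_log2 (5) == 3, since floor_log2 (4) == 2.  */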
3265
3266 int
3267 ceil_log2 (unsigned HOST_WIDE_INT x)
3268 {
3269 return floor_log2 (x - 1) + 1;
3270 }
3271
3272 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3273 replace division by D, and put the least significant N bits of the result
3274 in *MULTIPLIER_PTR and return the most significant bit.
3275
3276 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3277 needed precision is in PRECISION (should be <= N).
3278
3279 PRECISION should be as small as possible so this function can choose
3280 a multiplier more freely.
3281
3282 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3283 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3284
3285 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3286 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
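/* Worked example (editor's illustration): for D == 5, N == 32 and
   PRECISION == 32 this chooses multiplier 0xCCCCCCCD (== ceil(2**34/5))
   with *POST_SHIFT_PTR == 2 and *LGUP_PTR == 3, so that for any 32-bit
   unsigned x, x/5 == (x * 0xCCCCCCCD) >> 34.  */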
3287
3288 static
3289 unsigned HOST_WIDE_INT
3290 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3291 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3292 {
3293 HOST_WIDE_INT mhigh_hi, mlow_hi;
3294 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3295 int lgup, post_shift;
3296 int pow, pow2;
3297 unsigned HOST_WIDE_INT nl, dummy1;
3298 HOST_WIDE_INT nh, dummy2;
3299
3300 /* lgup = ceil(log2(divisor)); */
3301 lgup = ceil_log2 (d);
3302
3303 gcc_assert (lgup <= n);
3304
3305 pow = n + lgup;
3306 pow2 = n + lgup - precision;
3307
3308 /* We could handle this with some effort, but this case is much
3309 better handled directly with a scc insn, so rely on caller using
3310 that. */
3311 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3312
3313 /* mlow = 2^(N + lgup)/d */
3314 if (pow >= HOST_BITS_PER_WIDE_INT)
3315 {
3316 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3317 nl = 0;
3318 }
3319 else
3320 {
3321 nh = 0;
3322 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3323 }
3324 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3325 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3326
3327 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3328 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3329 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3330 else
3331 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3332 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3333 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3334
3335 gcc_assert (!mhigh_hi || nh - d < d);
3336 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3337 /* Assert that mlow < mhigh. */
3338 gcc_assert (mlow_hi < mhigh_hi
3339 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3340
3341 /* If precision == N, then mlow, mhigh exceed 2^N
3342 (but they do not exceed 2^(N+1)). */
3343
3344 /* Reduce to lowest terms. */
3345 for (post_shift = lgup; post_shift > 0; post_shift--)
3346 {
3347 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3348 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3349 if (ml_lo >= mh_lo)
3350 break;
3351
3352 mlow_hi = 0;
3353 mlow_lo = ml_lo;
3354 mhigh_hi = 0;
3355 mhigh_lo = mh_lo;
3356 }
3357
3358 *post_shift_ptr = post_shift;
3359 *lgup_ptr = lgup;
3360 if (n < HOST_BITS_PER_WIDE_INT)
3361 {
3362 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3363 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3364 return mhigh_lo >= mask;
3365 }
3366 else
3367 {
3368 *multiplier_ptr = GEN_INT (mhigh_lo);
3369 return mhigh_hi;
3370 }
3371 }
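
/* A worked example (purely illustrative): for D = 7 and N = PRECISION = 32,
   lgup = 3, so mlow = 2**35 / 7 = 0x124924924 and
   mhigh = (2**35 + 2**3) / 7 = 0x124924925.  Neither can be halved while
   keeping mlow < mhigh, so *POST_SHIFT_PTR is 3 and the chosen N + 1 bit
   multiplier is 0x124924925: the low 32 bits 0x24924925 go into
   *MULTIPLIER_PTR and the returned most significant bit is 1.  */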
3372
3373 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3374 congruent to 1 (mod 2**N). */
3375
3376 static unsigned HOST_WIDE_INT
3377 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3378 {
3379 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3380
3381 /* The algorithm notes that the choice y = x satisfies
3382 x*y == 1 mod 2^3, since x is assumed odd.
3383 Each iteration doubles the number of bits of significance in y. */
3384
3385 unsigned HOST_WIDE_INT mask;
3386 unsigned HOST_WIDE_INT y = x;
3387 int nbit = 3;
3388
3389 mask = (n == HOST_BITS_PER_WIDE_INT
3390 ? ~(unsigned HOST_WIDE_INT) 0
3391 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3392
3393 while (nbit < n)
3394 {
3395 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3396 nbit *= 2;
3397 }
3398 return y;
3399 }
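
/* For instance, invert_mod2n (7, 8): y starts at 7, which already satisfies
   7*7 == 49 == 1 (mod 2**3).  One step gives y = 7 * (2 - 49) mod 256
   = -329 mod 256 = 183, and indeed 183 * 7 == 1281 == 5*256 + 1.  The
   second step leaves y == 183 == 0xb7, the inverse of 7 mod 2**8.  */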
3400
3401 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3402    that was performed with the wrong signedness.  ADJ_OPERAND is already
3403    the high half of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust
3404    the signed product to become unsigned; if UNSIGNEDP is zero, adjust the
3405    unsigned product to become signed.
3406
3407 The result is put in TARGET if that is convenient.
3408
3409 MODE is the mode of operation. */
3410
3411 rtx
3412 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3413 rtx op1, rtx target, int unsignedp)
3414 {
3415 rtx tem;
3416 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3417
3418 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3419 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3420 NULL_RTX, 0);
3421 tem = expand_and (mode, tem, op1, NULL_RTX);
3422 adj_operand
3423 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3424 adj_operand);
3425
3426 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3427 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3428 NULL_RTX, 0);
3429 tem = expand_and (mode, tem, op0, NULL_RTX);
3430 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3431 target);
3432
3433 return target;
3434 }
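
/* The code above implements the exact identity relating the signed and
   unsigned high halves of an N-bit product: with s(v) = v >> (N-1)
   taken arithmetically (-1 for negative v, else 0),

     highpart_unsigned (x, y) == highpart_signed (x, y)
                                 + (s(x) & y) + (s(y) & x)  (mod 2**N),

   so the two masked terms are added when UNSIGNEDP is nonzero and
   subtracted when it is zero.  */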
3435
3436 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3437
3438 static rtx
3439 extract_high_half (enum machine_mode mode, rtx op)
3440 {
3441 enum machine_mode wider_mode;
3442
3443 if (mode == word_mode)
3444 return gen_highpart (mode, op);
3445
3446 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3447
3448 wider_mode = GET_MODE_WIDER_MODE (mode);
3449 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3450 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3451 return convert_modes (mode, wider_mode, op, 0);
3452 }
3453
3454 /* Like expand_mult_highpart, but only consider using a multiplication
3455 optab. OP1 is an rtx for the constant operand. */
3456
3457 static rtx
3458 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3459 rtx target, int unsignedp, int max_cost)
3460 {
3461 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3462 enum machine_mode wider_mode;
3463 optab moptab;
3464 rtx tem;
3465 int size;
3466
3467 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3468
3469 wider_mode = GET_MODE_WIDER_MODE (mode);
3470 size = GET_MODE_BITSIZE (mode);
3471
3472   /* First, try using a multiplication insn that generates only the needed
3473      high part of the product, with the signedness given by UNSIGNEDP.  */
3474 if (mul_highpart_cost[mode] < max_cost)
3475 {
3476 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3477 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3478 unsignedp, OPTAB_DIRECT);
3479 if (tem)
3480 return tem;
3481 }
3482
3483   /* Second, same as above, but use the sign flavor opposite to UNSIGNEDP;
3484      the result then needs to be adjusted after the multiplication.  */
3485 if (size - 1 < BITS_PER_WORD
3486 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3487 + 4 * add_cost[mode] < max_cost))
3488 {
3489 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3490 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3491 unsignedp, OPTAB_DIRECT);
3492 if (tem)
3493 /* We used the wrong signedness. Adjust the result. */
3494 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3495 tem, unsignedp);
3496 }
3497
3498 /* Try widening multiplication. */
3499 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3500 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3501 && mul_widen_cost[wider_mode] < max_cost)
3502 {
3503 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3504 unsignedp, OPTAB_WIDEN);
3505 if (tem)
3506 return extract_high_half (mode, tem);
3507 }
3508
3509   /* Try widening the mode and performing a non-widening multiplication.  */
3510 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3511 && size - 1 < BITS_PER_WORD
3512 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3513 {
3514 rtx insns, wop0, wop1;
3515
3516 /* We need to widen the operands, for example to ensure the
3517 constant multiplier is correctly sign or zero extended.
3518 	 Use a sequence to clean up any instructions emitted by
3519 the conversions if things don't work out. */
3520 start_sequence ();
3521 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3522 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3523 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3524 unsignedp, OPTAB_WIDEN);
3525 insns = get_insns ();
3526 end_sequence ();
3527
3528 if (tem)
3529 {
3530 emit_insn (insns);
3531 return extract_high_half (mode, tem);
3532 }
3533 }
3534
3535 /* Try widening multiplication of opposite signedness, and adjust. */
3536 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3537 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3538 && size - 1 < BITS_PER_WORD
3539 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3540 + 4 * add_cost[mode] < max_cost))
3541 {
3542 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3543 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3544 if (tem != 0)
3545 {
3546 tem = extract_high_half (mode, tem);
3547 /* We used the wrong signedness. Adjust the result. */
3548 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3549 target, unsignedp);
3550 }
3551 }
3552
3553 return 0;
3554 }
3555
3556 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3557 putting the high half of the result in TARGET if that is convenient,
3558    and returning where the result is.  If the operation cannot be performed,
3559 0 is returned.
3560
3561 MODE is the mode of operation and result.
3562
3563 UNSIGNEDP nonzero means unsigned multiply.
3564
3565 MAX_COST is the total allowed cost for the expanded RTL. */
3566
3567 static rtx
3568 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3569 rtx target, int unsignedp, int max_cost)
3570 {
3571 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3572 unsigned HOST_WIDE_INT cnst1;
3573 int extra_cost;
3574 bool sign_adjust = false;
3575 enum mult_variant variant;
3576 struct algorithm alg;
3577 rtx tem;
3578
3579 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3580   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3581 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3582
3583 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3584
3585 /* We can't optimize modes wider than BITS_PER_WORD.
3586 ??? We might be able to perform double-word arithmetic if
3587 mode == word_mode, however all the cost calculations in
3588 synth_mult etc. assume single-word operations. */
3589 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3590 return expand_mult_highpart_optab (mode, op0, op1, target,
3591 unsignedp, max_cost);
3592
3593 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3594
3595   /* Check whether we are trying to multiply by a negative constant.  */
3596 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3597 {
3598 sign_adjust = true;
3599 extra_cost += add_cost[mode];
3600 }
3601
3602 /* See whether shift/add multiplication is cheap enough. */
3603 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3604 max_cost - extra_cost))
3605 {
3606 /* See whether the specialized multiplication optabs are
3607 cheaper than the shift/add version. */
3608 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3609 alg.cost.cost + extra_cost);
3610 if (tem)
3611 return tem;
3612
3613 tem = convert_to_mode (wider_mode, op0, unsignedp);
3614 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3615 tem = extract_high_half (mode, tem);
3616
3617 /* Adjust result for signedness. */
3618 if (sign_adjust)
3619 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3620
3621 return tem;
3622 }
3623 return expand_mult_highpart_optab (mode, op0, op1, target,
3624 unsignedp, max_cost);
3625 }
3626
3627
3628 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3629
3630 static rtx
3631 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3632 {
3633 unsigned HOST_WIDE_INT masklow, maskhigh;
3634 rtx result, temp, shift, label;
3635 int logd;
3636
3637 logd = floor_log2 (d);
3638 result = gen_reg_rtx (mode);
3639
3640 /* Avoid conditional branches when they're expensive. */
3641 if (BRANCH_COST >= 2
3642 && !optimize_size)
3643 {
3644 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3645 mode, 0, -1);
3646 if (signmask)
3647 {
3648 signmask = force_reg (mode, signmask);
3649 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3650 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3651
3652 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3653 which instruction sequence to use. If logical right shifts
3654 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3655 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
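
	  /* Both sequences compute the truncating (C-style) remainder.
	     Worked example with OP0 = -5 and D = 4, so signmask = -1: the
	     XOR/SUB variant computes t = (-5 ^ -1) - -1 = 5, then
	     t = t & 3 = 1, then t = (1 ^ -1) - -1 = -1, which is indeed
	     -5 % 4.  */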
3656
3657 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3658 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3659 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3660 {
3661 temp = expand_binop (mode, xor_optab, op0, signmask,
3662 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3663 temp = expand_binop (mode, sub_optab, temp, signmask,
3664 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3665 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3667 temp = expand_binop (mode, xor_optab, temp, signmask,
3668 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3669 temp = expand_binop (mode, sub_optab, temp, signmask,
3670 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 }
3672 else
3673 {
3674 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3675 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676 signmask = force_reg (mode, signmask);
3677
3678 temp = expand_binop (mode, add_optab, op0, signmask,
3679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3680 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3681 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3682 temp = expand_binop (mode, sub_optab, temp, signmask,
3683 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 }
3685 return temp;
3686 }
3687 }
3688
3689 /* Mask contains the mode's signbit and the significant bits of the
3690 modulus. By including the signbit in the operation, many targets
3691 can avoid an explicit compare operation in the following comparison
3692 against zero. */
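
  /* Illustration for a 32-bit mode and D = 4: the mask is 0x80000003.
     For OP0 = -5 (0xfffffffb) the AND below yields 0x80000003, which is
     negative, so the adjustment computes
     ((0x80000003 - 1) | 0xfffffffc) + 1 = 0xffffffff = -1, the correct
     truncating remainder.  For nonnegative OP0 the AND result already is
     the remainder and the branch skips the adjustment.  */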
3693
3694 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3695 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3696 {
3697 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3698 maskhigh = -1;
3699 }
3700 else
3701 maskhigh = (HOST_WIDE_INT) -1
3702 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3703
3704 temp = expand_binop (mode, and_optab, op0,
3705 immed_double_const (masklow, maskhigh, mode),
3706 result, 1, OPTAB_LIB_WIDEN);
3707 if (temp != result)
3708 emit_move_insn (result, temp);
3709
3710 label = gen_label_rtx ();
3711 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3712
3713 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3714 0, OPTAB_LIB_WIDEN);
3715 masklow = (HOST_WIDE_INT) -1 << logd;
3716 maskhigh = -1;
3717 temp = expand_binop (mode, ior_optab, temp,
3718 immed_double_const (masklow, maskhigh, mode),
3719 result, 1, OPTAB_LIB_WIDEN);
3720 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3721 0, OPTAB_LIB_WIDEN);
3722 if (temp != result)
3723 emit_move_insn (result, temp);
3724 emit_label (label);
3725 return result;
3726 }
3727
3728 /* Expand signed division of OP0 by a power of two D in mode MODE.
3729 This routine is only called for positive values of D. */
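
/* All the branch-free variants below rely on the identity

     x / 2**logd == (x + (x < 0 ? 2**logd - 1 : 0)) >> logd

   for truncating division.  E.g. -5 / 4: (-5 + 3) >> 2 == -2 >> 2 == -1,
   as required, whereas a plain arithmetic shift of -5 by 2 would give -2
   (flooring).  */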
3730
3731 static rtx
3732 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3733 {
3734 rtx temp, label;
3735 tree shift;
3736 int logd;
3737
3738 logd = floor_log2 (d);
3739 shift = build_int_cst (NULL_TREE, logd);
3740
3741 if (d == 2 && BRANCH_COST >= 1)
3742 {
3743 temp = gen_reg_rtx (mode);
3744 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3745 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3746 0, OPTAB_LIB_WIDEN);
3747 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3748 }
3749
3750 #ifdef HAVE_conditional_move
3751 if (BRANCH_COST >= 2)
3752 {
3753 rtx temp2;
3754
3755 /* ??? emit_conditional_move forces a stack adjustment via
3756 compare_from_rtx so, if the sequence is discarded, it will
3757 be lost. Do it now instead. */
3758 do_pending_stack_adjust ();
3759
3760 start_sequence ();
3761 temp2 = copy_to_mode_reg (mode, op0);
3762 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3763 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3764 temp = force_reg (mode, temp);
3765
3766 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3767 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3768 mode, temp, temp2, mode, 0);
3769 if (temp2)
3770 {
3771 rtx seq = get_insns ();
3772 end_sequence ();
3773 emit_insn (seq);
3774 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3775 }
3776 end_sequence ();
3777 }
3778 #endif
3779
3780 if (BRANCH_COST >= 2)
3781 {
3782 int ushift = GET_MODE_BITSIZE (mode) - logd;
3783
3784 temp = gen_reg_rtx (mode);
3785 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3786 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3787 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3788 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3789 else
3790 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3791 build_int_cst (NULL_TREE, ushift),
3792 NULL_RTX, 1);
3793 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3794 0, OPTAB_LIB_WIDEN);
3795 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3796 }
3797
3798 label = gen_label_rtx ();
3799 temp = copy_to_mode_reg (mode, op0);
3800 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3801 expand_inc (temp, GEN_INT (d - 1));
3802 emit_label (label);
3803 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3804 }
3805 \f
3806 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3807 if that is convenient, and returning where the result is.
3808 You may request either the quotient or the remainder as the result;
3809 specify REM_FLAG nonzero to get the remainder.
3810
3811 CODE is the expression code for which kind of division this is;
3812 it controls how rounding is done. MODE is the machine mode to use.
3813 UNSIGNEDP nonzero means do unsigned division. */
3814
3815 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3816 and then correct it by or'ing in missing high bits
3817 if result of ANDI is nonzero.
3818 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3819 This could optimize to a bfexts instruction.
3820 But C doesn't use these operations, so their optimizations are
3821 left for later. */
3822 /* ??? For modulo, we don't actually need the highpart of the first product,
3823 the low part will do nicely. And for small divisors, the second multiply
3824 can also be a low-part only multiply or even be completely left out.
3825 E.g. to calculate the remainder of a division by 3 with a 32 bit
3826 multiply, multiply with 0x55555556 and extract the upper two bits;
3827 the result is exact for inputs up to 0x1fffffff.
3828 The input range can be reduced by using cross-sum rules.
3829 For odd divisors >= 3, the following table gives right shift counts
3830 so that if a number is shifted by an integer multiple of the given
3831 amount, the remainder stays the same:
3832 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3833 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3834 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3835 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3836 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3837
3838 Cross-sum rules for even numbers can be derived by leaving as many bits
3839 to the right alone as the divisor has zeros to the right.
3840 E.g. if x is an unsigned 32 bit number:
3841 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3842 */
3843
3844 rtx
3845 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3846 rtx op0, rtx op1, rtx target, int unsignedp)
3847 {
3848 enum machine_mode compute_mode;
3849 rtx tquotient;
3850 rtx quotient = 0, remainder = 0;
3851 rtx last;
3852 int size;
3853 rtx insn, set;
3854 optab optab1, optab2;
3855 int op1_is_constant, op1_is_pow2 = 0;
3856 int max_cost, extra_cost;
3857 static HOST_WIDE_INT last_div_const = 0;
3858 static HOST_WIDE_INT ext_op1;
3859
3860 op1_is_constant = GET_CODE (op1) == CONST_INT;
3861 if (op1_is_constant)
3862 {
3863 ext_op1 = INTVAL (op1);
3864 if (unsignedp)
3865 ext_op1 &= GET_MODE_MASK (mode);
3866 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3867 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3868 }
3869
3870 /*
3871 This is the structure of expand_divmod:
3872
3873 First comes code to fix up the operands so we can perform the operations
3874 correctly and efficiently.
3875
3876     Second comes a switch statement with code specific to each rounding mode.
3877 For some special operands this code emits all RTL for the desired
3878 operation, for other cases, it generates only a quotient and stores it in
3879 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3880 to indicate that it has not done anything.
3881
3882 Last comes code that finishes the operation. If QUOTIENT is set and
3883 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3884 QUOTIENT is not set, it is computed using trunc rounding.
3885
3886 We try to generate special code for division and remainder when OP1 is a
3887 constant. If |OP1| = 2**n we can use shifts and some other fast
3888 operations. For other values of OP1, we compute a carefully selected
3889 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3890 by m.
3891
3892 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3893 half of the product. Different strategies for generating the product are
3894 implemented in expand_mult_highpart.
3895
3896 If what we actually want is the remainder, we generate that by another
3897 by-constant multiplication and a subtraction. */
3898
3899 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3900 code below will malfunction if we are, so check here and handle
3901 the special case if so. */
3902 if (op1 == const1_rtx)
3903 return rem_flag ? const0_rtx : op0;
3904
3905 /* When dividing by -1, we could get an overflow.
3906 negv_optab can handle overflows. */
3907 if (! unsignedp && op1 == constm1_rtx)
3908 {
3909 if (rem_flag)
3910 return const0_rtx;
3911 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3912 ? negv_optab : neg_optab, op0, target, 0);
3913 }
3914
3915 if (target
3916 /* Don't use the function value register as a target
3917 since we have to read it as well as write it,
3918 and function-inlining gets confused by this. */
3919 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3920 /* Don't clobber an operand while doing a multi-step calculation. */
3921 || ((rem_flag || op1_is_constant)
3922 && (reg_mentioned_p (target, op0)
3923 || (MEM_P (op0) && MEM_P (target))))
3924 || reg_mentioned_p (target, op1)
3925 || (MEM_P (op1) && MEM_P (target))))
3926 target = 0;
3927
3928 /* Get the mode in which to perform this computation. Normally it will
3929 be MODE, but sometimes we can't do the desired operation in MODE.
3930 If so, pick a wider mode in which we can do the operation. Convert
3931 to that mode at the start to avoid repeated conversions.
3932
3933 First see what operations we need. These depend on the expression
3934 we are evaluating. (We assume that divxx3 insns exist under the
3935      same conditions as modxx3 insns do, and that these insns don't normally
3936 fail. If these assumptions are not correct, we may generate less
3937 efficient code in some cases.)
3938
3939 Then see if we find a mode in which we can open-code that operation
3940 (either a division, modulus, or shift). Finally, check for the smallest
3941 mode for which we can do the operation with a library call. */
3942
3943 /* We might want to refine this now that we have division-by-constant
3944 optimization. Since expand_mult_highpart tries so many variants, it is
3945 not straightforward to generalize this. Maybe we should make an array
3946 of possible modes in init_expmed? Save this for GCC 2.7. */
3947
3948 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3949 ? (unsignedp ? lshr_optab : ashr_optab)
3950 : (unsignedp ? udiv_optab : sdiv_optab));
3951 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3952 ? optab1
3953 : (unsignedp ? udivmod_optab : sdivmod_optab));
3954
3955 for (compute_mode = mode; compute_mode != VOIDmode;
3956 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3957 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3958 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3959 break;
3960
3961 if (compute_mode == VOIDmode)
3962 for (compute_mode = mode; compute_mode != VOIDmode;
3963 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3964 if (optab1->handlers[compute_mode].libfunc
3965 || optab2->handlers[compute_mode].libfunc)
3966 break;
3967
3968 /* If we still couldn't find a mode, use MODE, but expand_binop will
3969 probably die. */
3970 if (compute_mode == VOIDmode)
3971 compute_mode = mode;
3972
3973 if (target && GET_MODE (target) == compute_mode)
3974 tquotient = target;
3975 else
3976 tquotient = gen_reg_rtx (compute_mode);
3977
3978 size = GET_MODE_BITSIZE (compute_mode);
3979 #if 0
3980 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3981 (mode), and thereby get better code when OP1 is a constant. Do that
3982 later. It will require going over all usages of SIZE below. */
3983 size = GET_MODE_BITSIZE (mode);
3984 #endif
3985
3986 /* Only deduct something for a REM if the last divide done was
3987 for a different constant. Then set the constant of the last
3988 divide. */
3989 max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3990 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3991 && INTVAL (op1) == last_div_const))
3992 max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3993
3994 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3995
3996 /* Now convert to the best mode to use. */
3997 if (compute_mode != mode)
3998 {
3999 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4000 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4001
4002 /* convert_modes may have placed op1 into a register, so we
4003 must recompute the following. */
4004 op1_is_constant = GET_CODE (op1) == CONST_INT;
4005 op1_is_pow2 = (op1_is_constant
4006 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4007 || (! unsignedp
4008 			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4009 }
4010
4011 /* If one of the operands is a volatile MEM, copy it into a register. */
4012
4013 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4014 op0 = force_reg (compute_mode, op0);
4015 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4016 op1 = force_reg (compute_mode, op1);
4017
4018 /* If we need the remainder or if OP1 is constant, we need to
4019 put OP0 in a register in case it has any queued subexpressions. */
4020 if (rem_flag || op1_is_constant)
4021 op0 = force_reg (compute_mode, op0);
4022
4023 last = get_last_insn ();
4024
4025 /* Promote floor rounding to trunc rounding for unsigned operations. */
4026 if (unsignedp)
4027 {
4028 if (code == FLOOR_DIV_EXPR)
4029 code = TRUNC_DIV_EXPR;
4030 if (code == FLOOR_MOD_EXPR)
4031 code = TRUNC_MOD_EXPR;
4032 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4033 code = TRUNC_DIV_EXPR;
4034 }
4035
4036 if (op1 != const0_rtx)
4037 switch (code)
4038 {
4039 case TRUNC_MOD_EXPR:
4040 case TRUNC_DIV_EXPR:
4041 if (op1_is_constant)
4042 {
4043 if (unsignedp)
4044 {
4045 unsigned HOST_WIDE_INT mh;
4046 int pre_shift, post_shift;
4047 int dummy;
4048 rtx ml;
4049 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4050 & GET_MODE_MASK (compute_mode));
4051
4052 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4053 {
4054 pre_shift = floor_log2 (d);
4055 if (rem_flag)
4056 {
4057 remainder
4058 = expand_binop (compute_mode, and_optab, op0,
4059 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4060 remainder, 1,
4061 OPTAB_LIB_WIDEN);
4062 if (remainder)
4063 return gen_lowpart (mode, remainder);
4064 }
4065 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4066 build_int_cst (NULL_TREE,
4067 pre_shift),
4068 tquotient, 1);
4069 }
4070 else if (size <= HOST_BITS_PER_WIDE_INT)
4071 {
4072 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4073 {
4074 /* Most significant bit of divisor is set; emit an scc
4075 insn. */
4076 quotient = emit_store_flag (tquotient, GEU, op0, op1,
4077 compute_mode, 1, 1);
4078 if (quotient == 0)
4079 goto fail1;
4080 }
4081 else
4082 {
4083 /* Find a suitable multiplier and right shift count
4084 instead of multiplying with D. */
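
		    /* Illustration: for D = 7 in a 32-bit mode,
		       choose_multiplier yields ml = 0x24924925 with the
		       N+1st bit set (mh != 0) and post_shift = 3, so the
		       mh != 0 branch below computes
		       t1 = highpart (x * 0x24924925), then
		       q = (((x - t1) >> 1) + t1) >> 2,
		       the classic sequence for unsigned division by 7.  */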
4085
4086 mh = choose_multiplier (d, size, size,
4087 &ml, &post_shift, &dummy);
4088
4089 /* If the suggested multiplier is more than SIZE bits,
4090 we can do better for even divisors, using an
4091 initial right shift. */
4092 if (mh != 0 && (d & 1) == 0)
4093 {
4094 pre_shift = floor_log2 (d & -d);
4095 mh = choose_multiplier (d >> pre_shift, size,
4096 size - pre_shift,
4097 &ml, &post_shift, &dummy);
4098 gcc_assert (!mh);
4099 }
4100 else
4101 pre_shift = 0;
4102
4103 if (mh != 0)
4104 {
4105 rtx t1, t2, t3, t4;
4106
4107 if (post_shift - 1 >= BITS_PER_WORD)
4108 goto fail1;
4109
4110 extra_cost
4111 = (shift_cost[compute_mode][post_shift - 1]
4112 + shift_cost[compute_mode][1]
4113 + 2 * add_cost[compute_mode]);
4114 t1 = expand_mult_highpart (compute_mode, op0, ml,
4115 NULL_RTX, 1,
4116 max_cost - extra_cost);
4117 if (t1 == 0)
4118 goto fail1;
4119 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4120 op0, t1),
4121 NULL_RTX);
4122 t3 = expand_shift
4123 (RSHIFT_EXPR, compute_mode, t2,
4124 build_int_cst (NULL_TREE, 1),
4125 NULL_RTX,1);
4126 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4127 t1, t3),
4128 NULL_RTX);
4129 quotient = expand_shift
4130 (RSHIFT_EXPR, compute_mode, t4,
4131 build_int_cst (NULL_TREE, post_shift - 1),
4132 tquotient, 1);
4133 }
4134 else
4135 {
4136 rtx t1, t2;
4137
4138 if (pre_shift >= BITS_PER_WORD
4139 || post_shift >= BITS_PER_WORD)
4140 goto fail1;
4141
4142 t1 = expand_shift
4143 (RSHIFT_EXPR, compute_mode, op0,
4144 build_int_cst (NULL_TREE, pre_shift),
4145 NULL_RTX, 1);
4146 extra_cost
4147 = (shift_cost[compute_mode][pre_shift]
4148 + shift_cost[compute_mode][post_shift]);
4149 t2 = expand_mult_highpart (compute_mode, t1, ml,
4150 NULL_RTX, 1,
4151 max_cost - extra_cost);
4152 if (t2 == 0)
4153 goto fail1;
4154 quotient = expand_shift
4155 (RSHIFT_EXPR, compute_mode, t2,
4156 build_int_cst (NULL_TREE, post_shift),
4157 tquotient, 1);
4158 }
4159 }
4160 }
4161 	      else		/* Mode too wide to use the tricky code */
4162 break;
4163
4164 insn = get_last_insn ();
4165 if (insn != last
4166 && (set = single_set (insn)) != 0
4167 && SET_DEST (set) == quotient)
4168 set_unique_reg_note (insn,
4169 REG_EQUAL,
4170 gen_rtx_UDIV (compute_mode, op0, op1));
4171 }
4172 else /* TRUNC_DIV, signed */
4173 {
4174 unsigned HOST_WIDE_INT ml;
4175 int lgup, post_shift;
4176 rtx mlr;
4177 HOST_WIDE_INT d = INTVAL (op1);
4178 unsigned HOST_WIDE_INT abs_d;
4179
4180 /* Since d might be INT_MIN, we have to cast to
4181 unsigned HOST_WIDE_INT before negating to avoid
4182 undefined signed overflow. */
4183 abs_d = (d >= 0
4184 ? (unsigned HOST_WIDE_INT) d
4185 : - (unsigned HOST_WIDE_INT) d);
4186
4187 /* n rem d = n rem -d */
4188 if (rem_flag && d < 0)
4189 {
4190 d = abs_d;
4191 op1 = gen_int_mode (abs_d, compute_mode);
4192 }
4193
4194 if (d == 1)
4195 quotient = op0;
4196 else if (d == -1)
4197 quotient = expand_unop (compute_mode, neg_optab, op0,
4198 tquotient, 0);
4199 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200 {
4201 /* This case is not handled correctly below. */
4202 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4203 compute_mode, 1, 1);
4204 if (quotient == 0)
4205 goto fail1;
4206 }
4207 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4208 && (rem_flag ? smod_pow2_cheap[compute_mode]
4209 : sdiv_pow2_cheap[compute_mode])
4210 /* We assume that cheap metric is true if the
4211 optab has an expander for this mode. */
4212 && (((rem_flag ? smod_optab : sdiv_optab)
4213 ->handlers[compute_mode].insn_code
4214 != CODE_FOR_nothing)
4215 || (sdivmod_optab->handlers[compute_mode]
4216 .insn_code != CODE_FOR_nothing)))
4217 ;
4218 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4219 {
4220 if (rem_flag)
4221 {
4222 remainder = expand_smod_pow2 (compute_mode, op0, d);
4223 if (remainder)
4224 return gen_lowpart (mode, remainder);
4225 }
4226
4227 if (sdiv_pow2_cheap[compute_mode]
4228 && ((sdiv_optab->handlers[compute_mode].insn_code
4229 != CODE_FOR_nothing)
4230 || (sdivmod_optab->handlers[compute_mode].insn_code
4231 != CODE_FOR_nothing)))
4232 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4233 compute_mode, op0,
4234 gen_int_mode (abs_d,
4235 compute_mode),
4236 NULL_RTX, 0);
4237 else
4238 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4239
4240 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4241 negate the quotient. */
4242 if (d < 0)
4243 {
4244 insn = get_last_insn ();
4245 if (insn != last
4246 && (set = single_set (insn)) != 0
4247 && SET_DEST (set) == quotient
4248 && abs_d < ((unsigned HOST_WIDE_INT) 1
4249 << (HOST_BITS_PER_WIDE_INT - 1)))
4250 set_unique_reg_note (insn,
4251 REG_EQUAL,
4252 gen_rtx_DIV (compute_mode,
4253 op0,
4254 GEN_INT
4255 (trunc_int_for_mode
4256 (abs_d,
4257 compute_mode))));
4258
4259 quotient = expand_unop (compute_mode, neg_optab,
4260 quotient, quotient, 0);
4261 }
4262 }
4263 else if (size <= HOST_BITS_PER_WIDE_INT)
4264 {
4265 choose_multiplier (abs_d, size, size - 1,
4266 &mlr, &post_shift, &lgup);
4267 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
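	      /* Example: for D = 3 in a 32-bit mode,
		 choose_multiplier (3, 32, 31) gives ml = 0x55555556 and
		 post_shift = 0.  Since ml < 2**31 the first branch below
		 emits q = highpart (x * 0x55555556) - (x >> 31), the
		 familiar signed divide-by-3 sequence.  */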
4268 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4269 {
4270 rtx t1, t2, t3;
4271
4272 if (post_shift >= BITS_PER_WORD
4273 || size - 1 >= BITS_PER_WORD)
4274 goto fail1;
4275
4276 extra_cost = (shift_cost[compute_mode][post_shift]
4277 + shift_cost[compute_mode][size - 1]
4278 + add_cost[compute_mode]);
4279 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4280 NULL_RTX, 0,
4281 max_cost - extra_cost);
4282 if (t1 == 0)
4283 goto fail1;
4284 t2 = expand_shift
4285 (RSHIFT_EXPR, compute_mode, t1,
4286 build_int_cst (NULL_TREE, post_shift),
4287 NULL_RTX, 0);
4288 t3 = expand_shift
4289 (RSHIFT_EXPR, compute_mode, op0,
4290 build_int_cst (NULL_TREE, size - 1),
4291 NULL_RTX, 0);
4292 if (d < 0)
4293 quotient
4294 = force_operand (gen_rtx_MINUS (compute_mode,
4295 t3, t2),
4296 tquotient);
4297 else
4298 quotient
4299 = force_operand (gen_rtx_MINUS (compute_mode,
4300 t2, t3),
4301 tquotient);
4302 }
4303 else
4304 {
4305 rtx t1, t2, t3, t4;
4306
4307 if (post_shift >= BITS_PER_WORD
4308 || size - 1 >= BITS_PER_WORD)
4309 goto fail1;
4310
4311 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4312 mlr = gen_int_mode (ml, compute_mode);
4313 extra_cost = (shift_cost[compute_mode][post_shift]
4314 + shift_cost[compute_mode][size - 1]
4315 + 2 * add_cost[compute_mode]);
4316 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4317 NULL_RTX, 0,
4318 max_cost - extra_cost);
4319 if (t1 == 0)
4320 goto fail1;
4321 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4322 t1, op0),
4323 NULL_RTX);
4324 t3 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, t2,
4326 build_int_cst (NULL_TREE, post_shift),
4327 NULL_RTX, 0);
4328 t4 = expand_shift
4329 (RSHIFT_EXPR, compute_mode, op0,
4330 build_int_cst (NULL_TREE, size - 1),
4331 NULL_RTX, 0);
4332 if (d < 0)
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t4, t3),
4336 tquotient);
4337 else
4338 quotient
4339 = force_operand (gen_rtx_MINUS (compute_mode,
4340 t3, t4),
4341 tquotient);
4342 }
4343 }
4344 	  else			/* Mode too wide to use the tricky code */
4345 break;
4346
4347 insn = get_last_insn ();
4348 if (insn != last
4349 && (set = single_set (insn)) != 0
4350 && SET_DEST (set) == quotient)
4351 set_unique_reg_note (insn,
4352 REG_EQUAL,
4353 gen_rtx_DIV (compute_mode, op0, op1));
4354 }
4355 break;
4356 }
4357 fail1:
4358 delete_insns_since (last);
4359 break;
4360
4361 case FLOOR_DIV_EXPR:
4362 case FLOOR_MOD_EXPR:
4363 /* We will come here only for signed operations. */
4364 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4365 {
4366 unsigned HOST_WIDE_INT mh;
4367 int pre_shift, lgup, post_shift;
4368 HOST_WIDE_INT d = INTVAL (op1);
4369 rtx ml;
4370
4371 if (d > 0)
4372 {
4373 /* We could just as easily deal with negative constants here,
4374 but it does not seem worth the trouble for GCC 2.6. */
4375 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4376 {
4377 pre_shift = floor_log2 (d);
4378 if (rem_flag)
4379 {
4380 remainder = expand_binop (compute_mode, and_optab, op0,
4381 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4382 remainder, 0, OPTAB_LIB_WIDEN);
4383 if (remainder)
4384 return gen_lowpart (mode, remainder);
4385 }
4386 quotient = expand_shift
4387 (RSHIFT_EXPR, compute_mode, op0,
4388 build_int_cst (NULL_TREE, pre_shift),
4389 tquotient, 0);
4390 }
4391 else
4392 {
4393 rtx t1, t2, t3, t4;
4394
4395 mh = choose_multiplier (d, size, size - 1,
4396 &ml, &post_shift, &lgup);
4397 gcc_assert (!mh);
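
	      /* The sequence below relies on the identity (for D > 0)

		   floor (x / D) == ((x ^ s) / D) ^ s,
		   where s = x >> (size - 1):

		 one's-complement the operand when it is negative, divide
		 unsigned via the multiplier, and complement back.  Example:
		 x = -7, D = 3 gives s = -1, ~x = 6, 6/3 = 2, and
		 ~2 = -3 == floor (-7/3).  */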
4398
4399 if (post_shift < BITS_PER_WORD
4400 && size - 1 < BITS_PER_WORD)
4401 {
4402 t1 = expand_shift
4403 (RSHIFT_EXPR, compute_mode, op0,
4404 build_int_cst (NULL_TREE, size - 1),
4405 NULL_RTX, 0);
4406 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4407 NULL_RTX, 0, OPTAB_WIDEN);
4408 extra_cost = (shift_cost[compute_mode][post_shift]
4409 + shift_cost[compute_mode][size - 1]
4410 + 2 * add_cost[compute_mode]);
4411 t3 = expand_mult_highpart (compute_mode, t2, ml,
4412 NULL_RTX, 1,
4413 max_cost - extra_cost);
4414 if (t3 != 0)
4415 {
4416 t4 = expand_shift
4417 (RSHIFT_EXPR, compute_mode, t3,
4418 build_int_cst (NULL_TREE, post_shift),
4419 NULL_RTX, 1);
4420 quotient = expand_binop (compute_mode, xor_optab,
4421 t4, t1, tquotient, 0,
4422 OPTAB_WIDEN);
4423 }
4424 }
4425 }
4426 }
4427 else
4428 {
4429 rtx nsign, t1, t2, t3, t4;
4430 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4431 op0, constm1_rtx), NULL_RTX);
4432 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4433 0, OPTAB_WIDEN);
4434 nsign = expand_shift
4435 (RSHIFT_EXPR, compute_mode, t2,
4436 build_int_cst (NULL_TREE, size - 1),
4437 NULL_RTX, 0);
4438 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4439 NULL_RTX);
4440 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4441 NULL_RTX, 0);
4442 if (t4)
4443 {
4444 rtx t5;
4445 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4446 NULL_RTX, 0);
4447 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4448 t4, t5),
4449 tquotient);
4450 }
4451 }
4452 }
4453
4454 if (quotient != 0)
4455 break;
4456 delete_insns_since (last);
4457
4458 /* Try using an instruction that produces both the quotient and
4459 remainder, using truncation. We can easily compensate the quotient
4460 or remainder to get floor rounding, once we have the remainder.
4461 Notice that we compute also the final remainder value here,
4462 and return the result right away. */
4463 if (target == 0 || GET_MODE (target) != compute_mode)
4464 target = gen_reg_rtx (compute_mode);
4465
4466 if (rem_flag)
4467 {
4468 remainder
4469 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4470 quotient = gen_reg_rtx (compute_mode);
4471 }
4472 else
4473 {
4474 quotient
4475 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4476 remainder = gen_reg_rtx (compute_mode);
4477 }
4478
4479 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4480 quotient, remainder, 0))
4481 {
4482 /* This could be computed with a branch-less sequence.
4483 Save that for later. */
4484 rtx tem;
4485 rtx label = gen_label_rtx ();
4486 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4487 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4488 NULL_RTX, 0, OPTAB_WIDEN);
4489 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4490 expand_dec (quotient, const1_rtx);
4491 expand_inc (remainder, op1);
4492 emit_label (label);
4493 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4494 }
4495
4496 /* No luck with division elimination or divmod. Have to do it
4497 by conditionally adjusting op0 *and* the result. */
4498 {
4499 rtx label1, label2, label3, label4, label5;
4500 rtx adjusted_op0;
4501 rtx tem;
4502
4503 quotient = gen_reg_rtx (compute_mode);
4504 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4505 label1 = gen_label_rtx ();
4506 label2 = gen_label_rtx ();
4507 label3 = gen_label_rtx ();
4508 label4 = gen_label_rtx ();
4509 label5 = gen_label_rtx ();
4510 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4511 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4512 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513 quotient, 0, OPTAB_LIB_WIDEN);
4514 if (tem != quotient)
4515 emit_move_insn (quotient, tem);
4516 emit_jump_insn (gen_jump (label5));
4517 emit_barrier ();
4518 emit_label (label1);
4519 expand_inc (adjusted_op0, const1_rtx);
4520 emit_jump_insn (gen_jump (label4));
4521 emit_barrier ();
4522 emit_label (label2);
4523 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4524 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4525 quotient, 0, OPTAB_LIB_WIDEN);
4526 if (tem != quotient)
4527 emit_move_insn (quotient, tem);
4528 emit_jump_insn (gen_jump (label5));
4529 emit_barrier ();
4530 emit_label (label3);
4531 expand_dec (adjusted_op0, const1_rtx);
4532 emit_label (label4);
4533 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4534 quotient, 0, OPTAB_LIB_WIDEN);
4535 if (tem != quotient)
4536 emit_move_insn (quotient, tem);
4537 expand_dec (quotient, const1_rtx);
4538 emit_label (label5);
4539 }
4540 break;
4541
4542 case CEIL_DIV_EXPR:
4543 case CEIL_MOD_EXPR:
4544 if (unsignedp)
4545 {
4546 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4547 {
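	    /* Ceiling division by a power of two:
	       q = (x >> logd) + (low bits != 0).  E.g. 13 / 4 rounded up:
	       (13 >> 2) + ((13 & 3) != 0) == 3 + 1 == 4.  */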
4548 rtx t1, t2, t3;
4549 unsigned HOST_WIDE_INT d = INTVAL (op1);
4550 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4551 build_int_cst (NULL_TREE, floor_log2 (d)),
4552 tquotient, 1);
4553 t2 = expand_binop (compute_mode, and_optab, op0,
4554 GEN_INT (d - 1),
4555 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4556 t3 = gen_reg_rtx (compute_mode);
4557 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4558 compute_mode, 1, 1);
4559 if (t3 == 0)
4560 {
4561 rtx lab;
4562 lab = gen_label_rtx ();
4563 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4564 expand_inc (t1, const1_rtx);
4565 emit_label (lab);
4566 quotient = t1;
4567 }
4568 else
4569 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4570 t1, t3),
4571 tquotient);
4572 break;
4573 }
4574
4575 /* Try using an instruction that produces both the quotient and
4576 remainder, using truncation. We can easily compensate the
4577 quotient or remainder to get ceiling rounding, once we have the
4578 remainder. Notice that we compute also the final remainder
4579 value here, and return the result right away. */
4580 if (target == 0 || GET_MODE (target) != compute_mode)
4581 target = gen_reg_rtx (compute_mode);
4582
4583 if (rem_flag)
4584 {
4585 remainder = (REG_P (target)
4586 ? target : gen_reg_rtx (compute_mode));
4587 quotient = gen_reg_rtx (compute_mode);
4588 }
4589 else
4590 {
4591 quotient = (REG_P (target)
4592 ? target : gen_reg_rtx (compute_mode));
4593 remainder = gen_reg_rtx (compute_mode);
4594 }
4595
4596 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4597 remainder, 1))
4598 {
4599 /* This could be computed with a branch-less sequence.
4600 Save that for later. */
4601 rtx label = gen_label_rtx ();
4602 do_cmp_and_jump (remainder, const0_rtx, EQ,
4603 compute_mode, label);
4604 expand_inc (quotient, const1_rtx);
4605 expand_dec (remainder, op1);
4606 emit_label (label);
4607 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4608 }
4609
4610 /* No luck with division elimination or divmod. Have to do it
4611 by conditionally adjusting op0 *and* the result. */
4612 {
4613 rtx label1, label2;
4614 rtx adjusted_op0, tem;
4615
4616 quotient = gen_reg_rtx (compute_mode);
4617 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4618 label1 = gen_label_rtx ();
4619 label2 = gen_label_rtx ();
4620 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4621 compute_mode, label1);
4622 emit_move_insn (quotient, const0_rtx);
4623 emit_jump_insn (gen_jump (label2));
4624 emit_barrier ();
4625 emit_label (label1);
4626 expand_dec (adjusted_op0, const1_rtx);
4627 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4628 quotient, 1, OPTAB_LIB_WIDEN);
4629 if (tem != quotient)
4630 emit_move_insn (quotient, tem);
4631 expand_inc (quotient, const1_rtx);
4632 emit_label (label2);
4633 }
4634 }
4635 else /* signed */
4636 {
4637 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4638 && INTVAL (op1) >= 0)
4639 {
4640 /* This is extremely similar to the code for the unsigned case
4641 above. For 2.7 we should merge these variants, but for
4642 2.6.1 I don't want to touch the code for unsigned since that
4643 	       gets used in C.  The signed case will only be used by other
4644 languages (Ada). */
4645
4646 rtx t1, t2, t3;
4647 unsigned HOST_WIDE_INT d = INTVAL (op1);
4648 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4649 build_int_cst (NULL_TREE, floor_log2 (d)),
4650 tquotient, 0);
4651 t2 = expand_binop (compute_mode, and_optab, op0,
4652 GEN_INT (d - 1),
4653 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4654 t3 = gen_reg_rtx (compute_mode);
4655 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4656 compute_mode, 1, 1);
4657 if (t3 == 0)
4658 {
4659 rtx lab;
4660 lab = gen_label_rtx ();
4661 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4662 expand_inc (t1, const1_rtx);
4663 emit_label (lab);
4664 quotient = t1;
4665 }
4666 else
4667 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4668 t1, t3),
4669 tquotient);
4670 break;
4671 }
4672
4673 /* Try using an instruction that produces both the quotient and
4674 remainder, using truncation. We can easily compensate the
4675 quotient or remainder to get ceiling rounding, once we have the
4676 remainder. Notice that we compute also the final remainder
4677 value here, and return the result right away. */
4678 if (target == 0 || GET_MODE (target) != compute_mode)
4679 target = gen_reg_rtx (compute_mode);
4680 if (rem_flag)
4681 {
4682 	      remainder = (REG_P (target)
4683 ? target : gen_reg_rtx (compute_mode));
4684 quotient = gen_reg_rtx (compute_mode);
4685 }
4686 else
4687 {
4688 quotient = (REG_P (target)
4689 ? target : gen_reg_rtx (compute_mode));
4690 remainder = gen_reg_rtx (compute_mode);
4691 }
4692
4693 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4694 remainder, 0))
4695 {
4696 /* This could be computed with a branch-less sequence.
4697 Save that for later. */
4698 rtx tem;
4699 rtx label = gen_label_rtx ();
4700 do_cmp_and_jump (remainder, const0_rtx, EQ,
4701 compute_mode, label);
4702 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4703 NULL_RTX, 0, OPTAB_WIDEN);
4704 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4705 expand_inc (quotient, const1_rtx);
4706 expand_dec (remainder, op1);
4707 emit_label (label);
4708 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4709 }
4710
4711 /* No luck with division elimination or divmod. Have to do it
4712 by conditionally adjusting op0 *and* the result. */
4713 {
4714 rtx label1, label2, label3, label4, label5;
4715 rtx adjusted_op0;
4716 rtx tem;
4717
4718 quotient = gen_reg_rtx (compute_mode);
4719 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4720 label1 = gen_label_rtx ();
4721 label2 = gen_label_rtx ();
4722 label3 = gen_label_rtx ();
4723 label4 = gen_label_rtx ();
4724 label5 = gen_label_rtx ();
4725 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4726 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4727 compute_mode, label1);
4728 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4729 quotient, 0, OPTAB_LIB_WIDEN);
4730 if (tem != quotient)
4731 emit_move_insn (quotient, tem);
4732 emit_jump_insn (gen_jump (label5));
4733 emit_barrier ();
4734 emit_label (label1);
4735 expand_dec (adjusted_op0, const1_rtx);
4736 emit_jump_insn (gen_jump (label4));
4737 emit_barrier ();
4738 emit_label (label2);
4739 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4740 compute_mode, label3);
4741 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4742 quotient, 0, OPTAB_LIB_WIDEN);
4743 if (tem != quotient)
4744 emit_move_insn (quotient, tem);
4745 emit_jump_insn (gen_jump (label5));
4746 emit_barrier ();
4747 emit_label (label3);
4748 expand_inc (adjusted_op0, const1_rtx);
4749 emit_label (label4);
4750 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4751 quotient, 0, OPTAB_LIB_WIDEN);
4752 if (tem != quotient)
4753 emit_move_insn (quotient, tem);
4754 expand_inc (quotient, const1_rtx);
4755 emit_label (label5);
4756 }
4757 }
4758 break;
4759
4760 case EXACT_DIV_EXPR:
4761 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4762 {
4763 HOST_WIDE_INT d = INTVAL (op1);
4764 unsigned HOST_WIDE_INT ml;
4765 int pre_shift;
4766 rtx t1;
4767
4768 pre_shift = floor_log2 (d & -d);
4769 ml = invert_mod2n (d >> pre_shift, size);
4770 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4771 build_int_cst (NULL_TREE, pre_shift),
4772 NULL_RTX, unsignedp);
4773 quotient = expand_mult (compute_mode, t1,
4774 gen_int_mode (ml, compute_mode),
4775 NULL_RTX, 1);
4776
4777 insn = get_last_insn ();
4778 set_unique_reg_note (insn,
4779 REG_EQUAL,
4780 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4781 compute_mode,
4782 op0, op1));
4783 }
4784 break;
4785
4786 case ROUND_DIV_EXPR:
4787 case ROUND_MOD_EXPR:
4788 if (unsignedp)
4789 {
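	  /* Round-to-nearest: compute the truncating quotient and
	     remainder, then add 1 to the quotient (and subtract OP1 from
	     the remainder) whenever remainder > (OP1 - 1) / 2.  E.g.
	     10 / 4: q = 2, r = 2; since 2 > (4 - 1) / 2 == 1, the result
	     is rounded up to q = 3, r = -2.  */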
4790 rtx tem;
4791 rtx label;
4792 label = gen_label_rtx ();
4793 quotient = gen_reg_rtx (compute_mode);
4794 remainder = gen_reg_rtx (compute_mode);
4795 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4796 {
4797 rtx tem;
4798 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4799 quotient, 1, OPTAB_LIB_WIDEN);
4800 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4801 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4802 remainder, 1, OPTAB_LIB_WIDEN);
4803 }
4804 tem = plus_constant (op1, -1);
4805 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4806 build_int_cst (NULL_TREE, 1),
4807 NULL_RTX, 1);
4808 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4809 expand_inc (quotient, const1_rtx);
4810 expand_dec (remainder, op1);
4811 emit_label (label);
4812 }
4813 else
4814 {
4815 rtx abs_rem, abs_op1, tem, mask;
4816 rtx label;
4817 label = gen_label_rtx ();
4818 quotient = gen_reg_rtx (compute_mode);
4819 remainder = gen_reg_rtx (compute_mode);
4820 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4821 {
4822 rtx tem;
4823 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4824 quotient, 0, OPTAB_LIB_WIDEN);
4825 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4826 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4827 remainder, 0, OPTAB_LIB_WIDEN);
4828 }
4829 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4830 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4831 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4832 build_int_cst (NULL_TREE, 1),
4833 NULL_RTX, 1);
4834 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4835 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4836 NULL_RTX, 0, OPTAB_WIDEN);
4837 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4838 build_int_cst (NULL_TREE, size - 1),
4839 NULL_RTX, 0);
4840 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4841 NULL_RTX, 0, OPTAB_WIDEN);
4842 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4843 NULL_RTX, 0, OPTAB_WIDEN);
4844 expand_inc (quotient, tem);
4845 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4846 NULL_RTX, 0, OPTAB_WIDEN);
4847 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4848 NULL_RTX, 0, OPTAB_WIDEN);
4849 expand_dec (remainder, tem);
4850 emit_label (label);
4851 }
4852 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4853
4854 default:
4855 gcc_unreachable ();
4856 }
4857
4858 if (quotient == 0)
4859 {
4860 if (target && GET_MODE (target) != compute_mode)
4861 target = 0;
4862
4863 if (rem_flag)
4864 {
4865 /* Try to produce the remainder without producing the quotient.
4866 If we seem to have a divmod pattern that does not require widening,
4867 don't try widening here. We should really have a WIDEN argument
4868 to expand_twoval_binop, since what we'd really like to do here is
4869 1) try a mod insn in compute_mode
4870 2) try a divmod insn in compute_mode
4871 3) try a div insn in compute_mode and multiply-subtract to get
4872 remainder
4873 4) try the same things with widening allowed. */
4874 remainder
4875 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4876 op0, op1, target,
4877 unsignedp,
4878 ((optab2->handlers[compute_mode].insn_code
4879 != CODE_FOR_nothing)
4880 ? OPTAB_DIRECT : OPTAB_WIDEN));
4881 if (remainder == 0)
4882 {
4883 /* No luck there. Can we do remainder and divide at once
4884 without a library call? */
4885 remainder = gen_reg_rtx (compute_mode);
4886 if (! expand_twoval_binop ((unsignedp
4887 ? udivmod_optab
4888 : sdivmod_optab),
4889 op0, op1,
4890 NULL_RTX, remainder, unsignedp))
4891 remainder = 0;
4892 }
4893
4894 if (remainder)
4895 return gen_lowpart (mode, remainder);
4896 }
4897
4898 /* Produce the quotient. Try a quotient insn, but not a library call.
4899 If we have a divmod in this mode, use it in preference to widening
4900 the div (for this test we assume it will not fail). Note that optab2
4901 is set to the one of the two optabs that the call below will use. */
4902 quotient
4903 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4904 op0, op1, rem_flag ? NULL_RTX : target,
4905 unsignedp,
4906 ((optab2->handlers[compute_mode].insn_code
4907 != CODE_FOR_nothing)
4908 ? OPTAB_DIRECT : OPTAB_WIDEN));
4909
4910 if (quotient == 0)
4911 {
4912 /* No luck there. Try a quotient-and-remainder insn,
4913 keeping the quotient alone. */
4914 quotient = gen_reg_rtx (compute_mode);
4915 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4916 op0, op1,
4917 quotient, NULL_RTX, unsignedp))
4918 {
4919 quotient = 0;
4920 if (! rem_flag)
4921 /* Still no luck. If we are not computing the remainder,
4922 use a library call for the quotient. */
4923 quotient = sign_expand_binop (compute_mode,
4924 udiv_optab, sdiv_optab,
4925 op0, op1, target,
4926 unsignedp, OPTAB_LIB_WIDEN);
4927 }
4928 }
4929 }
4930
4931 if (rem_flag)
4932 {
4933 if (target && GET_MODE (target) != compute_mode)
4934 target = 0;
4935
4936 if (quotient == 0)
4937 {
4938 /* No divide instruction either. Use library for remainder. */
4939 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4940 op0, op1, target,
4941 unsignedp, OPTAB_LIB_WIDEN);
4942 /* No remainder function. Try a quotient-and-remainder
4943 function, keeping the remainder. */
4944 if (!remainder)
4945 {
4946 remainder = gen_reg_rtx (compute_mode);
4947 if (!expand_twoval_binop_libfunc
4948 (unsignedp ? udivmod_optab : sdivmod_optab,
4949 op0, op1,
4950 NULL_RTX, remainder,
4951 unsignedp ? UMOD : MOD))
4952 remainder = NULL_RTX;
4953 }
4954 }
4955 else
4956 {
4957 /* We divided. Now finish doing X - Y * (X / Y). */
4958 remainder = expand_mult (compute_mode, quotient, op1,
4959 NULL_RTX, unsignedp);
4960 remainder = expand_binop (compute_mode, sub_optab, op0,
4961 remainder, target, unsignedp,
4962 OPTAB_LIB_WIDEN);
4963 }
4964 }
4965
4966 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4967 }
4968 \f
4969 /* Return a tree node with data type TYPE, describing the value of X.
4970    Usually this is a VAR_DECL, if there is no obvious better choice.
4971    X may be an expression; however, we only support those expressions
4972 generated by loop.c. */
4973
4974 tree
4975 make_tree (tree type, rtx x)
4976 {
4977 tree t;
4978
4979 switch (GET_CODE (x))
4980 {
4981 case CONST_INT:
4982 {
4983 HOST_WIDE_INT hi = 0;
4984
4985 if (INTVAL (x) < 0
4986 && !(TYPE_UNSIGNED (type)
4987 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4988 < HOST_BITS_PER_WIDE_INT)))
4989 hi = -1;
4990
4991 t = build_int_cst_wide (type, INTVAL (x), hi);
4992
4993 return t;
4994 }
4995
4996 case CONST_DOUBLE:
4997 if (GET_MODE (x) == VOIDmode)
4998 t = build_int_cst_wide (type,
4999 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5000 else
5001 {
5002 REAL_VALUE_TYPE d;
5003
5004 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5005 t = build_real (type, d);
5006 }
5007
5008 return t;
5009
5010 case CONST_VECTOR:
5011 {
5012 int units = CONST_VECTOR_NUNITS (x);
5013 tree itype = TREE_TYPE (type);
5014 tree t = NULL_TREE;
5015 int i;
5016
5018 /* Build a tree with vector elements. */
5019 for (i = units - 1; i >= 0; --i)
5020 {
5021 rtx elt = CONST_VECTOR_ELT (x, i);
5022 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5023 }
5024
5025 return build_vector (type, t);
5026 }
5027
5028 case PLUS:
5029 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5030 make_tree (type, XEXP (x, 1)));
5031
5032 case MINUS:
5033 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5034 make_tree (type, XEXP (x, 1)));
5035
5036 case NEG:
5037 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5038
5039 case MULT:
5040 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5041 make_tree (type, XEXP (x, 1)));
5042
5043 case ASHIFT:
5044 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5045 make_tree (type, XEXP (x, 1)));
5046
5047 case LSHIFTRT:
5048 t = unsigned_type_for (type);
5049 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5050 make_tree (t, XEXP (x, 0)),
5051 make_tree (type, XEXP (x, 1))));
5052
5053 case ASHIFTRT:
5054 t = signed_type_for (type);
5055 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5056 make_tree (t, XEXP (x, 0)),
5057 make_tree (type, XEXP (x, 1))));
5058
5059 case DIV:
5060 if (TREE_CODE (type) != REAL_TYPE)
5061 t = signed_type_for (type);
5062 else
5063 t = type;
5064
5065 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5066 make_tree (t, XEXP (x, 0)),
5067 make_tree (t, XEXP (x, 1))));
5068 case UDIV:
5069 t = unsigned_type_for (type);
5070 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5071 make_tree (t, XEXP (x, 0)),
5072 make_tree (t, XEXP (x, 1))));
5073
5074 case SIGN_EXTEND:
5075 case ZERO_EXTEND:
5076 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5077 GET_CODE (x) == ZERO_EXTEND);
5078 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5079
5080 case CONST:
5081 return make_tree (type, XEXP (x, 0));
5082
5083 case SYMBOL_REF:
5084 t = SYMBOL_REF_DECL (x);
5085 if (t)
5086 return fold_convert (type, build_fold_addr_expr (t));
5087 /* else fall through. */
5088
5089 default:
5090 t = build_decl (VAR_DECL, NULL_TREE, type);
5091
5092 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5093 ptr_mode. So convert. */
5094 if (POINTER_TYPE_P (type))
5095 x = convert_memory_address (TYPE_MODE (type), x);
5096
5097 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5098 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5099 t->decl_with_rtl.rtl = x;
5100
5101 return t;
5102 }
5103 }
5104 \f
5105 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5106 and returning TARGET.
5107
5108 If TARGET is 0, a pseudo-register or constant is returned. */
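/* For example, expand_and (SImode, op0, GEN_INT (0xff), NULL_RTX)
   masks OP0 down to its low byte, returning the folded constant when
   both operands are constant and typically a pseudo-register
   otherwise.  */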

rtx
expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
{
  rtx tem = 0;

  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
    tem = simplify_binary_operation (AND, mode, op0, op1);
  if (tem == 0)
    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);

  if (target == 0)
    target = tem;
  else if (tem != target)
    emit_move_insn (target, tem);
  return target;
}
\f
/* Helper function for emit_store_flag.  */
static rtx
emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
                   int normalizep)
{
  rtx op0;
  enum machine_mode target_mode = GET_MODE (target);

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
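  /* E.g. with STORE_FLAG_VALUE == 1, a QImode comparison result being
     widened to SImode has its QImode sign bit (bit 7) clear, so the
     cheaper unsigned conversion is chosen.  */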
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
    {
      convert_move (target, subtarget,
                    (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
                    && 0 == (STORE_FLAG_VALUE
                             & ((HOST_WIDE_INT) 1
                                << (GET_MODE_BITSIZE (mode) - 1))));
      op0 = target;
      mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compile-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
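  /* In the branch below, STORE_FLAG_VALUE has the sign bit of MODE set
     (e.g. 0x80000000 in SImode), so shifting it right by BITSIZE - 1
     yields 1 with a logical shift (NORMALIZEP == 1) or -1 with an
     arithmetic shift.  */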
  else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
           && (STORE_FLAG_VALUE
               & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
    op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                        size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
                        normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
        op0 = expand_unop (mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing the result in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
                 enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum machine_mode target_mode = GET_MODE (target);
  rtx tem;
  rtx last = get_last_insn ();
  rtx pattern, comparison;

  if (unsignedp)
    code = unsigned_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */
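  /* For instance, A < 1 becomes A <= 0, and the unsigned test A >= 1
     becomes A != 0; comparisons against zero have the best chance of
     matching a target's scc pattern.  */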

  switch (code)
    {
    case LT:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = LE;
      break;
    case LE:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = LT;
      break;
    case GE:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = GT;
      break;
    case GT:
      if (op1 == constm1_rtx)
        op1 = const0_rtx, code = GE;
      break;
    case GEU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = NE;
      break;
    case LTU:
      if (op1 == const1_rtx)
        op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
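  /* E.g. a DImode == 0 test on a 32-bit target ORs the two SImode
     halves together and recurses with a word-mode comparison; == -1
     ANDs them instead; LT/GE against zero need only test the sign bit
     of the high word.  */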
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
          && (op1 == const0_rtx || op1 == constm1_rtx))
        {
          rtx op00, op01, op0both;

          /* Do a logical OR or AND of the two words and compare the
             result.  */
          op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
          op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
          op0both = expand_binop (word_mode,
                                  op1 == const0_rtx ? ior_optab : and_optab,
                                  op00, op01, NULL_RTX, unsignedp,
                                  OPTAB_DIRECT);

          if (op0both != 0)
            return emit_store_flag (target, code, op0both, op1, word_mode,
                                    unsignedp, normalizep);
        }
      else if ((code == LT || code == GE) && op1 == const0_rtx)
        {
          rtx op0h;

          /* If testing the sign bit, can just test on high word.  */
          op0h = simplify_gen_subreg (word_mode, op0, mode,
                                      subreg_highpart_offset (word_mode,
                                                              mode));
          return emit_store_flag (target, code, op0h, op1, word_mode,
                                  unsignedp, normalizep);
        }
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
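  /* E.g. for SImode A < 0 with STORE_FLAG_VALUE == 1, a logical right
     shift by 31 moves the sign bit down to bit 0, yielding exactly 0
     or 1; a -1/0 result uses an arithmetic shift instead, and A >= 0
     one's-complements A first.  */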
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
          || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
              && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                  == ((unsigned HOST_WIDE_INT) 1
                      << (GET_MODE_BITSIZE (mode) - 1))))))
    {
      subtarget = target;

      /* If the result is to be wider than OP0, it is best to convert it
         first.  If it is to be narrower, it is *incorrect* to convert it
         first.  */
      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
        {
          op0 = convert_modes (target_mode, mode, op0, 0);
          mode = target_mode;
        }

      if (target_mode != mode)
        subtarget = 0;

      if (code == GE)
        op0 = expand_unop (mode, one_cmpl_optab, op0,
                           ((STORE_FLAG_VALUE == 1 || normalizep)
                            ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
        /* If we are supposed to produce a 0/1 value, we want to do
           a logical shift from the sign bit to the low-order bit; for
           a -1/0 value, we do an arithmetic shift.  */
        op0 = expand_shift (RSHIFT_EXPR, mode, op0,
                            size_int (GET_MODE_BITSIZE (mode) - 1),
                            subtarget, normalizep != -1);

      if (mode != target_mode)
        op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  icode = setcc_gen_code[(int) code];

  if (icode != CODE_FOR_nothing)
    {
      insn_operand_predicate_fn pred;

      /* We think we may be able to do this with a scc insn.  Emit the
         comparison and then the scc insn.  */

      do_pending_stack_adjust ();
      last = get_last_insn ();

      comparison
        = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
      if (CONSTANT_P (comparison))
        {
          switch (GET_CODE (comparison))
            {
            case CONST_INT:
              if (comparison == const0_rtx)
                return const0_rtx;
              break;

#ifdef FLOAT_STORE_FLAG_VALUE
            case CONST_DOUBLE:
              if (comparison == CONST0_RTX (GET_MODE (comparison)))
                return const0_rtx;
              break;
#endif
            default:
              gcc_unreachable ();
            }

          if (normalizep == 1)
            return const1_rtx;
          if (normalizep == -1)
            return constm1_rtx;
          return const_true_rtx;
        }

      /* The code of COMPARISON may not match CODE if compare_from_rtx
         decided to swap its operands and reverse the original code.

         We know that compare_from_rtx returns either a CONST_INT or
         a new comparison code, so it is safe to just extract the
         code from COMPARISON.  */
      code = GET_CODE (comparison);

      /* Get a reference to the target in the proper mode for this insn.  */
      compare_mode = insn_data[(int) icode].operand[0].mode;
      subtarget = target;
      pred = insn_data[(int) icode].operand[0].predicate;
      if (optimize || ! (*pred) (subtarget, compare_mode))
        subtarget = gen_reg_rtx (compare_mode);

      pattern = GEN_FCN (icode) (subtarget);
      if (pattern)
        {
          emit_insn (pattern);
          return emit_store_flag_1 (target, subtarget, compare_mode,
                                    normalizep);
        }
    }
  else
    {
      /* We don't have an scc insn, so try a cstore insn.  */

      for (compare_mode = mode; compare_mode != VOIDmode;
           compare_mode = GET_MODE_WIDER_MODE (compare_mode))
        {
          icode = cstore_optab->handlers[(int) compare_mode].insn_code;
          if (icode != CODE_FOR_nothing)
            break;
        }

      if (icode != CODE_FOR_nothing)
        {
          enum machine_mode result_mode
            = insn_data[(int) icode].operand[0].mode;
          rtx cstore_op0 = op0;
          rtx cstore_op1 = op1;

          do_pending_stack_adjust ();
          last = get_last_insn ();

          if (compare_mode != mode)
            {
              cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
                                          unsignedp);
              cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
                                          unsignedp);
            }

          if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
                                                            compare_mode))
            cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);

          if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
                                                            compare_mode))
            cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);

          comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
                                       cstore_op1);
          subtarget = target;

          if (optimize || !(insn_data[(int) icode].operand[0].predicate
                            (subtarget, result_mode)))
            subtarget = gen_reg_rtx (result_mode);

          pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
                                     cstore_op1);

          if (pattern)
            {
              emit_insn (pattern);
              return emit_store_flag_1 (target, subtarget, result_mode,
                                        normalizep);
            }
        }
    }

  delete_insns_since (last);

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
               && (target_mode == mode)) ? target : NULL_RTX;

  /* If we reached here, we can't do this with a scc insn.  However, there
     are some comparisons that can be done directly.  For example, if
     this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */
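  /* E.g. A == B is recast as (A ^ B) == 0; the recursive call then
     sees a comparison against zero, which stands a better chance of
     being handled above.  */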

  if (BRANCH_COST > 0
      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
      && op1 != const0_rtx)
    {
      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
                          OPTAB_WIDEN);

      if (tem == 0)
        tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
                            OPTAB_WIDEN);
      if (tem != 0)
        tem = emit_store_flag (target, code, tem, const0_rtx,
                               mode, unsignedp, normalizep);
      if (tem == 0)
        delete_insns_since (last);
      return tem;
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (BRANCH_COST == 0
      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
      || (code != EQ && code != NE
          && (BRANCH_COST <= 1 || (code != LE && code != GT))))
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
        normalizep = STORE_FLAG_VALUE;

      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
        ;
      else
        return 0;
    }

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
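  /* Worked 8-bit examples: A = 3 gives 3 | 2 = 3 (sign bit clear);
     A = 0 gives 0 | 0xff = 0xff (sign bit set); A = 0xfc (-4) gives
     0xfc | 0xfb = 0xff (sign bit set).  */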

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
                          OPTAB_WIDEN);
      if (tem)
        tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
                            OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
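  /* Worked 8-bit examples (BITS = 7): A = 5 gives (5 >> 7) - 5 = -5
     (sign bit set); A = 0 gives 0 - 0 = 0 (clear); A = -3 gives
     -1 - (-3) = 2 (clear).  */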

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
        subtarget = 0;

      tem = expand_shift (RSHIFT_EXPR, mode, op0,
                          size_int (GET_MODE_BITSIZE (mode) - 1),
                          subtarget, 0);
      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
                          OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
         that converts the operand into a positive number if it is nonzero
         or zero if it was originally zero.  Then, for EQ, we subtract 1 and
         for NE we negate.  This puts the result in the sign bit.  Then we
         normalize with a shift, if needed.

         Two operations that can do the above actions are ABS and FFS, so try
         them.  If that doesn't work, and MODE is smaller than a full word,
         we can use zero-extension to the wider mode (an unsigned conversion)
         as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
         that is compensated by the subsequent overflow when subtracting
         one / negating.  */
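      /* E.g. for EQ, ABS minus one maps 0 to -1 (sign bit set) and any
         nonzero value to one with the sign bit clear (INT_MIN via the
         overflow noted above); for NE, negating ABS gives 0 for 0 and
         a negative value otherwise.  */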

      if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
        tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
        {
          tem = convert_modes (word_mode, mode, op0, 1);
          mode = word_mode;
        }

      if (tem != 0)
        {
          if (code == EQ)
            tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
                                0, OPTAB_WIDEN);
          else
            tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
        }

      /* If we couldn't do it that way, for NE we can "or" the two's complement
         of the value with itself.  For EQ, we take the one's complement of
         that "or", which is an extra insn, so we only handle EQ if branches
         are expensive.  */
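      /* E.g. A = 6 gives -6 | 6 = -2 (sign bit set); A = 0 gives
         0 | 0 = 0 (clear).  Either A or -A is negative for every
         nonzero A, so the OR always sets the sign bit in that case.  */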

      if (tem == 0 && (code == NE || BRANCH_COST > 1))
        {
          if (rtx_equal_p (subtarget, op0))
            subtarget = 0;

          tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
          tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
                              OPTAB_WIDEN);

          if (tem && code == EQ)
            tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
        }
    }

  if (tem && normalizep)
    tem = expand_shift (RSHIFT_EXPR, mode, tem,
                        size_int (GET_MODE_BITSIZE (mode) - 1),
                        subtarget, normalizep == 1);

  if (tem)
    {
      if (GET_MODE (tem) != target_mode)
        {
          convert_move (target, tem, 0);
          tem = target;
        }
      else if (!subtarget)
        {
          emit_move_insn (target, tem);
          tem = target;
        }
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
                       enum machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem, label;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  if (normalizep == 0)
    normalizep = 1;

  /* If this failed, we have to do this with set/compare/jump/set code.  */
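  /* The emitted sequence is: target = 1; if (OP0 CODE OP1) goto label;
     target = 0; label: -- leaving TARGET equal to 1 when the comparison
     holds and 0 otherwise.  */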

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  emit_move_insn (target, const1_rtx);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
                           NULL_RTX, label);

  emit_move_insn (target, const0_rtx);
  emit_label (label);

  return target;
}
\f
/* Perform possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
                 rtx label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
                           NULL_RTX, NULL_RTX, label);
}