gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
5 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41
42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
43 unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
46 unsigned HOST_WIDE_INT, rtx);
47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx, int);
51 static rtx mask_rtx (enum machine_mode, int, int, int);
52 static rtx lshift_value (enum machine_mode, rtx, int, int);
53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT, int);
55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
58
59 /* Test whether a value is zero or a power of two.  */
60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
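/* For example, 8 & 7 == 0 and 0 & -1 == 0, so 8 and 0 both satisfy the
   test, while 6 & 5 == 4 means 6 does not.  */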
61
62 /* Nonzero means divides or modulus operations are relatively cheap for
63 powers of two, so don't use branches; emit the operation instead.
64 Usually, this will mean that the MD file will emit non-branch
65 sequences. */
66
67 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
68 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
69
70 #ifndef SLOW_UNALIGNED_ACCESS
71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
72 #endif
73
74 /* For compilers that support multiple targets with different word sizes,
75 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
76 is the H8/300(H) compiler. */
77
78 #ifndef MAX_BITS_PER_WORD
79 #define MAX_BITS_PER_WORD BITS_PER_WORD
80 #endif
81
82 /* Reduce conditional compilation elsewhere. */
83 #ifndef HAVE_insv
84 #define HAVE_insv 0
85 #define CODE_FOR_insv CODE_FOR_nothing
86 #define gen_insv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extv
89 #define HAVE_extv 0
90 #define CODE_FOR_extv CODE_FOR_nothing
91 #define gen_extv(a,b,c,d) NULL_RTX
92 #endif
93 #ifndef HAVE_extzv
94 #define HAVE_extzv 0
95 #define CODE_FOR_extzv CODE_FOR_nothing
96 #define gen_extzv(a,b,c,d) NULL_RTX
97 #endif
98
99 /* Cost of various pieces of RTL. Note that some of these are indexed by
100 shift count and some by mode. */
101 static int zero_cost;
102 static int add_cost[NUM_MACHINE_MODES];
103 static int neg_cost[NUM_MACHINE_MODES];
104 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
105 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
106 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
107 static int mul_cost[NUM_MACHINE_MODES];
108 static int sdiv_cost[NUM_MACHINE_MODES];
109 static int udiv_cost[NUM_MACHINE_MODES];
110 static int mul_widen_cost[NUM_MACHINE_MODES];
111 static int mul_highpart_cost[NUM_MACHINE_MODES];
112
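/* Fill in the cost tables declared above by building skeleton RTL
   expressions for each operation and asking rtx_cost what the target
   charges for them, for every integer mode and, for shifts, for every
   shift count up to MAX_BITS_PER_WORD.  */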
113 void
114 init_expmed (void)
115 {
116 struct
117 {
118 struct rtx_def reg; rtunion reg_fld[2];
119 struct rtx_def plus; rtunion plus_fld1;
120 struct rtx_def neg;
121 struct rtx_def mult; rtunion mult_fld1;
122 struct rtx_def sdiv; rtunion sdiv_fld1;
123 struct rtx_def udiv; rtunion udiv_fld1;
124 struct rtx_def zext;
125 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
126 struct rtx_def smod_32; rtunion smod_32_fld1;
127 struct rtx_def wide_mult; rtunion wide_mult_fld1;
128 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
129 struct rtx_def wide_trunc;
130 struct rtx_def shift; rtunion shift_fld1;
131 struct rtx_def shift_mult; rtunion shift_mult_fld1;
132 struct rtx_def shift_add; rtunion shift_add_fld1;
133 struct rtx_def shift_sub; rtunion shift_sub_fld1;
134 } all;
135
136 rtx pow2[MAX_BITS_PER_WORD];
137 rtx cint[MAX_BITS_PER_WORD];
138 int m, n;
139 enum machine_mode mode, wider_mode;
140
141 zero_cost = rtx_cost (const0_rtx, 0);
142
143 for (m = 1; m < MAX_BITS_PER_WORD; m++)
144 {
145 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
146 cint[m] = GEN_INT (m);
147 }
148
149 memset (&all, 0, sizeof all);
150
151 PUT_CODE (&all.reg, REG);
152 /* Avoid using hard regs in ways which may be unsupported. */
153 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
154
155 PUT_CODE (&all.plus, PLUS);
156 XEXP (&all.plus, 0) = &all.reg;
157 XEXP (&all.plus, 1) = &all.reg;
158
159 PUT_CODE (&all.neg, NEG);
160 XEXP (&all.neg, 0) = &all.reg;
161
162 PUT_CODE (&all.mult, MULT);
163 XEXP (&all.mult, 0) = &all.reg;
164 XEXP (&all.mult, 1) = &all.reg;
165
166 PUT_CODE (&all.sdiv, DIV);
167 XEXP (&all.sdiv, 0) = &all.reg;
168 XEXP (&all.sdiv, 1) = &all.reg;
169
170 PUT_CODE (&all.udiv, UDIV);
171 XEXP (&all.udiv, 0) = &all.reg;
172 XEXP (&all.udiv, 1) = &all.reg;
173
174 PUT_CODE (&all.sdiv_32, DIV);
175 XEXP (&all.sdiv_32, 0) = &all.reg;
176 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
177
178 PUT_CODE (&all.smod_32, MOD);
179 XEXP (&all.smod_32, 0) = &all.reg;
180 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
181
182 PUT_CODE (&all.zext, ZERO_EXTEND);
183 XEXP (&all.zext, 0) = &all.reg;
184
185 PUT_CODE (&all.wide_mult, MULT);
186 XEXP (&all.wide_mult, 0) = &all.zext;
187 XEXP (&all.wide_mult, 1) = &all.zext;
188
189 PUT_CODE (&all.wide_lshr, LSHIFTRT);
190 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
191
192 PUT_CODE (&all.wide_trunc, TRUNCATE);
193 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
194
195 PUT_CODE (&all.shift, ASHIFT);
196 XEXP (&all.shift, 0) = &all.reg;
197
198 PUT_CODE (&all.shift_mult, MULT);
199 XEXP (&all.shift_mult, 0) = &all.reg;
200
201 PUT_CODE (&all.shift_add, PLUS);
202 XEXP (&all.shift_add, 0) = &all.shift_mult;
203 XEXP (&all.shift_add, 1) = &all.reg;
204
205 PUT_CODE (&all.shift_sub, MINUS);
206 XEXP (&all.shift_sub, 0) = &all.shift_mult;
207 XEXP (&all.shift_sub, 1) = &all.reg;
208
209 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
210 mode != VOIDmode;
211 mode = GET_MODE_WIDER_MODE (mode))
212 {
213 PUT_MODE (&all.reg, mode);
214 PUT_MODE (&all.plus, mode);
215 PUT_MODE (&all.neg, mode);
216 PUT_MODE (&all.mult, mode);
217 PUT_MODE (&all.sdiv, mode);
218 PUT_MODE (&all.udiv, mode);
219 PUT_MODE (&all.sdiv_32, mode);
220 PUT_MODE (&all.smod_32, mode);
221 PUT_MODE (&all.wide_trunc, mode);
222 PUT_MODE (&all.shift, mode);
223 PUT_MODE (&all.shift_mult, mode);
224 PUT_MODE (&all.shift_add, mode);
225 PUT_MODE (&all.shift_sub, mode);
226
227 add_cost[mode] = rtx_cost (&all.plus, SET);
228 neg_cost[mode] = rtx_cost (&all.neg, SET);
229 mul_cost[mode] = rtx_cost (&all.mult, SET);
230 sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
231 udiv_cost[mode] = rtx_cost (&all.udiv, SET);
232
233 sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
234 <= 2 * add_cost[mode]);
235 smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
236 <= 4 * add_cost[mode]);
237
238 wider_mode = GET_MODE_WIDER_MODE (mode);
239 if (wider_mode != VOIDmode)
240 {
241 PUT_MODE (&all.zext, wider_mode);
242 PUT_MODE (&all.wide_mult, wider_mode);
243 PUT_MODE (&all.wide_lshr, wider_mode);
244 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
245
246 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
247 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
248 }
249
250 shift_cost[mode][0] = 0;
251 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
252
253 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
254 for (m = 1; m < n; m++)
255 {
256 XEXP (&all.shift, 1) = cint[m];
257 XEXP (&all.shift_mult, 1) = pow2[m];
258
259 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
260 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
261 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
262 }
263 }
264 }
265
266 /* Return an rtx representing minus the value of X.
267 MODE is the intended mode of the result,
268 useful if X is a CONST_INT. */
269
270 rtx
271 negate_rtx (enum machine_mode mode, rtx x)
272 {
273 rtx result = simplify_unary_operation (NEG, mode, x, mode);
274
275 if (result == 0)
276 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
277
278 return result;
279 }
280
281 /* Report on the availability of insv/extv/extzv and the desired mode
282 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
283 is false; else the mode of the specified operand. If OPNO is -1,
284 all the caller cares about is whether the insn is available. */
285 enum machine_mode
286 mode_for_extraction (enum extraction_pattern pattern, int opno)
287 {
288 const struct insn_data *data;
289
290 switch (pattern)
291 {
292 case EP_insv:
293 if (HAVE_insv)
294 {
295 data = &insn_data[CODE_FOR_insv];
296 break;
297 }
298 return MAX_MACHINE_MODE;
299
300 case EP_extv:
301 if (HAVE_extv)
302 {
303 data = &insn_data[CODE_FOR_extv];
304 break;
305 }
306 return MAX_MACHINE_MODE;
307
308 case EP_extzv:
309 if (HAVE_extzv)
310 {
311 data = &insn_data[CODE_FOR_extzv];
312 break;
313 }
314 return MAX_MACHINE_MODE;
315
316 default:
317 gcc_unreachable ();
318 }
319
320 if (opno == -1)
321 return VOIDmode;
322
323 /* Everyone who uses this function used to follow it with
324 if (result == VOIDmode) result = word_mode; */
325 if (data->operand[opno].mode == VOIDmode)
326 return word_mode;
327 return data->operand[opno].mode;
328 }
329
330 /* Return true if X, of mode MODE, matches the predicate for operand
331 OPNO of instruction ICODE. Allow volatile memories, regardless of
332 the ambient volatile_ok setting. */
333
334 static bool
335 check_predicate_volatile_ok (enum insn_code icode, int opno,
336 rtx x, enum machine_mode mode)
337 {
338 bool save_volatile_ok, result;
339
340 save_volatile_ok = volatile_ok;
341 result = insn_data[(int) icode].operand[opno].predicate (x, mode);
342 volatile_ok = save_volatile_ok;
343 return result;
344 }
345 \f
346 /* A subroutine of store_bit_field, with the same arguments. Return true
347 if the operation could be implemented.
348
349 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
350 no other way of implementing the operation. If FALLBACK_P is false,
351 return false instead. */
352
353 static bool
354 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
355 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
356 rtx value, bool fallback_p)
357 {
358 unsigned int unit
359 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
360 unsigned HOST_WIDE_INT offset, bitpos;
361 rtx op0 = str_rtx;
362 int byte_offset;
363 rtx orig_value;
364
365 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
366
367 while (GET_CODE (op0) == SUBREG)
368 {
369 /* The following line once was done only if WORDS_BIG_ENDIAN,
370 but I think that is a mistake. WORDS_BIG_ENDIAN is
371 meaningful at a much higher level; when structures are copied
372 between memory and regs, the higher-numbered regs
373 always get higher addresses. */
374 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
375 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
376
377 byte_offset = 0;
378
379 /* Paradoxical subregs need special handling on big endian machines. */
380 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
381 {
382 int difference = inner_mode_size - outer_mode_size;
383
384 if (WORDS_BIG_ENDIAN)
385 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
386 if (BYTES_BIG_ENDIAN)
387 byte_offset += difference % UNITS_PER_WORD;
388 }
389 else
390 byte_offset = SUBREG_BYTE (op0);
391
392 bitnum += byte_offset * BITS_PER_UNIT;
393 op0 = SUBREG_REG (op0);
394 }
395
396 /* No action is needed if the target is a register and if the field
397 lies completely outside that register. This can occur if the source
398 code contains an out-of-bounds access to a small array. */
399 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
400 return true;
401
402 /* Use vec_set patterns for inserting parts of vectors whenever
403 available. */
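  /* For example, storing an SFmode value at bit position 64 of a V4SFmode
     register selects vector element 64 / 32 == 2 (assuming SFmode is 32 bits
     wide and the target provides a vec_set pattern for V4SFmode).  */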
404 if (VECTOR_MODE_P (GET_MODE (op0))
405 && !MEM_P (op0)
406 && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
407 != CODE_FOR_nothing)
408 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
409 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
410 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
411 {
412 enum machine_mode outermode = GET_MODE (op0);
413 enum machine_mode innermode = GET_MODE_INNER (outermode);
414 int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
415 int pos = bitnum / GET_MODE_BITSIZE (innermode);
416 rtx rtxpos = GEN_INT (pos);
417 rtx src = value;
418 rtx dest = op0;
419 rtx pat, seq;
420 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
421 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
422 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
423
424 start_sequence ();
425
426 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
427 src = copy_to_mode_reg (mode1, src);
428
429 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
430 	rtxpos = copy_to_mode_reg (mode2, rtxpos);
431
432 /* We could handle this, but we should always be called with a pseudo
433 for our targets and all insns should take them as outputs. */
434 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
435 && (*insn_data[icode].operand[1].predicate) (src, mode1)
436 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
437 pat = GEN_FCN (icode) (dest, src, rtxpos);
438 seq = get_insns ();
439 end_sequence ();
440 if (pat)
441 {
442 emit_insn (seq);
443 emit_insn (pat);
444 return true;
445 }
446 }
447
448 /* If the target is a register, overwriting the entire object, or storing
449 a full-word or multi-word field can be done with just a SUBREG.
450
451 If the target is memory, storing any naturally aligned field can be
452 done with a simple store. For targets that support fast unaligned
453 memory, any naturally sized, unit aligned field can be done directly. */
454
455 offset = bitnum / unit;
456 bitpos = bitnum % unit;
457 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
458 + (offset * UNITS_PER_WORD);
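  /* For instance, with 32-bit words, 8-bit units and a register destination
     (UNIT == 32), BITNUM == 37 gives OFFSET == 1, BITPOS == 5 and
     BYTE_OFFSET == 4.  */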
459
460 if (bitpos == 0
461 && bitsize == GET_MODE_BITSIZE (fieldmode)
462 && (!MEM_P (op0)
463 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
464 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
465 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
466 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
467 || (offset * BITS_PER_UNIT % bitsize == 0
468 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
469 {
470 if (MEM_P (op0))
471 op0 = adjust_address (op0, fieldmode, offset);
472 else if (GET_MODE (op0) != fieldmode)
473 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
474 byte_offset);
475 emit_move_insn (op0, value);
476 return true;
477 }
478
479 /* Make sure we are playing with integral modes. Pun with subregs
480 if we aren't. This must come after the entire register case above,
481 since that case is valid for any mode. The following cases are only
482 valid for integral modes. */
483 {
484 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
485 if (imode != GET_MODE (op0))
486 {
487 if (MEM_P (op0))
488 op0 = adjust_address (op0, imode, 0);
489 else
490 {
491 gcc_assert (imode != BLKmode);
492 op0 = gen_lowpart (imode, op0);
493 }
494 }
495 }
496
497 /* We may be accessing data outside the field, which means
498 we can alias adjacent data. */
499 if (MEM_P (op0))
500 {
501 op0 = shallow_copy_rtx (op0);
502 set_mem_alias_set (op0, 0);
503 set_mem_expr (op0, 0);
504 }
505
506 /* If OP0 is a register, BITPOS must count within a word.
507 But as we have it, it counts within whatever size OP0 now has.
508 On a bigendian machine, these are not the same, so convert. */
509 if (BYTES_BIG_ENDIAN
510 && !MEM_P (op0)
511 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
512 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
513
514 /* Storing an lsb-aligned field in a register
515 can be done with a movestrict instruction. */
516
517 if (!MEM_P (op0)
518 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
519 && bitsize == GET_MODE_BITSIZE (fieldmode)
520 && (optab_handler (movstrict_optab, fieldmode)->insn_code
521 != CODE_FOR_nothing))
522 {
523 int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
524
525 /* Get appropriate low part of the value being stored. */
526 if (GET_CODE (value) == CONST_INT || REG_P (value))
527 value = gen_lowpart (fieldmode, value);
528 else if (!(GET_CODE (value) == SYMBOL_REF
529 || GET_CODE (value) == LABEL_REF
530 || GET_CODE (value) == CONST))
531 value = convert_to_mode (fieldmode, value, 0);
532
533 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
534 value = copy_to_mode_reg (fieldmode, value);
535
536 if (GET_CODE (op0) == SUBREG)
537 {
538 /* Else we've got some float mode source being extracted into
539 a different float mode destination -- this combination of
540 subregs results in Severe Tire Damage. */
541 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
542 || GET_MODE_CLASS (fieldmode) == MODE_INT
543 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
544 op0 = SUBREG_REG (op0);
545 }
546
547 emit_insn (GEN_FCN (icode)
548 (gen_rtx_SUBREG (fieldmode, op0,
549 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
550 + (offset * UNITS_PER_WORD)),
551 value));
552
553 return true;
554 }
555
556 /* Handle fields bigger than a word. */
557
558 if (bitsize > BITS_PER_WORD)
559 {
560 /* Here we transfer the words of the field
561 in the order least significant first.
562 This is because the most significant word is the one which may
563 be less than full.
564 However, only do that if the value is not BLKmode. */
565
566 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
567 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
568 unsigned int i;
569 rtx last;
570
571 /* This is the mode we must force value to, so that there will be enough
572 subwords to extract. Note that fieldmode will often (always?) be
573 VOIDmode, because that is what store_field uses to indicate that this
574 is a bit field, but passing VOIDmode to operand_subword_force
575 is not allowed. */
576 fieldmode = GET_MODE (value);
577 if (fieldmode == VOIDmode)
578 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
579
580 last = get_last_insn ();
581 for (i = 0; i < nwords; i++)
582 {
583 /* If I is 0, use the low-order word in both field and target;
584 if I is 1, use the next to lowest word; and so on. */
585 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
586 unsigned int bit_offset = (backwards
587 ? MAX ((int) bitsize - ((int) i + 1)
588 * BITS_PER_WORD,
589 0)
590 : (int) i * BITS_PER_WORD);
591 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
592
593 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
594 bitsize - i * BITS_PER_WORD),
595 bitnum + bit_offset, word_mode,
596 value_word, fallback_p))
597 {
598 delete_insns_since (last);
599 return false;
600 }
601 }
602 return true;
603 }
604
 605 /* From here on we can assume that the field to be stored in is
 606 no wider than a word, since fields wider than a word were handled above.  */
607
608 /* OFFSET is the number of words or bytes (UNIT says which)
609 from STR_RTX to the first word or byte containing part of the field. */
610
611 if (!MEM_P (op0))
612 {
613 if (offset != 0
614 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
615 {
616 if (!REG_P (op0))
617 {
618 /* Since this is a destination (lvalue), we can't copy
619 it to a pseudo. We can remove a SUBREG that does not
620 change the size of the operand. Such a SUBREG may
621 have been added above. */
622 gcc_assert (GET_CODE (op0) == SUBREG
623 && (GET_MODE_SIZE (GET_MODE (op0))
624 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
625 op0 = SUBREG_REG (op0);
626 }
627 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
628 op0, (offset * UNITS_PER_WORD));
629 }
630 offset = 0;
631 }
632
633 /* If VALUE has a floating-point or complex mode, access it as an
634 integer of the corresponding size. This can occur on a machine
635 with 64 bit registers that uses SFmode for float. It can also
636 occur for unaligned float or complex fields. */
637 orig_value = value;
638 if (GET_MODE (value) != VOIDmode
639 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
640 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
641 {
642 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
643 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
644 }
645
646 /* Now OFFSET is nonzero only if OP0 is memory
647 and is therefore always measured in bytes. */
648
649 if (HAVE_insv
650 && GET_MODE (value) != BLKmode
651 && bitsize > 0
652 && GET_MODE_BITSIZE (op_mode) >= bitsize
653 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
654 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
655 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
656 VOIDmode)
657 && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
658 {
659 int xbitpos = bitpos;
660 rtx value1;
661 rtx xop0 = op0;
662 rtx last = get_last_insn ();
663 rtx pat;
664
665 /* Add OFFSET into OP0's address. */
666 if (MEM_P (xop0))
667 xop0 = adjust_address (xop0, byte_mode, offset);
668
669 /* If xop0 is a register, we need it in OP_MODE
670 to make it acceptable to the format of insv. */
671 if (GET_CODE (xop0) == SUBREG)
672 /* We can't just change the mode, because this might clobber op0,
673 and we will need the original value of op0 if insv fails. */
674 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
675 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
676 xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
677
678 /* On big-endian machines, we count bits from the most significant.
679 If the bit field insn does not, we must invert. */
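      /* (E.g. with a 32-bit UNIT, an 8-bit field at little-endian bit 4
	 starts at bit 32 - 8 - 4 == 20 when counted from the most
	 significant bit.)  */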
680
681 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
682 xbitpos = unit - bitsize - xbitpos;
683
684 /* We have been counting XBITPOS within UNIT.
685 Count instead within the size of the register. */
686 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
687 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
688
689 unit = GET_MODE_BITSIZE (op_mode);
690
691 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
692 value1 = value;
693 if (GET_MODE (value) != op_mode)
694 {
695 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
696 {
697 /* Optimization: Don't bother really extending VALUE
698 if it has all the bits we will actually use. However,
699 if we must narrow it, be sure we do it correctly. */
700
701 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
702 {
703 rtx tmp;
704
705 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
706 if (! tmp)
707 tmp = simplify_gen_subreg (op_mode,
708 force_reg (GET_MODE (value),
709 value1),
710 GET_MODE (value), 0);
711 value1 = tmp;
712 }
713 else
714 value1 = gen_lowpart (op_mode, value1);
715 }
716 else if (GET_CODE (value) == CONST_INT)
717 value1 = gen_int_mode (INTVAL (value), op_mode);
718 else
719 /* Parse phase is supposed to make VALUE's data type
720 match that of the component reference, which is a type
721 at least as wide as the field; so VALUE should have
722 a mode that corresponds to that type. */
723 gcc_assert (CONSTANT_P (value));
724 }
725
726 /* If this machine's insv insists on a register,
727 get VALUE1 into a register. */
728 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
729 (value1, op_mode)))
730 value1 = force_reg (op_mode, value1);
731
732 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
733 if (pat)
734 {
735 emit_insn (pat);
736 return true;
737 }
738 delete_insns_since (last);
739 }
740
741 /* If OP0 is a memory, try copying it to a register and seeing if a
742 cheap register alternative is available. */
743 if (HAVE_insv && MEM_P (op0))
744 {
745 enum machine_mode bestmode;
746
747 /* Get the mode to use for inserting into this field. If OP0 is
748 BLKmode, get the smallest mode consistent with the alignment. If
749 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
750 mode. Otherwise, use the smallest mode containing the field. */
751
752 if (GET_MODE (op0) == BLKmode
753 || (op_mode != MAX_MACHINE_MODE
754 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
755 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
756 (op_mode == MAX_MACHINE_MODE
757 ? VOIDmode : op_mode),
758 MEM_VOLATILE_P (op0));
759 else
760 bestmode = GET_MODE (op0);
761
762 if (bestmode != VOIDmode
763 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
764 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
765 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
766 {
767 rtx last, tempreg, xop0;
768 unsigned HOST_WIDE_INT xoffset, xbitpos;
769
770 last = get_last_insn ();
771
772 /* Adjust address to point to the containing unit of
773 that mode. Compute the offset as a multiple of this unit,
774 counting in bytes. */
775 unit = GET_MODE_BITSIZE (bestmode);
776 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
777 xbitpos = bitnum % unit;
778 xop0 = adjust_address (op0, bestmode, xoffset);
779
780 /* Fetch that unit, store the bitfield in it, then store
781 the unit. */
782 tempreg = copy_to_reg (xop0);
783 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
784 fieldmode, orig_value, false))
785 {
786 emit_move_insn (xop0, tempreg);
787 return true;
788 }
789 delete_insns_since (last);
790 }
791 }
792
793 if (!fallback_p)
794 return false;
795
796 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
797 return true;
798 }
799
800 /* Generate code to store value from rtx VALUE
801 into a bit-field within structure STR_RTX
802 containing BITSIZE bits starting at bit BITNUM.
803 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
804
805 void
806 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
807 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
808 rtx value)
809 {
810 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
811 gcc_unreachable ();
812 }
813 \f
814 /* Use shifts and boolean operations to store VALUE
815 into a bit field of width BITSIZE
816 in a memory location specified by OP0 except offset by OFFSET bytes.
817 (OFFSET must be 0 if OP0 is a register.)
818 The field starts at position BITPOS within the byte.
819 (If OP0 is a register, it may be a full word or a narrower mode,
820 but BITPOS still counts within a full word,
821 which is significant on bigendian machines.) */
822
823 static void
824 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
825 unsigned HOST_WIDE_INT bitsize,
826 unsigned HOST_WIDE_INT bitpos, rtx value)
827 {
828 enum machine_mode mode;
829 unsigned int total_bits = BITS_PER_WORD;
830 rtx temp;
831 int all_zero = 0;
832 int all_one = 0;
833
834 /* There is a case not handled here:
835 a structure with a known alignment of just a halfword
836 and a field split across two aligned halfwords within the structure.
837 Or likewise a structure with a known alignment of just a byte
838 and a field split across two bytes.
839 Such cases are not supposed to be able to occur. */
840
841 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
842 {
843 gcc_assert (!offset);
844 /* Special treatment for a bit field split across two registers. */
845 if (bitsize + bitpos > BITS_PER_WORD)
846 {
847 store_split_bit_field (op0, bitsize, bitpos, value);
848 return;
849 }
850 }
851 else
852 {
853 /* Get the proper mode to use for this field. We want a mode that
854 includes the entire field. If such a mode would be larger than
855 a word, we won't be doing the extraction the normal way.
856 We don't want a mode bigger than the destination. */
857
858 mode = GET_MODE (op0);
859 if (GET_MODE_BITSIZE (mode) == 0
860 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
861 mode = word_mode;
862 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
863 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
864
865 if (mode == VOIDmode)
866 {
867 /* The only way this should occur is if the field spans word
868 boundaries. */
869 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
870 value);
871 return;
872 }
873
874 total_bits = GET_MODE_BITSIZE (mode);
875
876 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
877 be in the range 0 to total_bits-1, and put any excess bytes in
878 OFFSET. */
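      /* (For example, with TOTAL_BITS == 32, BITPOS == 70 becomes
	 OFFSET += 8 and BITPOS == 6.)  */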
879 if (bitpos >= total_bits)
880 {
881 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
882 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
883 * BITS_PER_UNIT);
884 }
885
886 /* Get ref to an aligned byte, halfword, or word containing the field.
887 Adjust BITPOS to be position within a word,
888 and OFFSET to be the offset of that word.
889 Then alter OP0 to refer to that word. */
890 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
891 offset -= (offset % (total_bits / BITS_PER_UNIT));
892 op0 = adjust_address (op0, mode, offset);
893 }
894
895 mode = GET_MODE (op0);
896
897 /* Now MODE is either some integral mode for a MEM as OP0,
898 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
899 The bit field is contained entirely within OP0.
900 BITPOS is the starting bit number within OP0.
901 (OP0's mode may actually be narrower than MODE.) */
902
903 if (BYTES_BIG_ENDIAN)
904 /* BITPOS is the distance between our msb
905 and that of the containing datum.
906 Convert it to the distance from the lsb. */
907 bitpos = total_bits - bitsize - bitpos;
908
909 /* Now BITPOS is always the distance between our lsb
910 and that of OP0. */
911
912 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
913 we must first convert its mode to MODE. */
914
915 if (GET_CODE (value) == CONST_INT)
916 {
917 HOST_WIDE_INT v = INTVAL (value);
918
919 if (bitsize < HOST_BITS_PER_WIDE_INT)
920 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
921
922 if (v == 0)
923 all_zero = 1;
924 else if ((bitsize < HOST_BITS_PER_WIDE_INT
925 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
926 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
927 all_one = 1;
928
929 value = lshift_value (mode, value, bitpos, bitsize);
930 }
931 else
932 {
933 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
934 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
935
936 if (GET_MODE (value) != mode)
937 {
938 if ((REG_P (value) || GET_CODE (value) == SUBREG)
939 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
940 value = gen_lowpart (mode, value);
941 else
942 value = convert_to_mode (mode, value, 1);
943 }
944
945 if (must_and)
946 value = expand_binop (mode, and_optab, value,
947 mask_rtx (mode, 0, bitsize, 0),
948 NULL_RTX, 1, OPTAB_LIB_WIDEN);
949 if (bitpos > 0)
950 value = expand_shift (LSHIFT_EXPR, mode, value,
951 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
952 }
953
954 /* Now clear the chosen bits in OP0,
955 except that if VALUE is -1 we need not bother. */
956 /* We keep the intermediates in registers to allow CSE to combine
957 consecutive bitfield assignments. */
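  /* In effect this computes OP0 = (OP0 & ~MASK) | (VALUE << BITPOS), where
     MASK has BITSIZE one-bits starting at BITPOS; e.g. storing a 3-bit value
     V at bit 4 of a word W yields W = (W & ~(7 << 4)) | (V << 4).  */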
958
959 temp = force_reg (mode, op0);
960
961 if (! all_one)
962 {
963 temp = expand_binop (mode, and_optab, temp,
964 mask_rtx (mode, bitpos, bitsize, 1),
965 NULL_RTX, 1, OPTAB_LIB_WIDEN);
966 temp = force_reg (mode, temp);
967 }
968
969 /* Now logical-or VALUE into OP0, unless it is zero. */
970
971 if (! all_zero)
972 {
973 temp = expand_binop (mode, ior_optab, temp, value,
974 NULL_RTX, 1, OPTAB_LIB_WIDEN);
975 temp = force_reg (mode, temp);
976 }
977
978 if (op0 != temp)
979 emit_move_insn (op0, temp);
980 }
981 \f
982 /* Store a bit field that is split across multiple accessible memory objects.
983
984 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
985 BITSIZE is the field width; BITPOS the position of its first bit
986 (within the word).
987 VALUE is the value to store.
988
989 This does not yet handle fields wider than BITS_PER_WORD. */
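/* For example, with 32-bit words, a 10-bit field starting at bit 28 is
   stored as a 4-bit piece at bit 28 of the first word followed by a 6-bit
   piece at bit 0 of the next word.  */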
990
991 static void
992 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
993 unsigned HOST_WIDE_INT bitpos, rtx value)
994 {
995 unsigned int unit;
996 unsigned int bitsdone = 0;
997
998 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
999 much at a time. */
1000 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1001 unit = BITS_PER_WORD;
1002 else
1003 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1004
1005 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1006 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1007 that VALUE might be a floating-point constant. */
1008 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1009 {
1010 rtx word = gen_lowpart_common (word_mode, value);
1011
1012 if (word && (value != word))
1013 value = word;
1014 else
1015 value = gen_lowpart_common (word_mode,
1016 force_reg (GET_MODE (value) != VOIDmode
1017 ? GET_MODE (value)
1018 : word_mode, value));
1019 }
1020
1021 while (bitsdone < bitsize)
1022 {
1023 unsigned HOST_WIDE_INT thissize;
1024 rtx part, word;
1025 unsigned HOST_WIDE_INT thispos;
1026 unsigned HOST_WIDE_INT offset;
1027
1028 offset = (bitpos + bitsdone) / unit;
1029 thispos = (bitpos + bitsdone) % unit;
1030
1031 /* THISSIZE must not overrun a word boundary. Otherwise,
1032 store_fixed_bit_field will call us again, and we will mutually
1033 recurse forever. */
1034 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1035 thissize = MIN (thissize, unit - thispos);
1036
1037 if (BYTES_BIG_ENDIAN)
1038 {
1039 int total_bits;
1040
1041 /* We must do an endian conversion exactly the same way as it is
1042 done in extract_bit_field, so that the two calls to
1043 extract_fixed_bit_field will have comparable arguments. */
1044 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1045 total_bits = BITS_PER_WORD;
1046 else
1047 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1048
1049 /* Fetch successively less significant portions. */
1050 if (GET_CODE (value) == CONST_INT)
1051 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1052 >> (bitsize - bitsdone - thissize))
1053 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1054 else
1055 /* The args are chosen so that the last part includes the
1056 lsb. Give extract_bit_field the value it needs (with
1057 endianness compensation) to fetch the piece we want. */
1058 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1059 total_bits - bitsize + bitsdone,
1060 NULL_RTX, 1);
1061 }
1062 else
1063 {
1064 /* Fetch successively more significant portions. */
1065 if (GET_CODE (value) == CONST_INT)
1066 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1067 >> bitsdone)
1068 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1069 else
1070 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1071 bitsdone, NULL_RTX, 1);
1072 }
1073
1074 /* If OP0 is a register, then handle OFFSET here.
1075
1076 When handling multiword bitfields, extract_bit_field may pass
1077 down a word_mode SUBREG of a larger REG for a bitfield that actually
1078 crosses a word boundary. Thus, for a SUBREG, we must find
1079 the current word starting from the base register. */
1080 if (GET_CODE (op0) == SUBREG)
1081 {
1082 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1083 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1084 GET_MODE (SUBREG_REG (op0)));
1085 offset = 0;
1086 }
1087 else if (REG_P (op0))
1088 {
1089 word = operand_subword_force (op0, offset, GET_MODE (op0));
1090 offset = 0;
1091 }
1092 else
1093 word = op0;
1094
1095 /* OFFSET is in UNITs, and UNIT is in bits.
1096 store_fixed_bit_field wants offset in bytes. */
1097 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1098 thispos, part);
1099 bitsdone += thissize;
1100 }
1101 }
1102 \f
1103 /* A subroutine of extract_bit_field_1 that converts return value X
1104 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1105 to extract_bit_field. */
1106
1107 static rtx
1108 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1109 enum machine_mode tmode, bool unsignedp)
1110 {
1111 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1112 return x;
1113
 1114 /* If TMODE is not a scalar integer mode, first convert X to an integer
 1115 mode of the same size and then access the result in TMODE (e.g. a
 1116 floating-point mode) via a SUBREG.  */
1117 if (!SCALAR_INT_MODE_P (tmode))
1118 {
1119 enum machine_mode smode;
1120
1121 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1122 x = convert_to_mode (smode, x, unsignedp);
1123 x = force_reg (smode, x);
1124 return gen_lowpart (tmode, x);
1125 }
1126
1127 return convert_to_mode (tmode, x, unsignedp);
1128 }
1129
1130 /* A subroutine of extract_bit_field, with the same arguments.
1131 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1132 if we can find no other means of implementing the operation.
 1133 If FALLBACK_P is false, return NULL instead.  */
1134
1135 static rtx
1136 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1137 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1138 enum machine_mode mode, enum machine_mode tmode,
1139 bool fallback_p)
1140 {
1141 unsigned int unit
1142 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1143 unsigned HOST_WIDE_INT offset, bitpos;
1144 rtx op0 = str_rtx;
1145 enum machine_mode int_mode;
1146 enum machine_mode ext_mode;
1147 enum machine_mode mode1;
1148 enum insn_code icode;
1149 int byte_offset;
1150
1151 if (tmode == VOIDmode)
1152 tmode = mode;
1153
1154 while (GET_CODE (op0) == SUBREG)
1155 {
1156 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1157 op0 = SUBREG_REG (op0);
1158 }
1159
1160 /* If we have an out-of-bounds access to a register, just return an
1161 uninitialized register of the required mode. This can occur if the
1162 source code contains an out-of-bounds access to a small array. */
1163 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1164 return gen_reg_rtx (tmode);
1165
1166 if (REG_P (op0)
1167 && mode == GET_MODE (op0)
1168 && bitnum == 0
1169 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1170 {
1171 /* We're trying to extract a full register from itself. */
1172 return op0;
1173 }
1174
1175 /* See if we can get a better vector mode before extracting. */
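  /* For example, an SFmode element can be taken out of a V4SImode register
     by first viewing the register as V4SFmode, provided the target supports
     that vector mode.  */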
1176 if (VECTOR_MODE_P (GET_MODE (op0))
1177 && !MEM_P (op0)
1178 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1179 {
1180 enum machine_mode new_mode;
1181 int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1182
1183 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1184 new_mode = MIN_MODE_VECTOR_FLOAT;
1185 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1186 new_mode = MIN_MODE_VECTOR_FRACT;
1187 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1188 new_mode = MIN_MODE_VECTOR_UFRACT;
1189 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1190 new_mode = MIN_MODE_VECTOR_ACCUM;
1191 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1192 new_mode = MIN_MODE_VECTOR_UACCUM;
1193 else
1194 new_mode = MIN_MODE_VECTOR_INT;
1195
1196 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1197 if (GET_MODE_NUNITS (new_mode) == nunits
1198 && GET_MODE_INNER (new_mode) == tmode
1199 && targetm.vector_mode_supported_p (new_mode))
1200 break;
1201 if (new_mode != VOIDmode)
1202 op0 = gen_lowpart (new_mode, op0);
1203 }
1204
1205 /* Use vec_extract patterns for extracting parts of vectors whenever
1206 available. */
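  /* E.g. extracting the SImode element at bit 96 of a V4SImode register uses
     element number 96 / 32 == 3 (assuming SImode is 32 bits wide and the
     target provides a vec_extract pattern for V4SImode).  */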
1207 if (VECTOR_MODE_P (GET_MODE (op0))
1208 && !MEM_P (op0)
1209 && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1210 != CODE_FOR_nothing)
1211 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1212 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1213 {
1214 enum machine_mode outermode = GET_MODE (op0);
1215 enum machine_mode innermode = GET_MODE_INNER (outermode);
1216 int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1217 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1218 rtx rtxpos = GEN_INT (pos);
1219 rtx src = op0;
1220 rtx dest = NULL, pat, seq;
1221 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1222 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1223 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1224
1225 if (innermode == tmode || innermode == mode)
1226 dest = target;
1227
1228 if (!dest)
1229 dest = gen_reg_rtx (innermode);
1230
1231 start_sequence ();
1232
1233 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1234 dest = copy_to_mode_reg (mode0, dest);
1235
1236 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1237 src = copy_to_mode_reg (mode1, src);
1238
1239 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 1240 	rtxpos = copy_to_mode_reg (mode2, rtxpos);
1241
1242 /* We could handle this, but we should always be called with a pseudo
1243 for our targets and all insns should take them as outputs. */
1244 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1245 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1246 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1247
1248 pat = GEN_FCN (icode) (dest, src, rtxpos);
1249 seq = get_insns ();
1250 end_sequence ();
1251 if (pat)
1252 {
1253 emit_insn (seq);
1254 emit_insn (pat);
1255 if (mode0 != mode)
1256 return gen_lowpart (tmode, dest);
1257 return dest;
1258 }
1259 }
1260
1261 /* Make sure we are playing with integral modes. Pun with subregs
1262 if we aren't. */
1263 {
1264 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1265 if (imode != GET_MODE (op0))
1266 {
1267 if (MEM_P (op0))
1268 op0 = adjust_address (op0, imode, 0);
1269 else
1270 {
1271 gcc_assert (imode != BLKmode);
1272 op0 = gen_lowpart (imode, op0);
1273
1274 /* If we got a SUBREG, force it into a register since we
1275 aren't going to be able to do another SUBREG on it. */
1276 if (GET_CODE (op0) == SUBREG)
1277 op0 = force_reg (imode, op0);
1278 }
1279 }
1280 }
1281
1282 /* We may be accessing data outside the field, which means
1283 we can alias adjacent data. */
1284 if (MEM_P (op0))
1285 {
1286 op0 = shallow_copy_rtx (op0);
1287 set_mem_alias_set (op0, 0);
1288 set_mem_expr (op0, 0);
1289 }
1290
1291 /* Extraction of a full-word or multi-word value from a structure
1292 in a register or aligned memory can be done with just a SUBREG.
1293 A subword value in the least significant part of a register
1294 can also be extracted with a SUBREG. For this, we need the
1295 byte offset of the value in op0. */
1296
1297 bitpos = bitnum % unit;
1298 offset = bitnum / unit;
1299 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1300
1301 /* If OP0 is a register, BITPOS must count within a word.
1302 But as we have it, it counts within whatever size OP0 now has.
1303 On a bigendian machine, these are not the same, so convert. */
1304 if (BYTES_BIG_ENDIAN
1305 && !MEM_P (op0)
1306 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1307 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1308
1309 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1310 If that's wrong, the solution is to test for it and set TARGET to 0
1311 if needed. */
1312
1313 /* Only scalar integer modes can be converted via subregs. There is an
1314 additional problem for FP modes here in that they can have a precision
1315 which is different from the size. mode_for_size uses precision, but
1316 we want a mode based on the size, so we must avoid calling it for FP
1317 modes. */
1318 mode1 = (SCALAR_INT_MODE_P (tmode)
1319 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1320 : mode);
1321
1322 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1323 && bitpos % BITS_PER_WORD == 0)
1324 || (mode1 != BLKmode
1325 /* ??? The big endian test here is wrong. This is correct
1326 if the value is in a register, and if mode_for_size is not
1327 the same mode as op0. This causes us to get unnecessarily
1328 inefficient code from the Thumb port when -mbig-endian. */
1329 && (BYTES_BIG_ENDIAN
1330 ? bitpos + bitsize == BITS_PER_WORD
1331 : bitpos == 0)))
1332 && ((!MEM_P (op0)
1333 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1334 GET_MODE_BITSIZE (GET_MODE (op0)))
1335 && GET_MODE_SIZE (mode1) != 0
1336 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1337 || (MEM_P (op0)
1338 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1339 || (offset * BITS_PER_UNIT % bitsize == 0
1340 && MEM_ALIGN (op0) % bitsize == 0)))))
1341 {
1342 if (MEM_P (op0))
1343 op0 = adjust_address (op0, mode1, offset);
1344 else if (mode1 != GET_MODE (op0))
1345 {
1346 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1347 byte_offset);
1348 if (sub == NULL)
1349 goto no_subreg_mode_swap;
1350 op0 = sub;
1351 }
1352 if (mode1 != mode)
1353 return convert_to_mode (tmode, op0, unsignedp);
1354 return op0;
1355 }
1356 no_subreg_mode_swap:
1357
1358 /* Handle fields bigger than a word. */
1359
1360 if (bitsize > BITS_PER_WORD)
1361 {
1362 /* Here we transfer the words of the field
1363 in the order least significant first.
1364 This is because the most significant word is the one which may
1365 be less than full. */
1366
1367 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1368 unsigned int i;
1369
1370 if (target == 0 || !REG_P (target))
1371 target = gen_reg_rtx (mode);
1372
1373 /* Indicate for flow that the entire target reg is being set. */
1374 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1375
1376 for (i = 0; i < nwords; i++)
1377 {
1378 /* If I is 0, use the low-order word in both field and target;
1379 if I is 1, use the next to lowest word; and so on. */
1380 /* Word number in TARGET to use. */
1381 unsigned int wordnum
1382 = (WORDS_BIG_ENDIAN
1383 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1384 : i);
1385 /* Offset from start of field in OP0. */
1386 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1387 ? MAX (0, ((int) bitsize - ((int) i + 1)
1388 * (int) BITS_PER_WORD))
1389 : (int) i * BITS_PER_WORD);
1390 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1391 rtx result_part
1392 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1393 bitsize - i * BITS_PER_WORD),
1394 bitnum + bit_offset, 1, target_part, mode,
1395 word_mode);
1396
1397 gcc_assert (target_part);
1398
1399 if (result_part != target_part)
1400 emit_move_insn (target_part, result_part);
1401 }
1402
1403 if (unsignedp)
1404 {
1405 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1406 need to be zero'd out. */
1407 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1408 {
1409 unsigned int i, total_words;
1410
1411 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1412 for (i = nwords; i < total_words; i++)
1413 emit_move_insn
1414 (operand_subword (target,
1415 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1416 1, VOIDmode),
1417 const0_rtx);
1418 }
1419 return target;
1420 }
1421
1422 /* Signed bit field: sign-extend with two arithmetic shifts. */
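      /* E.g. a 40-bit signed field placed in a 64-bit MODE is shifted left
	 by 24 and then arithmetically right by 24, replicating the sign bit
	 into the upper bits.  */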
1423 target = expand_shift (LSHIFT_EXPR, mode, target,
1424 build_int_cst (NULL_TREE,
1425 GET_MODE_BITSIZE (mode) - bitsize),
1426 NULL_RTX, 0);
1427 return expand_shift (RSHIFT_EXPR, mode, target,
1428 build_int_cst (NULL_TREE,
1429 GET_MODE_BITSIZE (mode) - bitsize),
1430 NULL_RTX, 0);
1431 }
1432
1433 /* From here on we know the desired field is smaller than a word. */
1434
1435 /* Check if there is a correspondingly-sized integer field, so we can
1436 safely extract it as one size of integer, if necessary; then
1437 truncate or extend to the size that is wanted; then use SUBREGs or
1438 convert_to_mode to get one of the modes we really wanted. */
1439
1440 int_mode = int_mode_for_mode (tmode);
1441 if (int_mode == BLKmode)
1442 int_mode = int_mode_for_mode (mode);
1443 /* Should probably push op0 out to memory and then do a load. */
1444 gcc_assert (int_mode != BLKmode);
1445
1446 /* OFFSET is the number of words or bytes (UNIT says which)
1447 from STR_RTX to the first word or byte containing part of the field. */
1448 if (!MEM_P (op0))
1449 {
1450 if (offset != 0
1451 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1452 {
1453 if (!REG_P (op0))
1454 op0 = copy_to_reg (op0);
1455 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1456 op0, (offset * UNITS_PER_WORD));
1457 }
1458 offset = 0;
1459 }
1460
1461 /* Now OFFSET is nonzero only for memory operands. */
1462 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1463 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1464 if (ext_mode != MAX_MACHINE_MODE
1465 && bitsize > 0
1466 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1467 /* If op0 is a register, we need it in EXT_MODE to make it
1468 acceptable to the format of ext(z)v. */
1469 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1470 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1471 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1472 && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1473 {
1474 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1475 rtx bitsize_rtx, bitpos_rtx;
1476 rtx last = get_last_insn ();
1477 rtx xop0 = op0;
1478 rtx xtarget = target;
1479 rtx xspec_target = target;
1480 rtx xspec_target_subreg = 0;
1481 rtx pat;
1482
1483 /* If op0 is a register, we need it in EXT_MODE to make it
1484 acceptable to the format of ext(z)v. */
1485 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1486 xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1487 if (MEM_P (xop0))
1488 /* Get ref to first byte containing part of the field. */
1489 xop0 = adjust_address (xop0, byte_mode, xoffset);
1490
1491 /* On big-endian machines, we count bits from the most significant.
1492 If the bit field insn does not, we must invert. */
1493 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1494 xbitpos = unit - bitsize - xbitpos;
1495
1496 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1497 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1498 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1499
1500 unit = GET_MODE_BITSIZE (ext_mode);
1501
1502 if (xtarget == 0)
1503 xtarget = xspec_target = gen_reg_rtx (tmode);
1504
1505 if (GET_MODE (xtarget) != ext_mode)
1506 {
1507 if (REG_P (xtarget))
1508 {
1509 xtarget = gen_lowpart (ext_mode, xtarget);
1510 if (GET_MODE_SIZE (ext_mode)
1511 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1512 xspec_target_subreg = xtarget;
1513 }
1514 else
1515 xtarget = gen_reg_rtx (ext_mode);
1516 }
1517
1518 /* If this machine's ext(z)v insists on a register target,
1519 make sure we have one. */
1520 if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1521 xtarget = gen_reg_rtx (ext_mode);
1522
1523 bitsize_rtx = GEN_INT (bitsize);
1524 bitpos_rtx = GEN_INT (xbitpos);
1525
1526 pat = (unsignedp
1527 ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1528 : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1529 if (pat)
1530 {
1531 emit_insn (pat);
1532 if (xtarget == xspec_target)
1533 return xtarget;
1534 if (xtarget == xspec_target_subreg)
1535 return xspec_target;
1536 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1537 }
1538 delete_insns_since (last);
1539 }
1540
1541 /* If OP0 is a memory, try copying it to a register and seeing if a
1542 cheap register alternative is available. */
1543 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1544 {
1545 enum machine_mode bestmode;
1546
1547 /* Get the mode to use for inserting into this field. If
1548 OP0 is BLKmode, get the smallest mode consistent with the
1549 alignment. If OP0 is a non-BLKmode object that is no
1550 wider than EXT_MODE, use its mode. Otherwise, use the
1551 smallest mode containing the field. */
1552
1553 if (GET_MODE (op0) == BLKmode
1554 || (ext_mode != MAX_MACHINE_MODE
1555 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1556 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1557 (ext_mode == MAX_MACHINE_MODE
1558 ? VOIDmode : ext_mode),
1559 MEM_VOLATILE_P (op0));
1560 else
1561 bestmode = GET_MODE (op0);
1562
1563 if (bestmode != VOIDmode
1564 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1565 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1566 {
1567 unsigned HOST_WIDE_INT xoffset, xbitpos;
1568
1569 /* Compute the offset as a multiple of this unit,
1570 counting in bytes. */
1571 unit = GET_MODE_BITSIZE (bestmode);
1572 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1573 xbitpos = bitnum % unit;
1574
1575 /* Make sure the register is big enough for the whole field. */
1576 if (xoffset * BITS_PER_UNIT + unit
1577 >= offset * BITS_PER_UNIT + bitsize)
1578 {
1579 rtx last, result, xop0;
1580
1581 last = get_last_insn ();
1582
1583 /* Fetch it to a register in that size. */
1584 xop0 = adjust_address (op0, bestmode, xoffset);
1585 xop0 = force_reg (bestmode, xop0);
1586 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1587 unsignedp, target,
1588 mode, tmode, false);
1589 if (result)
1590 return result;
1591
1592 delete_insns_since (last);
1593 }
1594 }
1595 }
1596
1597 if (!fallback_p)
1598 return NULL;
1599
1600 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1601 bitpos, target, unsignedp);
1602 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1603 }
1604
1605 /* Generate code to extract a byte-field from STR_RTX
1606 containing BITSIZE bits, starting at BITNUM,
1607 and put it in TARGET if possible (if TARGET is nonzero).
1608 Regardless of TARGET, we return the rtx for where the value is placed.
1609
1610 STR_RTX is the structure containing the byte (a REG or MEM).
1611 UNSIGNEDP is nonzero if this is an unsigned bit field.
1612 MODE is the natural mode of the field value once extracted.
1613 TMODE is the mode the caller would like the value to have;
1614 but the value may be returned with type MODE instead.
1615
1616 If a TARGET is specified and we can store in it at no extra cost,
1617 we do so, and return TARGET.
1618 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1619 if they are equally easy. */
1620
1621 rtx
1622 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1623 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1624 enum machine_mode mode, enum machine_mode tmode)
1625 {
1626 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1627 target, mode, tmode, true);
1628 }
1629 \f
 1630 /* Extract a bit field using shifts and boolean operations.
1631 Returns an rtx to represent the value.
1632 OP0 addresses a register (word) or memory (byte).
1633 BITPOS says which bit within the word or byte the bit field starts in.
1634 OFFSET says how many bytes farther the bit field starts;
1635 it is 0 if OP0 is a register.
1636 BITSIZE says how many bits long the bit field is.
1637 (If OP0 is a register, it may be narrower than a full word,
1638 but BITPOS still counts within a full word,
1639 which is significant on bigendian machines.)
1640
1641 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1642 If TARGET is nonzero, attempts to store the value there
1643 and return TARGET, but this is not guaranteed.
1644 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1645
1646 static rtx
1647 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1648 unsigned HOST_WIDE_INT offset,
1649 unsigned HOST_WIDE_INT bitsize,
1650 unsigned HOST_WIDE_INT bitpos, rtx target,
1651 int unsignedp)
1652 {
1653 unsigned int total_bits = BITS_PER_WORD;
1654 enum machine_mode mode;
1655
1656 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1657 {
1658 /* Special treatment for a bit field split across two registers. */
1659 if (bitsize + bitpos > BITS_PER_WORD)
1660 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1661 }
1662 else
1663 {
1664 /* Get the proper mode to use for this field. We want a mode that
1665 includes the entire field. If such a mode would be larger than
1666 a word, we won't be doing the extraction the normal way. */
1667
1668 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1669 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1670
1671 if (mode == VOIDmode)
1672 /* The only way this should occur is if the field spans word
1673 boundaries. */
1674 return extract_split_bit_field (op0, bitsize,
1675 bitpos + offset * BITS_PER_UNIT,
1676 unsignedp);
1677
1678 total_bits = GET_MODE_BITSIZE (mode);
1679
1680 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1681 be in the range 0 to total_bits-1, and put any excess bytes in
1682 OFFSET. */
1683 if (bitpos >= total_bits)
1684 {
1685 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1686 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1687 * BITS_PER_UNIT);
1688 }
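      /* For example, with total_bits == 32 and BITS_PER_UNIT == 8, an
	 incoming bitpos of 70 adds 8 bytes to OFFSET and leaves
	 BITPOS == 6.  */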
1689
1690 /* Get ref to an aligned byte, halfword, or word containing the field.
1691 Adjust BITPOS to be position within a word,
1692 and OFFSET to be the offset of that word.
1693 Then alter OP0 to refer to that word. */
1694 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1695 offset -= (offset % (total_bits / BITS_PER_UNIT));
1696 op0 = adjust_address (op0, mode, offset);
1697 }
1698
1699 mode = GET_MODE (op0);
1700
1701 if (BYTES_BIG_ENDIAN)
1702 /* BITPOS is the distance between our msb and that of OP0.
1703 Convert it to the distance from the lsb. */
1704 bitpos = total_bits - bitsize - bitpos;
1705
1706 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1707 We have reduced the big-endian case to the little-endian case. */
1708
1709 if (unsignedp)
1710 {
1711 if (bitpos)
1712 {
1713 /* If the field does not already start at the lsb,
1714 shift it so it does. */
1715 tree amount = build_int_cst (NULL_TREE, bitpos);
1716 /* Maybe propagate the target for the shift. */
1717 /* But not if we will return it--could confuse integrate.c. */
1718 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1719 if (tmode != mode) subtarget = 0;
1720 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1721 }
1722 /* Convert the value to the desired mode. */
1723 if (mode != tmode)
1724 op0 = convert_to_mode (tmode, op0, 1);
1725
1726 /* Unless the msb of the field used to be the msb when we shifted,
1727 mask out the upper bits. */
1728
1729 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1730 return expand_binop (GET_MODE (op0), and_optab, op0,
1731 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1732 target, 1, OPTAB_LIB_WIDEN);
1733 return op0;
1734 }
1735
1736 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1737 then arithmetic-shift its lsb to the lsb of the word. */
1738 op0 = force_reg (mode, op0);
1739 if (mode != tmode)
1740 target = 0;
1741
1742 /* Find the narrowest integer mode that contains the field. */
1743
1744 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1745 mode = GET_MODE_WIDER_MODE (mode))
1746 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1747 {
1748 op0 = convert_to_mode (mode, op0, 0);
1749 break;
1750 }
1751
1752 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1753 {
1754 tree amount
1755 = build_int_cst (NULL_TREE,
1756 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1757 /* Maybe propagate the target for the shift. */
1758 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1759 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1760 }
1761
1762 return expand_shift (RSHIFT_EXPR, mode, op0,
1763 build_int_cst (NULL_TREE,
1764 GET_MODE_BITSIZE (mode) - bitsize),
1765 target, 0);
1766 }
1767 \f
1768 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1769 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1770 complement of that if COMPLEMENT. The mask is truncated if
1771 necessary to the width of mode MODE. The mask is zero-extended if
1772 BITSIZE+BITPOS is too small for MODE. */
1773
1774 static rtx
1775 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1776 {
1777 HOST_WIDE_INT masklow, maskhigh;
1778
1779 if (bitsize == 0)
1780 masklow = 0;
1781 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1782 masklow = (HOST_WIDE_INT) -1 << bitpos;
1783 else
1784 masklow = 0;
1785
1786 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1787 masklow &= ((unsigned HOST_WIDE_INT) -1
1788 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1789
1790 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1791 maskhigh = -1;
1792 else
1793 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1794
1795 if (bitsize == 0)
1796 maskhigh = 0;
1797 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1798 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1799 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1800 else
1801 maskhigh = 0;
1802
1803 if (complement)
1804 {
1805 maskhigh = ~maskhigh;
1806 masklow = ~masklow;
1807 }
1808
1809 return immed_double_const (masklow, maskhigh, mode);
1810 }
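/* For instance (with a 64-bit HOST_WIDE_INT), mask_rtx (SImode, 3, 5, 0)
   yields the CONST_INT 0xf8 -- five ones shifted left by three -- and the
   complemented form mask_rtx (SImode, 3, 5, 1) yields the SImode mask
   0xffffff07.  */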
1811
1812 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1813 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1814
1815 static rtx
1816 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1817 {
1818 unsigned HOST_WIDE_INT v = INTVAL (value);
1819 HOST_WIDE_INT low, high;
1820
1821 if (bitsize < HOST_BITS_PER_WIDE_INT)
1822 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1823
1824 if (bitpos < HOST_BITS_PER_WIDE_INT)
1825 {
1826 low = v << bitpos;
1827 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1828 }
1829 else
1830 {
1831 low = 0;
1832 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1833 }
1834
1835 return immed_double_const (low, high, mode);
1836 }
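/* For instance, lshift_value (DImode, GEN_INT (0x2b), 8, 5) first truncates
   0x2b to its low five bits (0x0b) and then shifts it left by eight,
   returning the constant 0xb00.  */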
1837 \f
1838 /* Extract a bit field from a memory by forcing the alignment of the
1839 memory. This is efficient only if the field spans at least 4 boundaries.
1840
1841 OP0 is the MEM.
1842 BITSIZE is the field width; BITPOS is the position of the first bit.
1843 UNSIGNEDP is true if the result should be zero-extended. */
1844
1845 static rtx
1846 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1847 unsigned HOST_WIDE_INT bitpos,
1848 int unsignedp)
1849 {
1850 enum machine_mode mode, dmode;
1851 unsigned int m_bitsize, m_size;
1852 unsigned int sign_shift_up, sign_shift_dn;
1853 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1854
1855 /* Choose a mode that will fit BITSIZE. */
1856 mode = smallest_mode_for_size (bitsize, MODE_INT);
1857 m_size = GET_MODE_SIZE (mode);
1858 m_bitsize = GET_MODE_BITSIZE (mode);
1859
1860 /* Choose a mode twice as wide. Fail if no such mode exists. */
1861 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1862 if (dmode == BLKmode)
1863 return NULL;
1864
1865 do_pending_stack_adjust ();
1866 start = get_last_insn ();
1867
1868 /* At the end, we'll need an additional shift to deal with sign/zero
1869 extension. By default this will be a left+right shift of the
1870 appropriate size. But we may be able to eliminate one of them. */
1871 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1872
1873 if (STRICT_ALIGNMENT)
1874 {
1875 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1876 bitpos %= BITS_PER_UNIT;
1877
1878 /* We load two values to be concatenated. There's an edge condition
1879 that bears notice -- an aligned value at the end of a page can
1880 only load one value lest we segfault. So the two values we load
1881 are at "base & -size" and "(base + size - 1) & -size". If base
1882 is unaligned, the addresses will be aligned and sequential; if
1883 base is aligned, the addresses will both be equal to base. */
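      /* As a concrete illustration with m_size == 4: for base == 0x1003 the
	 two loads come from 0x1000 and 0x1004, while for an already aligned
	 base == 0x1000 both loads come from 0x1000 itself.  */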
1884
1885 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1886 GEN_INT (-(HOST_WIDE_INT)m_size),
1887 NULL, true, OPTAB_LIB_WIDEN);
1888 mark_reg_pointer (a1, m_bitsize);
1889 v1 = gen_rtx_MEM (mode, a1);
1890 set_mem_align (v1, m_bitsize);
1891 v1 = force_reg (mode, validize_mem (v1));
1892
1893 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1894 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1895 GEN_INT (-(HOST_WIDE_INT)m_size),
1896 NULL, true, OPTAB_LIB_WIDEN);
1897 v2 = gen_rtx_MEM (mode, a2);
1898 set_mem_align (v2, m_bitsize);
1899 v2 = force_reg (mode, validize_mem (v2));
1900
1901 /* Combine these two values into a double-word value. */
1902 if (m_bitsize == BITS_PER_WORD)
1903 {
1904 comb = gen_reg_rtx (dmode);
1905 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1906 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1907 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1908 }
1909 else
1910 {
1911 if (BYTES_BIG_ENDIAN)
1912 comb = v1, v1 = v2, v2 = comb;
1913 v1 = convert_modes (dmode, mode, v1, true);
1914 if (v1 == NULL)
1915 goto fail;
1916 v2 = convert_modes (dmode, mode, v2, true);
1917 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1918 NULL, true, OPTAB_LIB_WIDEN);
1919 if (v2 == NULL)
1920 goto fail;
1921 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1922 true, OPTAB_LIB_WIDEN);
1923 if (comb == NULL)
1924 goto fail;
1925 }
1926
1927 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1928 NULL, true, OPTAB_LIB_WIDEN);
1929 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1930
1931 if (bitpos != 0)
1932 {
1933 if (sign_shift_up <= bitpos)
1934 bitpos -= sign_shift_up, sign_shift_up = 0;
1935 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1936 NULL, true, OPTAB_LIB_WIDEN);
1937 }
1938 }
1939 else
1940 {
1941 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1942 bitpos %= BITS_PER_UNIT;
1943
1944 /* When strict alignment is not required, we can just load directly
1945 from memory without masking. If the remaining BITPOS offset is
1946 small enough, we may be able to do all operations in MODE as
1947 opposed to DMODE. */
1948 if (bitpos + bitsize <= m_bitsize)
1949 dmode = mode;
1950 comb = adjust_address (op0, dmode, offset);
1951
1952 if (sign_shift_up <= bitpos)
1953 bitpos -= sign_shift_up, sign_shift_up = 0;
1954 shift = GEN_INT (bitpos);
1955 }
1956
1957 /* Shift down the double-word such that the requested value is at bit 0. */
1958 if (shift != const0_rtx)
1959 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1960 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1961 if (comb == NULL)
1962 goto fail;
1963
1964 /* If the field exactly matches MODE, then all we need to do is return the
1965 lowpart. Otherwise, shift to get the sign bits set properly. */
1966 result = force_reg (mode, gen_lowpart (mode, comb));
1967
1968 if (sign_shift_up)
1969 result = expand_simple_binop (mode, ASHIFT, result,
1970 GEN_INT (sign_shift_up),
1971 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1972 if (sign_shift_dn)
1973 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1974 result, GEN_INT (sign_shift_dn),
1975 NULL_RTX, 0, OPTAB_LIB_WIDEN);
1976
1977 return result;
1978
1979 fail:
1980 delete_insns_since (start);
1981 return NULL;
1982 }
1983
1984 /* Extract a bit field that is split across two words
1985 and return an RTX for the result.
1986
1987 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1988 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1989 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1990
1991 static rtx
1992 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1993 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1994 {
1995 unsigned int unit;
1996 unsigned int bitsdone = 0;
1997 rtx result = NULL_RTX;
1998 int first = 1;
1999
2000 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2001 much at a time. */
2002 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2003 unit = BITS_PER_WORD;
2004 else
2005 {
2006 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2007 if (0 && bitsize / unit > 2)
2008 {
2009 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2010 unsignedp);
2011 if (tmp)
2012 return tmp;
2013 }
2014 }
2015
2016 while (bitsdone < bitsize)
2017 {
2018 unsigned HOST_WIDE_INT thissize;
2019 rtx part, word;
2020 unsigned HOST_WIDE_INT thispos;
2021 unsigned HOST_WIDE_INT offset;
2022
2023 offset = (bitpos + bitsdone) / unit;
2024 thispos = (bitpos + bitsdone) % unit;
2025
2026 /* THISSIZE must not overrun a word boundary. Otherwise,
2027 extract_fixed_bit_field will call us again, and we will mutually
2028 recurse forever. */
2029 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2030 thissize = MIN (thissize, unit - thispos);
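      /* Illustration: for a MEM aligned to 32 bits (unit == 32) with
	 bitpos == 24 and bitsize == 16, the first iteration extracts the
	 8 bits at position 24 of the first word and the second iteration
	 extracts the remaining 8 bits at position 0 of the next word.  */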
2031
2032 /* If OP0 is a register, then handle OFFSET here.
2033
2034 When handling multiword bitfields, extract_bit_field may pass
2035 down a word_mode SUBREG of a larger REG for a bitfield that actually
2036 crosses a word boundary. Thus, for a SUBREG, we must find
2037 the current word starting from the base register. */
2038 if (GET_CODE (op0) == SUBREG)
2039 {
2040 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2041 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2042 GET_MODE (SUBREG_REG (op0)));
2043 offset = 0;
2044 }
2045 else if (REG_P (op0))
2046 {
2047 word = operand_subword_force (op0, offset, GET_MODE (op0));
2048 offset = 0;
2049 }
2050 else
2051 word = op0;
2052
2053 /* Extract the parts in bit-counting order,
2054 whose meaning is determined by BYTES_PER_UNIT.
2055 OFFSET is in UNITs, and UNIT is in bits.
2056 extract_fixed_bit_field wants offset in bytes. */
2057 part = extract_fixed_bit_field (word_mode, word,
2058 offset * unit / BITS_PER_UNIT,
2059 thissize, thispos, 0, 1);
2060 bitsdone += thissize;
2061
2062 /* Shift this part into place for the result. */
2063 if (BYTES_BIG_ENDIAN)
2064 {
2065 if (bitsize != bitsdone)
2066 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2067 build_int_cst (NULL_TREE, bitsize - bitsdone),
2068 0, 1);
2069 }
2070 else
2071 {
2072 if (bitsdone != thissize)
2073 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2074 build_int_cst (NULL_TREE,
2075 bitsdone - thissize), 0, 1);
2076 }
2077
2078 if (first)
2079 result = part;
2080 else
2081 /* Combine the parts with bitwise or. This works
2082 because we extracted each part as an unsigned bit field. */
2083 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2084 OPTAB_LIB_WIDEN);
2085
2086 first = 0;
2087 }
2088
2089 /* Unsigned bit field: we are done. */
2090 if (unsignedp)
2091 return result;
2092 /* Signed bit field: sign-extend with two arithmetic shifts. */
2093 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2094 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2095 NULL_RTX, 0);
2096 return expand_shift (RSHIFT_EXPR, word_mode, result,
2097 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2098 NULL_RTX, 0);
2099 }
2100 \f
2101 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2102 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2103 MODE, fill the upper bits with zeros. Fail if the layout of either
2104 mode is unknown (as for CC modes) or if the extraction would involve
2105 unprofitable mode punning. Return the value on success, otherwise
2106 return null.
2107
2108 This is different from gen_lowpart* in these respects:
2109
2110 - the returned value must always be considered an rvalue
2111
2112 - when MODE is wider than SRC_MODE, the extraction involves
2113 a zero extension
2114
2115 - when MODE is smaller than SRC_MODE, the extraction involves
2116 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2117
2118 In other words, this routine performs a computation, whereas the
2119 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2120 operations. */
2121
2122 rtx
2123 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2124 {
2125 enum machine_mode int_mode, src_int_mode;
2126
2127 if (mode == src_mode)
2128 return src;
2129
2130 if (CONSTANT_P (src))
2131 return simplify_gen_subreg (mode, src, src_mode,
2132 subreg_lowpart_offset (mode, src_mode));
2133
2134 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2135 return NULL_RTX;
2136
2137 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2138 && MODES_TIEABLE_P (mode, src_mode))
2139 {
2140 rtx x = gen_lowpart_common (mode, src);
2141 if (x)
2142 return x;
2143 }
2144
2145 src_int_mode = int_mode_for_mode (src_mode);
2146 int_mode = int_mode_for_mode (mode);
2147 if (src_int_mode == BLKmode || int_mode == BLKmode)
2148 return NULL_RTX;
2149
2150 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2151 return NULL_RTX;
2152 if (!MODES_TIEABLE_P (int_mode, mode))
2153 return NULL_RTX;
2154
2155 src = gen_lowpart (src_int_mode, src);
2156 src = convert_modes (int_mode, src_int_mode, src, true);
2157 src = gen_lowpart (mode, src);
2158 return src;
2159 }
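/* Hypothetical uses (not calls made here): extract_low_bits (SImode, DImode, x)
   returns the low 32 bits of the DImode value X as an SImode rvalue, while
   extract_low_bits (DImode, SImode, x) returns X zero-extended to DImode.
   Either may fail and return NULL_RTX if the required truncation or mode
   punning is not available on the target.  */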
2160 \f
2161 /* Add INC into TARGET. */
2162
2163 void
2164 expand_inc (rtx target, rtx inc)
2165 {
2166 rtx value = expand_binop (GET_MODE (target), add_optab,
2167 target, inc,
2168 target, 0, OPTAB_LIB_WIDEN);
2169 if (value != target)
2170 emit_move_insn (target, value);
2171 }
2172
2173 /* Subtract DEC from TARGET. */
2174
2175 void
2176 expand_dec (rtx target, rtx dec)
2177 {
2178 rtx value = expand_binop (GET_MODE (target), sub_optab,
2179 target, dec,
2180 target, 0, OPTAB_LIB_WIDEN);
2181 if (value != target)
2182 emit_move_insn (target, value);
2183 }
2184 \f
2185 /* Output a shift instruction for expression code CODE,
2186 with SHIFTED being the rtx for the value to shift,
2187 and AMOUNT the tree for the amount to shift by.
2188 Store the result in the rtx TARGET, if that is convenient.
2189 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2190 Return the rtx for where the value is. */
2191
2192 rtx
2193 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2194 tree amount, rtx target, int unsignedp)
2195 {
2196 rtx op1, temp = 0;
2197 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2198 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2199 int try;
2200
2201 /* We used to detect shift counts computed by NEGATE_EXPR
2202 and shift in the other direction, but that does not work
2203 on all machines. */
2204
2205 op1 = expand_normal (amount);
2206
2207 if (SHIFT_COUNT_TRUNCATED)
2208 {
2209 if (GET_CODE (op1) == CONST_INT
2210 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2211 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2212 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2213 % GET_MODE_BITSIZE (mode));
2214 else if (GET_CODE (op1) == SUBREG
2215 && subreg_lowpart_p (op1))
2216 op1 = SUBREG_REG (op1);
2217 }
2218
2219 if (op1 == const0_rtx)
2220 return shifted;
2221
2222 /* Check whether it's cheaper to implement a left shift by a constant
2223 bit count by a sequence of additions. */
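  /* For instance, if 2 * add_cost[mode] is cheaper than shift_cost[mode][2],
     the loop below emits a left shift by 2 as two self-additions

       t = x + x;   t = t + t;

     rather than as a single shift instruction.  */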
2224 if (code == LSHIFT_EXPR
2225 && GET_CODE (op1) == CONST_INT
2226 && INTVAL (op1) > 0
2227 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2228 && INTVAL (op1) < MAX_BITS_PER_WORD
2229 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2230 && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2231 {
2232 int i;
2233 for (i = 0; i < INTVAL (op1); i++)
2234 {
2235 temp = force_reg (mode, shifted);
2236 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2237 unsignedp, OPTAB_LIB_WIDEN);
2238 }
2239 return shifted;
2240 }
2241
2242 for (try = 0; temp == 0 && try < 3; try++)
2243 {
2244 enum optab_methods methods;
2245
2246 if (try == 0)
2247 methods = OPTAB_DIRECT;
2248 else if (try == 1)
2249 methods = OPTAB_WIDEN;
2250 else
2251 methods = OPTAB_LIB_WIDEN;
2252
2253 if (rotate)
2254 {
2255 /* Widening does not work for rotation. */
2256 if (methods == OPTAB_WIDEN)
2257 continue;
2258 else if (methods == OPTAB_LIB_WIDEN)
2259 {
2260 /* If we have been unable to open-code this by a rotation,
2261 do it as the IOR of two shifts. I.e., to rotate A
2262 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2263 where C is the bitsize of A.
2264
2265 It is theoretically possible that the target machine might
2266 not be able to perform either shift and hence we would
2267 be making two libcalls rather than just the one for the
2268 shift (similarly if IOR could not be done). We will allow
2269 this extremely unlikely lossage to avoid complicating the
2270 code below. */
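	      /* For SImode (C == 32) a left rotate by 8, for example,
		 becomes (A << 8) | ((unsigned) A >> 24).  */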
2271
2272 rtx subtarget = target == shifted ? 0 : target;
2273 tree new_amount, other_amount;
2274 rtx temp1;
2275 tree type = TREE_TYPE (amount);
2276 if (GET_MODE (op1) != TYPE_MODE (type)
2277 && GET_MODE (op1) != VOIDmode)
2278 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2279 new_amount = make_tree (type, op1);
2280 other_amount
2281 = fold_build2 (MINUS_EXPR, type,
2282 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2283 new_amount);
2284
2285 shifted = force_reg (mode, shifted);
2286
2287 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2288 mode, shifted, new_amount, 0, 1);
2289 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2290 mode, shifted, other_amount, subtarget, 1);
2291 return expand_binop (mode, ior_optab, temp, temp1, target,
2292 unsignedp, methods);
2293 }
2294
2295 temp = expand_binop (mode,
2296 left ? rotl_optab : rotr_optab,
2297 shifted, op1, target, unsignedp, methods);
2298 }
2299 else if (unsignedp)
2300 temp = expand_binop (mode,
2301 left ? ashl_optab : lshr_optab,
2302 shifted, op1, target, unsignedp, methods);
2303
2304 /* Do arithmetic shifts.
2305 Also, if we are going to widen the operand, we can just as well
2306 use an arithmetic right-shift instead of a logical one. */
2307 if (temp == 0 && ! rotate
2308 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2309 {
2310 enum optab_methods methods1 = methods;
2311
2312 /* If trying to widen a log shift to an arithmetic shift,
2313 don't accept an arithmetic shift of the same size. */
2314 if (unsignedp)
2315 methods1 = OPTAB_MUST_WIDEN;
2316
2317 /* Arithmetic shift */
2318
2319 temp = expand_binop (mode,
2320 left ? ashl_optab : ashr_optab,
2321 shifted, op1, target, unsignedp, methods1);
2322 }
2323
2324 /* We used to try extzv here for logical right shifts, but that was
2325 only useful for one machine, the VAX, and caused poor code
2326 generation there for lshrdi3, so the code was deleted and a
2327 define_expand for lshrsi3 was added to vax.md. */
2328 }
2329
2330 gcc_assert (temp);
2331 return temp;
2332 }
2333 \f
2334 enum alg_code {
2335 alg_unknown,
2336 alg_zero,
2337 alg_m, alg_shift,
2338 alg_add_t_m2,
2339 alg_sub_t_m2,
2340 alg_add_factor,
2341 alg_sub_factor,
2342 alg_add_t2_m,
2343 alg_sub_t2_m,
2344 alg_impossible
2345 };
2346
2347 /* This structure holds the "cost" of a multiply sequence. The
2348 "cost" field holds the total rtx_cost of every operator in the
2349 synthetic multiplication sequence, hence cost(a op b) is defined
2350 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2351 The "latency" field holds the minimum possible latency of the
2352 synthetic multiply, on a hypothetical infinitely parallel CPU.
2353 This is the critical path, or the maximum height, of the expression
2354 tree which is the sum of rtx_costs on the most expensive path from
2355 any leaf to the root. Hence latency(a op b) is defined as zero for
2356 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
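/* For example, if every addition has rtx_cost 1, the chain
   ((a + b) + c) + d has cost 3 and latency 3, whereas (a + b) + (c + d)
   also has cost 3 but latency only 2, since the two inner additions are
   independent.  */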
2357
2358 struct mult_cost {
2359 short cost; /* Total rtx_cost of the multiplication sequence. */
2360 short latency; /* The latency of the multiplication sequence. */
2361 };
2362
2363 /* This macro is used to compare a pointer to a mult_cost against a
2364 single integer "rtx_cost" value. This is equivalent to the macro
2365 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2366 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2367 || ((X)->cost == (Y) && (X)->latency < (Y)))
2368
2369 /* This macro is used to compare two pointers to mult_costs against
2370 each other. The macro returns true if X is cheaper than Y.
2371 Currently, the cheaper of two mult_costs is the one with the
2372 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2373 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2374 || ((X)->cost == (Y)->cost \
2375 && (X)->latency < (Y)->latency))
2376
2377 /* This structure records a sequence of operations.
2378 `ops' is the number of operations recorded.
2379 `cost' is their total cost.
2380 The operations are stored in `op' and the corresponding
2381 logarithms of the integer coefficients in `log'.
2382
2383 These are the operations:
2384 alg_zero total := 0;
2385 alg_m total := multiplicand;
2386 alg_shift total := total * coeff
2387 alg_add_t_m2 total := total + multiplicand * coeff;
2388 alg_sub_t_m2 total := total - multiplicand * coeff;
2389 alg_add_factor total := total * coeff + total;
2390 alg_sub_factor total := total * coeff - total;
2391 alg_add_t2_m total := total * coeff + multiplicand;
2392 alg_sub_t2_m total := total * coeff - multiplicand;
2393
2394 The first operand must be either alg_zero or alg_m. */
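/* For example, multiplying by 10 would typically be recorded as the
   sequence alg_m, alg_add_t_m2 (log 2), alg_shift (log 1):

     total := multiplicand;
     total := total + multiplicand * 4;
     total := total * 2;

   which expand_mult_const turns into a shift-and-add followed by a left
   shift; the exact choice depends on the target's costs.  */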
2395
2396 struct algorithm
2397 {
2398 struct mult_cost cost;
2399 short ops;
2400 /* The sizes of the OP and LOG fields are not directly related to the
2401 word size, but the worst case arises when we have few
2402 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2403 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2404 in total wordsize operations. */
2405 enum alg_code op[MAX_BITS_PER_WORD];
2406 char log[MAX_BITS_PER_WORD];
2407 };
2408
2409 /* The entry for our multiplication cache/hash table. */
2410 struct alg_hash_entry {
2411 /* The number we are multiplying by. */
2412 unsigned HOST_WIDE_INT t;
2413
2414 /* The mode in which we are multiplying something by T. */
2415 enum machine_mode mode;
2416
2417 /* The best multiplication algorithm for t. */
2418 enum alg_code alg;
2419
2420 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2421 Otherwise, the cost within which multiplication by T is
2422 impossible. */
2423 struct mult_cost cost;
2424 };
2425
2426 /* The number of cache/hash entries. */
2427 #if HOST_BITS_PER_WIDE_INT == 64
2428 #define NUM_ALG_HASH_ENTRIES 1031
2429 #else
2430 #define NUM_ALG_HASH_ENTRIES 307
2431 #endif
2432
2433 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2434 actually a hash table. If we have a collision, the older
2435 entry is kicked out. */
2436 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2437
2438 /* Indicates the type of fixup needed after a constant multiplication.
2439 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2440 the result should be negated, and ADD_VARIANT means that the
2441 multiplicand should be added to the result. */
2442 enum mult_variant {basic_variant, negate_variant, add_variant};
2443
2444 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2445 const struct mult_cost *, enum machine_mode mode);
2446 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2447 struct algorithm *, enum mult_variant *, int);
2448 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2449 const struct algorithm *, enum mult_variant);
2450 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2451 int, rtx *, int *, int *);
2452 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2453 static rtx extract_high_half (enum machine_mode, rtx);
2454 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2455 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2456 int, int);
2457 /* Compute and return the best algorithm for multiplying by T.
2458 The algorithm must cost less than COST_LIMIT.
2459 If retval.cost >= COST_LIMIT, no algorithm was found and all
2460 other fields of the returned struct are undefined.
2461 MODE is the machine mode of the multiplication. */
2462
2463 static void
2464 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2465 const struct mult_cost *cost_limit, enum machine_mode mode)
2466 {
2467 int m;
2468 struct algorithm *alg_in, *best_alg;
2469 struct mult_cost best_cost;
2470 struct mult_cost new_limit;
2471 int op_cost, op_latency;
2472 unsigned HOST_WIDE_INT q;
2473 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2474 int hash_index;
2475 bool cache_hit = false;
2476 enum alg_code cache_alg = alg_zero;
2477
2478 /* Indicate that no algorithm is yet found. If no algorithm
2479 is found, this value will be returned and indicate failure. */
2480 alg_out->cost.cost = cost_limit->cost + 1;
2481 alg_out->cost.latency = cost_limit->latency + 1;
2482
2483 if (cost_limit->cost < 0
2484 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2485 return;
2486
2487 /* Restrict the bits of "t" to the multiplication's mode. */
2488 t &= GET_MODE_MASK (mode);
2489
2490 /* t == 1 can be done in zero cost. */
2491 if (t == 1)
2492 {
2493 alg_out->ops = 1;
2494 alg_out->cost.cost = 0;
2495 alg_out->cost.latency = 0;
2496 alg_out->op[0] = alg_m;
2497 return;
2498 }
2499
2500 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2501 fail now. */
2502 if (t == 0)
2503 {
2504 if (MULT_COST_LESS (cost_limit, zero_cost))
2505 return;
2506 else
2507 {
2508 alg_out->ops = 1;
2509 alg_out->cost.cost = zero_cost;
2510 alg_out->cost.latency = zero_cost;
2511 alg_out->op[0] = alg_zero;
2512 return;
2513 }
2514 }
2515
2516 /* We'll be needing a couple extra algorithm structures now. */
2517
2518 alg_in = alloca (sizeof (struct algorithm));
2519 best_alg = alloca (sizeof (struct algorithm));
2520 best_cost = *cost_limit;
2521
2522 /* Compute the hash index. */
2523 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2524
2525 /* See if we already know what to do for T. */
2526 if (alg_hash[hash_index].t == t
2527 && alg_hash[hash_index].mode == mode
2528 && alg_hash[hash_index].alg != alg_unknown)
2529 {
2530 cache_alg = alg_hash[hash_index].alg;
2531
2532 if (cache_alg == alg_impossible)
2533 {
2534 /* The cache tells us that it's impossible to synthesize
2535 multiplication by T within alg_hash[hash_index].cost. */
2536 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2537 /* COST_LIMIT is at least as restrictive as the one
2538 recorded in the hash table, in which case we have no
2539 hope of synthesizing a multiplication. Just
2540 return. */
2541 return;
2542
2543 /* If we get here, COST_LIMIT is less restrictive than the
2544 one recorded in the hash table, so we may be able to
2545 synthesize a multiplication. Proceed as if we didn't
2546 have the cache entry. */
2547 }
2548 else
2549 {
2550 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2551 /* The cached algorithm shows that this multiplication
2552 requires more cost than COST_LIMIT. Just return. This
2553 way, we don't clobber this cache entry with
2554 alg_impossible but retain useful information. */
2555 return;
2556
2557 cache_hit = true;
2558
2559 switch (cache_alg)
2560 {
2561 case alg_shift:
2562 goto do_alg_shift;
2563
2564 case alg_add_t_m2:
2565 case alg_sub_t_m2:
2566 goto do_alg_addsub_t_m2;
2567
2568 case alg_add_factor:
2569 case alg_sub_factor:
2570 goto do_alg_addsub_factor;
2571
2572 case alg_add_t2_m:
2573 goto do_alg_add_t2_m;
2574
2575 case alg_sub_t2_m:
2576 goto do_alg_sub_t2_m;
2577
2578 default:
2579 gcc_unreachable ();
2580 }
2581 }
2582 }
2583
2584 /* If we have a group of zero bits at the low-order part of T, try
2585 multiplying by the remaining bits and then doing a shift. */
2586
2587 if ((t & 1) == 0)
2588 {
2589 do_alg_shift:
2590 m = floor_log2 (t & -t); /* m = number of low zero bits */
2591 if (m < maxm)
2592 {
2593 q = t >> m;
2594 /* The function expand_shift will choose between a shift and
2595 a sequence of additions, so the observed cost is given as
2596 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2597 op_cost = m * add_cost[mode];
2598 if (shift_cost[mode][m] < op_cost)
2599 op_cost = shift_cost[mode][m];
2600 new_limit.cost = best_cost.cost - op_cost;
2601 new_limit.latency = best_cost.latency - op_cost;
2602 synth_mult (alg_in, q, &new_limit, mode);
2603
2604 alg_in->cost.cost += op_cost;
2605 alg_in->cost.latency += op_cost;
2606 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2607 {
2608 struct algorithm *x;
2609 best_cost = alg_in->cost;
2610 x = alg_in, alg_in = best_alg, best_alg = x;
2611 best_alg->log[best_alg->ops] = m;
2612 best_alg->op[best_alg->ops] = alg_shift;
2613 }
2614 }
2615 if (cache_hit)
2616 goto done;
2617 }
2618
2619 /* If we have an odd number, add or subtract one. */
2620 if ((t & 1) != 0)
2621 {
2622 unsigned HOST_WIDE_INT w;
2623
2624 do_alg_addsub_t_m2:
2625 for (w = 1; (w & t) != 0; w <<= 1)
2626 ;
2627 /* If T was -1, then W will be zero after the loop. This is another
2628 case where T ends with ...111. Handling it as (T + 1) followed by
2629 subtracting 1 produces slightly better code and makes algorithm
2630 selection much faster than treating it like the ...0111 case
2631 below. */
2632 if (w == 0
2633 || (w > 2
2634 /* Reject the case where t is 3.
2635 Thus we prefer addition in that case. */
2636 && t != 3))
2637 {
2638 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2639
2640 op_cost = add_cost[mode];
2641 new_limit.cost = best_cost.cost - op_cost;
2642 new_limit.latency = best_cost.latency - op_cost;
2643 synth_mult (alg_in, t + 1, &new_limit, mode);
2644
2645 alg_in->cost.cost += op_cost;
2646 alg_in->cost.latency += op_cost;
2647 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2648 {
2649 struct algorithm *x;
2650 best_cost = alg_in->cost;
2651 x = alg_in, alg_in = best_alg, best_alg = x;
2652 best_alg->log[best_alg->ops] = 0;
2653 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2654 }
2655 }
2656 else
2657 {
2658 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2659
2660 op_cost = add_cost[mode];
2661 new_limit.cost = best_cost.cost - op_cost;
2662 new_limit.latency = best_cost.latency - op_cost;
2663 synth_mult (alg_in, t - 1, &new_limit, mode);
2664
2665 alg_in->cost.cost += op_cost;
2666 alg_in->cost.latency += op_cost;
2667 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2668 {
2669 struct algorithm *x;
2670 best_cost = alg_in->cost;
2671 x = alg_in, alg_in = best_alg, best_alg = x;
2672 best_alg->log[best_alg->ops] = 0;
2673 best_alg->op[best_alg->ops] = alg_add_t_m2;
2674 }
2675 }
2676 if (cache_hit)
2677 goto done;
2678 }
2679
2680 /* Look for factors of t of the form
2681 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2682 If we find such a factor, we can multiply by t using an algorithm that
2683 multiplies by q, shift the result by m and add/subtract it to itself.
2684
2685 We search for large factors first and loop down, even if large factors
2686 are less probable than small; if we find a large factor we will find a
2687 good sequence quickly, and therefore be able to prune (by decreasing
2688 COST_LIMIT) the search. */
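  /* For example, t == 15 factors as 3 * (2**2 + 1): the recursion first
     synthesizes the multiplication by 3, and the alg_add_factor step then
     computes total := total * 4 + total.  */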
2689
2690 do_alg_addsub_factor:
2691 for (m = floor_log2 (t - 1); m >= 2; m--)
2692 {
2693 unsigned HOST_WIDE_INT d;
2694
2695 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2696 if (t % d == 0 && t > d && m < maxm
2697 && (!cache_hit || cache_alg == alg_add_factor))
2698 {
2699 /* If the target has a cheap shift-and-add instruction use
2700 that in preference to a shift insn followed by an add insn.
2701 Assume that the shift-and-add is "atomic" with a latency
2702 equal to its cost, otherwise assume that on superscalar
2703 hardware the shift may be executed concurrently with the
2704 earlier steps in the algorithm. */
2705 op_cost = add_cost[mode] + shift_cost[mode][m];
2706 if (shiftadd_cost[mode][m] < op_cost)
2707 {
2708 op_cost = shiftadd_cost[mode][m];
2709 op_latency = op_cost;
2710 }
2711 else
2712 op_latency = add_cost[mode];
2713
2714 new_limit.cost = best_cost.cost - op_cost;
2715 new_limit.latency = best_cost.latency - op_latency;
2716 synth_mult (alg_in, t / d, &new_limit, mode);
2717
2718 alg_in->cost.cost += op_cost;
2719 alg_in->cost.latency += op_latency;
2720 if (alg_in->cost.latency < op_cost)
2721 alg_in->cost.latency = op_cost;
2722 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2723 {
2724 struct algorithm *x;
2725 best_cost = alg_in->cost;
2726 x = alg_in, alg_in = best_alg, best_alg = x;
2727 best_alg->log[best_alg->ops] = m;
2728 best_alg->op[best_alg->ops] = alg_add_factor;
2729 }
2730 /* Other factors will have been taken care of in the recursion. */
2731 break;
2732 }
2733
2734 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2735 if (t % d == 0 && t > d && m < maxm
2736 && (!cache_hit || cache_alg == alg_sub_factor))
2737 {
2738 /* If the target has a cheap shift-and-subtract insn use
2739 that in preference to a shift insn followed by a sub insn.
2740 Assume that the shift-and-sub is "atomic" with a latency
2741 equal to its cost, otherwise assume that on superscalar
2742 hardware the shift may be executed concurrently with the
2743 earlier steps in the algorithm. */
2744 op_cost = add_cost[mode] + shift_cost[mode][m];
2745 if (shiftsub_cost[mode][m] < op_cost)
2746 {
2747 op_cost = shiftsub_cost[mode][m];
2748 op_latency = op_cost;
2749 }
2750 else
2751 op_latency = add_cost[mode];
2752
2753 new_limit.cost = best_cost.cost - op_cost;
2754 new_limit.latency = best_cost.latency - op_latency;
2755 synth_mult (alg_in, t / d, &new_limit, mode);
2756
2757 alg_in->cost.cost += op_cost;
2758 alg_in->cost.latency += op_latency;
2759 if (alg_in->cost.latency < op_cost)
2760 alg_in->cost.latency = op_cost;
2761 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762 {
2763 struct algorithm *x;
2764 best_cost = alg_in->cost;
2765 x = alg_in, alg_in = best_alg, best_alg = x;
2766 best_alg->log[best_alg->ops] = m;
2767 best_alg->op[best_alg->ops] = alg_sub_factor;
2768 }
2769 break;
2770 }
2771 }
2772 if (cache_hit)
2773 goto done;
2774
2775 /* Try shift-and-add (load effective address) instructions,
2776 i.e. do a*3, a*5, a*9. */
2777 if ((t & 1) != 0)
2778 {
2779 do_alg_add_t2_m:
2780 q = t - 1;
2781 q = q & -q;
2782 m = exact_log2 (q);
2783 if (m >= 0 && m < maxm)
2784 {
2785 op_cost = shiftadd_cost[mode][m];
2786 new_limit.cost = best_cost.cost - op_cost;
2787 new_limit.latency = best_cost.latency - op_cost;
2788 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2789
2790 alg_in->cost.cost += op_cost;
2791 alg_in->cost.latency += op_cost;
2792 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2793 {
2794 struct algorithm *x;
2795 best_cost = alg_in->cost;
2796 x = alg_in, alg_in = best_alg, best_alg = x;
2797 best_alg->log[best_alg->ops] = m;
2798 best_alg->op[best_alg->ops] = alg_add_t2_m;
2799 }
2800 }
2801 if (cache_hit)
2802 goto done;
2803
2804 do_alg_sub_t2_m:
2805 q = t + 1;
2806 q = q & -q;
2807 m = exact_log2 (q);
2808 if (m >= 0 && m < maxm)
2809 {
2810 op_cost = shiftsub_cost[mode][m];
2811 new_limit.cost = best_cost.cost - op_cost;
2812 new_limit.latency = best_cost.latency - op_cost;
2813 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2814
2815 alg_in->cost.cost += op_cost;
2816 alg_in->cost.latency += op_cost;
2817 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2818 {
2819 struct algorithm *x;
2820 best_cost = alg_in->cost;
2821 x = alg_in, alg_in = best_alg, best_alg = x;
2822 best_alg->log[best_alg->ops] = m;
2823 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2824 }
2825 }
2826 if (cache_hit)
2827 goto done;
2828 }
2829
2830 done:
2831 /* If best_cost has not decreased, we have not found any algorithm. */
2832 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2833 {
2834 /* We failed to find an algorithm. Record alg_impossible for
2835 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2836 we are asked to find an algorithm for T within the same or
2837 lower COST_LIMIT, we can immediately return to the
2838 caller. */
2839 alg_hash[hash_index].t = t;
2840 alg_hash[hash_index].mode = mode;
2841 alg_hash[hash_index].alg = alg_impossible;
2842 alg_hash[hash_index].cost = *cost_limit;
2843 return;
2844 }
2845
2846 /* Cache the result. */
2847 if (!cache_hit)
2848 {
2849 alg_hash[hash_index].t = t;
2850 alg_hash[hash_index].mode = mode;
2851 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2852 alg_hash[hash_index].cost.cost = best_cost.cost;
2853 alg_hash[hash_index].cost.latency = best_cost.latency;
2854 }
2855
2856 /* If we are getting too long a sequence for `struct algorithm'
2857 to record, make this search fail. */
2858 if (best_alg->ops == MAX_BITS_PER_WORD)
2859 return;
2860
2861 /* Copy the algorithm from temporary space to the space at alg_out.
2862 We avoid using structure assignment because the majority of
2863 best_alg is normally undefined, and this is a critical function. */
2864 alg_out->ops = best_alg->ops + 1;
2865 alg_out->cost = best_cost;
2866 memcpy (alg_out->op, best_alg->op,
2867 alg_out->ops * sizeof *alg_out->op);
2868 memcpy (alg_out->log, best_alg->log,
2869 alg_out->ops * sizeof *alg_out->log);
2870 }
2871 \f
2872 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2873 Try three variations:
2874
2875 - a shift/add sequence based on VAL itself
2876 - a shift/add sequence based on -VAL, followed by a negation
2877 - a shift/add sequence based on VAL - 1, followed by an addition.
2878
2879 Return true if the cheapest of these costs less than MULT_COST,
2880 describing the algorithm in *ALG and final fixup in *VARIANT. */
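/* For instance, multiplying by -7 in SImode is, on most targets, cheapest
   as the short shift/subtract sequence for 7 followed by a negation, which
   this function reports as negate_variant; whether the VAL - 1 form
   (add_variant) wins instead depends on the target's add and shift costs.  */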
2881
2882 static bool
2883 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2884 struct algorithm *alg, enum mult_variant *variant,
2885 int mult_cost)
2886 {
2887 struct algorithm alg2;
2888 struct mult_cost limit;
2889 int op_cost;
2890
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2894
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with less
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2901
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2906
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int'. */
2909 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2910 {
2911 op_cost = neg_cost[mode];
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2913 {
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2916 }
2917 else
2918 {
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2921 }
2922
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2928 }
2929
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost[mode];
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2933 {
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2936 }
2937 else
2938 {
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2941 }
2942
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2948
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2950 }
2951
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2956
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2961 {
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2966
2967 /* Avoid referencing memory over and over, and avoid invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2970
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2973
2974 if (alg->op[0] == alg_zero)
2975 {
2976 accum = copy_to_mode_reg (mode, const0_rtx);
2977 val_so_far = 0;
2978 }
2979 else if (alg->op[0] == alg_m)
2980 {
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2983 }
2984 else
2985 gcc_unreachable ();
2986
2987 for (opno = 1; opno < alg->ops; opno++)
2988 {
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996
2997 switch (alg->op[opno])
2998 {
2999 case alg_shift:
3000 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3001 build_int_cst (NULL_TREE, log),
3002 NULL_RTX, 0);
3003 val_so_far <<= log;
3004 break;
3005
3006 case alg_add_t_m2:
3007 tem = expand_shift (LSHIFT_EXPR, mode, op0,
3008 build_int_cst (NULL_TREE, log),
3009 NULL_RTX, 0);
3010 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3011 add_target ? add_target : accum_target);
3012 val_so_far += (HOST_WIDE_INT) 1 << log;
3013 break;
3014
3015 case alg_sub_t_m2:
3016 tem = expand_shift (LSHIFT_EXPR, mode, op0,
3017 build_int_cst (NULL_TREE, log),
3018 NULL_RTX, 0);
3019 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3020 add_target ? add_target : accum_target);
3021 val_so_far -= (HOST_WIDE_INT) 1 << log;
3022 break;
3023
3024 case alg_add_t2_m:
3025 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3026 build_int_cst (NULL_TREE, log),
3027 shift_subtarget,
3028 0);
3029 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3030 add_target ? add_target : accum_target);
3031 val_so_far = (val_so_far << log) + 1;
3032 break;
3033
3034 case alg_sub_t2_m:
3035 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3036 build_int_cst (NULL_TREE, log),
3037 shift_subtarget, 0);
3038 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3039 add_target ? add_target : accum_target);
3040 val_so_far = (val_so_far << log) - 1;
3041 break;
3042
3043 case alg_add_factor:
3044 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3045 build_int_cst (NULL_TREE, log),
3046 NULL_RTX, 0);
3047 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3048 add_target ? add_target : accum_target);
3049 val_so_far += val_so_far << log;
3050 break;
3051
3052 case alg_sub_factor:
3053 tem = expand_shift (LSHIFT_EXPR, mode, accum,
3054 build_int_cst (NULL_TREE, log),
3055 NULL_RTX, 0);
3056 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3057 (add_target
3058 ? add_target : (optimize ? 0 : tem)));
3059 val_so_far = (val_so_far << log) - val_so_far;
3060 break;
3061
3062 default:
3063 gcc_unreachable ();
3064 }
3065
3066 /* Write a REG_EQUAL note on the last insn so that we can cse
3067 multiplication sequences. Note that if ACCUM is a SUBREG,
3068 we've set the inner register and must properly indicate
3069 that. */
3070
3071 tem = op0, nmode = mode;
3072 if (GET_CODE (accum) == SUBREG)
3073 {
3074 nmode = GET_MODE (SUBREG_REG (accum));
3075 tem = gen_lowpart (nmode, op0);
3076 }
3077
3078 insn = get_last_insn ();
3079 set_unique_reg_note (insn, REG_EQUAL,
3080 gen_rtx_MULT (nmode, tem,
3081 GEN_INT (val_so_far)));
3082 }
3083
3084 if (variant == negate_variant)
3085 {
3086 val_so_far = -val_so_far;
3087 accum = expand_unop (mode, neg_optab, accum, target, 0);
3088 }
3089 else if (variant == add_variant)
3090 {
3091 val_so_far = val_so_far + 1;
3092 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3093 }
3094
3095 /* Compare only the bits of val and val_so_far that are significant
3096 in the result mode, to avoid sign-/zero-extension confusion. */
3097 val &= GET_MODE_MASK (mode);
3098 val_so_far &= GET_MODE_MASK (mode);
3099 gcc_assert (val == val_so_far);
3100
3101 return accum;
3102 }
3103
3104 /* Perform a multiplication and return an rtx for the result.
3105 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3106 TARGET is a suggestion for where to store the result (an rtx).
3107
3108 We check specially for a constant integer as OP1.
3109 If you want this check for OP0 as well, then before calling
3110 you should swap the two operands if OP0 would be constant. */
3111
3112 rtx
3113 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3114 int unsignedp)
3115 {
3116 enum mult_variant variant;
3117 struct algorithm algorithm;
3118 int max_cost;
3119
3120 /* Handling const0_rtx here allows us to use zero as a rogue value for
3121 coeff below. */
3122 if (op1 == const0_rtx)
3123 return const0_rtx;
3124 if (op1 == const1_rtx)
3125 return op0;
3126 if (op1 == constm1_rtx)
3127 return expand_unop (mode,
3128 GET_MODE_CLASS (mode) == MODE_INT
3129 && !unsignedp && flag_trapv
3130 ? negv_optab : neg_optab,
3131 op0, target, 0);
3132
3133 /* These are the operations that are potentially turned into a sequence
3134 of shifts and additions. */
3135 if (SCALAR_INT_MODE_P (mode)
3136 && (unsignedp || !flag_trapv))
3137 {
3138 HOST_WIDE_INT coeff = 0;
3139 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3140
3141 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3142 less than or equal in size to `unsigned int' this doesn't matter.
3143 If the mode is larger than `unsigned int', then synth_mult works
3144 only if the constant value exactly fits in an `unsigned int' without
3145 any truncation. This means that multiplying by negative values does
3146 not work; results are off by 2^32 on a 32 bit machine. */
3147
3148 if (GET_CODE (op1) == CONST_INT)
3149 {
3150 /* Attempt to handle multiplication of DImode values by negative
3151 coefficients, by performing the multiplication by a positive
3152 multiplier and then inverting the result. */
3153 if (INTVAL (op1) < 0
3154 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3155 {
3156 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3157 result is interpreted as an unsigned coefficient.
3158 Exclude cost of op0 from max_cost to match the cost
3159 calculation of the synth_mult. */
3160 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3161 - neg_cost[mode];
3162 if (max_cost > 0
3163 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3164 &variant, max_cost))
3165 {
3166 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3167 NULL_RTX, &algorithm,
3168 variant);
3169 return expand_unop (mode, neg_optab, temp, target, 0);
3170 }
3171 }
3172 else coeff = INTVAL (op1);
3173 }
3174 else if (GET_CODE (op1) == CONST_DOUBLE)
3175 {
3176 /* If we are multiplying in DImode, it may still be a win
3177 to try to work with shifts and adds. */
3178 if (CONST_DOUBLE_HIGH (op1) == 0)
3179 coeff = CONST_DOUBLE_LOW (op1);
3180 else if (CONST_DOUBLE_LOW (op1) == 0
3181 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3182 {
3183 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3184 + HOST_BITS_PER_WIDE_INT;
3185 return expand_shift (LSHIFT_EXPR, mode, op0,
3186 build_int_cst (NULL_TREE, shift),
3187 target, unsignedp);
3188 }
3189 }
3190
3191 /* We used to test optimize here, on the grounds that it's better to
3192 produce a smaller program when -O is not used. But this causes
3193 such a terrible slowdown sometimes that it seems better to always
3194 use synth_mult. */
3195 if (coeff != 0)
3196 {
3197 /* Special case powers of two. */
3198 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3199 return expand_shift (LSHIFT_EXPR, mode, op0,
3200 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3201 target, unsignedp);
3202
3203 /* Exclude cost of op0 from max_cost to match the cost
3204 calculation of the synth_mult. */
3205 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3206 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3207 max_cost))
3208 return expand_mult_const (mode, op0, coeff, target,
3209 &algorithm, variant);
3210 }
3211 }
3212
3213 if (GET_CODE (op0) == CONST_DOUBLE)
3214 {
3215 rtx temp = op0;
3216 op0 = op1;
3217 op1 = temp;
3218 }
3219
3220 /* Expand x*2.0 as x+x. */
3221 if (GET_CODE (op1) == CONST_DOUBLE
3222 && SCALAR_FLOAT_MODE_P (mode))
3223 {
3224 REAL_VALUE_TYPE d;
3225 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3226
3227 if (REAL_VALUES_EQUAL (d, dconst2))
3228 {
3229 op0 = force_reg (GET_MODE (op0), op0);
3230 return expand_binop (mode, add_optab, op0, op0,
3231 target, unsignedp, OPTAB_LIB_WIDEN);
3232 }
3233 }
3234
3235 /* This used to use umul_optab if unsigned, but for non-widening multiply
3236 there is no difference between signed and unsigned. */
3237 op0 = expand_binop (mode,
3238 ! unsignedp
3239 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3240 ? smulv_optab : smul_optab,
3241 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3242 gcc_assert (op0);
3243 return op0;
3244 }
3245 \f
3246 /* Return the smallest n such that 2**n >= X. */
3247
3248 int
3249 ceil_log2 (unsigned HOST_WIDE_INT x)
3250 {
3251 return floor_log2 (x - 1) + 1;
3252 }
3253
3254 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3255 replace division by D, and put the least significant N bits of the result
3256 in *MULTIPLIER_PTR and return the most significant bit.
3257
3258 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3259 needed precision is in PRECISION (should be <= N).
3260
3261 PRECISION should be as small as possible so this function can choose
3262 multiplier more freely.
3263
3264 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3265 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3266
3267 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3268 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
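/* As a worked example, for D == 5 with N == PRECISION == 32 this chooses
   the multiplier 0xcccccccd with *POST_SHIFT_PTR == 2 and *LGUP_PTR == 3
   (returning 0 for the top bit), so for any 32-bit unsigned x

     x / 5 == (high 32 bits of x * 0xcccccccd) >> 2.  */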
3269
3270 static
3271 unsigned HOST_WIDE_INT
3272 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3273 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3274 {
3275 HOST_WIDE_INT mhigh_hi, mlow_hi;
3276 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3277 int lgup, post_shift;
3278 int pow, pow2;
3279 unsigned HOST_WIDE_INT nl, dummy1;
3280 HOST_WIDE_INT nh, dummy2;
3281
3282 /* lgup = ceil(log2(divisor)); */
3283 lgup = ceil_log2 (d);
3284
3285 gcc_assert (lgup <= n);
3286
3287 pow = n + lgup;
3288 pow2 = n + lgup - precision;
3289
3290 /* We could handle this with some effort, but this case is much
3291 better handled directly with a scc insn, so rely on caller using
3292 that. */
3293 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3294
3295 /* mlow = 2^(N + lgup)/d */
3296 if (pow >= HOST_BITS_PER_WIDE_INT)
3297 {
3298 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3299 nl = 0;
3300 }
3301 else
3302 {
3303 nh = 0;
3304 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3305 }
3306 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3307 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3308
3309 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3310 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3311 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3312 else
3313 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3314 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3315 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3316
3317 gcc_assert (!mhigh_hi || nh - d < d);
3318 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3319 /* Assert that mlow < mhigh. */
3320 gcc_assert (mlow_hi < mhigh_hi
3321 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3322
3323 /* If precision == N, then mlow, mhigh exceed 2^N
3324 (but they do not exceed 2^(N+1)). */
3325
3326 /* Reduce to lowest terms. */
3327 for (post_shift = lgup; post_shift > 0; post_shift--)
3328 {
3329 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3330 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3331 if (ml_lo >= mh_lo)
3332 break;
3333
3334 mlow_hi = 0;
3335 mlow_lo = ml_lo;
3336 mhigh_hi = 0;
3337 mhigh_lo = mh_lo;
3338 }
3339
3340 *post_shift_ptr = post_shift;
3341 *lgup_ptr = lgup;
3342 if (n < HOST_BITS_PER_WIDE_INT)
3343 {
3344 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3345 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3346 return mhigh_lo >= mask;
3347 }
3348 else
3349 {
3350 *multiplier_ptr = GEN_INT (mhigh_lo);
3351 return mhigh_hi;
3352 }
3353 }
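/* A minimal host-side sketch (illustrative only, hence under #if 0) of how
   the multiplier chosen above gets used: for N == 32 and D == 5 the function
   yields the multiplier 0xCCCCCCCD with *POST_SHIFT_PTR == 2, so the quotient
   is the high half of a 32x32->64 multiply shifted right by the post shift.
   The helper name and the use of a 64-bit "unsigned long long" are
   assumptions for the sketch, not part of GCC.  */
#if 0
static unsigned int
udiv_by_5_via_multiplier (unsigned int x)
{
  unsigned long long m = 0xCCCCCCCDULL;              /* ceil (2^34 / 5), reduced */
  unsigned int hi = (unsigned int) ((x * m) >> 32);  /* high half of x * m */
  return hi >> 2;                                    /* the post shift */
}
#endif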
3354
3355 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3356 congruent to 1 (mod 2**N). */
3357
3358 static unsigned HOST_WIDE_INT
3359 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3360 {
3361 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3362
3363 /* The algorithm notes that the choice y = x satisfies
3364 x*y == 1 mod 2^3, since x is assumed odd.
3365 Each iteration doubles the number of bits of significance in y. */
3366
3367 unsigned HOST_WIDE_INT mask;
3368 unsigned HOST_WIDE_INT y = x;
3369 int nbit = 3;
3370
3371 mask = (n == HOST_BITS_PER_WIDE_INT
3372 ? ~(unsigned HOST_WIDE_INT) 0
3373 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3374
3375 while (nbit < n)
3376 {
3377 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3378 nbit *= 2;
3379 }
3380 return y;
3381 }
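/* A host-side sketch of the same Newton iteration, restricted to N == 32
   (illustrative only; the helper name is not part of GCC and a 32-bit
   "unsigned int" is assumed).  Each pass doubles the number of correct
   low-order bits, e.g. invert_mod2_32 (3) == 0xAAAAAAAB, and
   3 * 0xAAAAAAAB == 0x200000001, which is 1 modulo 2^32.  */
#if 0
static unsigned int
invert_mod2_32 (unsigned int x)        /* X must be odd.  */
{
  unsigned int y = x;                  /* Already correct to 3 bits.  */
  int nbit;

  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);               /* Unsigned arithmetic is mod 2^32.  */
  return y;
}
#endif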
3382
3383 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3384 done with the wrong signedness flavor. ADJ_OPERAND is already the high half of the
3385 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3386 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3387 become signed.
3388
3389 The result is put in TARGET if that is convenient.
3390
3391 MODE is the mode of operation. */
3392
3393 rtx
3394 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3395 rtx op1, rtx target, int unsignedp)
3396 {
3397 rtx tem;
3398 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3399
3400 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3401 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3402 NULL_RTX, 0);
3403 tem = expand_and (mode, tem, op1, NULL_RTX);
3404 adj_operand
3405 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3406 adj_operand);
3407
3408 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3409 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3410 NULL_RTX, 0);
3411 tem = expand_and (mode, tem, op0, NULL_RTX);
3412 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3413 target);
3414
3415 return target;
3416 }
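/* The identity behind the adjustment above, spelled out for 32-bit operands
   (illustrative sketch only; the helper name and the 64-bit host multiply are
   assumptions, not GCC code).  Modulo 2^32, the signed and unsigned high
   halves of a product differ exactly by the two conditional corrections the
   function above applies.  */
#if 0
static unsigned int
signed_highpart_from_unsigned (unsigned int a, unsigned int b)
{
  unsigned int uhigh = (unsigned int) (((unsigned long long) a * b) >> 32);
  unsigned int adj1 = (a & 0x80000000u) ? b : 0;   /* A negative as signed */
  unsigned int adj2 = (b & 0x80000000u) ? a : 0;   /* B negative as signed */
  return uhigh - adj1 - adj2;  /* high half of the signed product, mod 2^32 */
}
#endif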
3417
3418 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3419
3420 static rtx
3421 extract_high_half (enum machine_mode mode, rtx op)
3422 {
3423 enum machine_mode wider_mode;
3424
3425 if (mode == word_mode)
3426 return gen_highpart (mode, op);
3427
3428 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3429
3430 wider_mode = GET_MODE_WIDER_MODE (mode);
3431 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3432 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3433 return convert_modes (mode, wider_mode, op, 0);
3434 }
3435
3436 /* Like expand_mult_highpart, but only consider using a multiplication
3437 optab. OP1 is an rtx for the constant operand. */
3438
3439 static rtx
3440 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3441 rtx target, int unsignedp, int max_cost)
3442 {
3443 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3444 enum machine_mode wider_mode;
3445 optab moptab;
3446 rtx tem;
3447 int size;
3448
3449 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3450
3451 wider_mode = GET_MODE_WIDER_MODE (mode);
3452 size = GET_MODE_BITSIZE (mode);
3453
3454 /* Firstly, try using a multiplication insn that only generates the needed
3455 high part of the product, and in the sign flavor of unsignedp. */
3456 if (mul_highpart_cost[mode] < max_cost)
3457 {
3458 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3459 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3460 unsignedp, OPTAB_DIRECT);
3461 if (tem)
3462 return tem;
3463 }
3464
3465 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3466 Need to adjust the result after the multiplication. */
3467 if (size - 1 < BITS_PER_WORD
3468 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3469 + 4 * add_cost[mode] < max_cost))
3470 {
3471 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3472 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3473 unsignedp, OPTAB_DIRECT);
3474 if (tem)
3475 /* We used the wrong signedness. Adjust the result. */
3476 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3477 tem, unsignedp);
3478 }
3479
3480 /* Try widening multiplication. */
3481 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3482 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3483 && mul_widen_cost[wider_mode] < max_cost)
3484 {
3485 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3486 unsignedp, OPTAB_WIDEN);
3487 if (tem)
3488 return extract_high_half (mode, tem);
3489 }
3490
3491 /* Try widening the mode and performing a non-widening multiplication. */
3492 if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3493 && size - 1 < BITS_PER_WORD
3494 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3495 {
3496 rtx insns, wop0, wop1;
3497
3498 /* We need to widen the operands, for example to ensure the
3499 constant multiplier is correctly sign or zero extended.
3500 Use a sequence to clean up any instructions emitted by
3501 the conversions if things don't work out. */
3502 start_sequence ();
3503 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3504 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3505 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3506 unsignedp, OPTAB_WIDEN);
3507 insns = get_insns ();
3508 end_sequence ();
3509
3510 if (tem)
3511 {
3512 emit_insn (insns);
3513 return extract_high_half (mode, tem);
3514 }
3515 }
3516
3517 /* Try widening multiplication of opposite signedness, and adjust. */
3518 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3519 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3520 && size - 1 < BITS_PER_WORD
3521 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3522 + 4 * add_cost[mode] < max_cost))
3523 {
3524 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3525 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3526 if (tem != 0)
3527 {
3528 tem = extract_high_half (mode, tem);
3529 /* We used the wrong signedness. Adjust the result. */
3530 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3531 target, unsignedp);
3532 }
3533 }
3534
3535 return 0;
3536 }
3537
3538 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3539 putting the high half of the result in TARGET if that is convenient,
3540 and return where the result is. If the operation cannot be performed,
3541 0 is returned.
3542
3543 MODE is the mode of operation and result.
3544
3545 UNSIGNEDP nonzero means unsigned multiply.
3546
3547 MAX_COST is the total allowed cost for the expanded RTL. */
3548
3549 static rtx
3550 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3551 rtx target, int unsignedp, int max_cost)
3552 {
3553 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3554 unsigned HOST_WIDE_INT cnst1;
3555 int extra_cost;
3556 bool sign_adjust = false;
3557 enum mult_variant variant;
3558 struct algorithm alg;
3559 rtx tem;
3560
3561 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3562 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3563 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3564
3565 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3566
3567 /* We can't optimize modes wider than BITS_PER_WORD.
3568 ??? We might be able to perform double-word arithmetic if
3569 mode == word_mode, however all the cost calculations in
3570 synth_mult etc. assume single-word operations. */
3571 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3572 return expand_mult_highpart_optab (mode, op0, op1, target,
3573 unsignedp, max_cost);
3574
3575 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3576
3577 /* Check whether we try to multiply by a negative constant. */
3578 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3579 {
3580 sign_adjust = true;
3581 extra_cost += add_cost[mode];
3582 }
3583
3584 /* See whether shift/add multiplication is cheap enough. */
3585 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3586 max_cost - extra_cost))
3587 {
3588 /* See whether the specialized multiplication optabs are
3589 cheaper than the shift/add version. */
3590 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3591 alg.cost.cost + extra_cost);
3592 if (tem)
3593 return tem;
3594
3595 tem = convert_to_mode (wider_mode, op0, unsignedp);
3596 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3597 tem = extract_high_half (mode, tem);
3598
3599 /* Adjust result for signedness. */
3600 if (sign_adjust)
3601 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3602
3603 return tem;
3604 }
3605 return expand_mult_highpart_optab (mode, op0, op1, target,
3606 unsignedp, max_cost);
3607 }
3608
3609
3610 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3611
3612 static rtx
3613 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3614 {
3615 unsigned HOST_WIDE_INT masklow, maskhigh;
3616 rtx result, temp, shift, label;
3617 int logd;
3618
3619 logd = floor_log2 (d);
3620 result = gen_reg_rtx (mode);
3621
3622 /* Avoid conditional branches when they're expensive. */
3623 if (BRANCH_COST >= 2
3624 && !optimize_size)
3625 {
3626 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3627 mode, 0, -1);
3628 if (signmask)
3629 {
3630 signmask = force_reg (mode, signmask);
3631 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3632 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3633
3634 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3635 which instruction sequence to use. If logical right shifts
3636 are expensive, then use 2 XORs, 2 SUBs and an AND; otherwise
3637 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3638
3639 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3640 if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3641 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3642 {
3643 temp = expand_binop (mode, xor_optab, op0, signmask,
3644 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3645 temp = expand_binop (mode, sub_optab, temp, signmask,
3646 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3647 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3648 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3649 temp = expand_binop (mode, xor_optab, temp, signmask,
3650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651 temp = expand_binop (mode, sub_optab, temp, signmask,
3652 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3653 }
3654 else
3655 {
3656 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3657 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3658 signmask = force_reg (mode, signmask);
3659
3660 temp = expand_binop (mode, add_optab, op0, signmask,
3661 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3662 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3663 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3664 temp = expand_binop (mode, sub_optab, temp, signmask,
3665 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3666 }
3667 return temp;
3668 }
3669 }
3670
3671 /* Mask contains the mode's signbit and the significant bits of the
3672 modulus. By including the signbit in the operation, many targets
3673 can avoid an explicit compare operation in the following comparison
3674 against zero. */
3675
3676 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3677 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3678 {
3679 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3680 maskhigh = -1;
3681 }
3682 else
3683 maskhigh = (HOST_WIDE_INT) -1
3684 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3685
3686 temp = expand_binop (mode, and_optab, op0,
3687 immed_double_const (masklow, maskhigh, mode),
3688 result, 1, OPTAB_LIB_WIDEN);
3689 if (temp != result)
3690 emit_move_insn (result, temp);
3691
3692 label = gen_label_rtx ();
3693 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3694
3695 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3696 0, OPTAB_LIB_WIDEN);
3697 masklow = (HOST_WIDE_INT) -1 << logd;
3698 maskhigh = -1;
3699 temp = expand_binop (mode, ior_optab, temp,
3700 immed_double_const (masklow, maskhigh, mode),
3701 result, 1, OPTAB_LIB_WIDEN);
3702 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3703 0, OPTAB_LIB_WIDEN);
3704 if (temp != result)
3705 emit_move_insn (result, temp);
3706 emit_label (label);
3707 return result;
3708 }
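/* The two branch-free sequences chosen between above, written as a host-side
   sketch for a 32-bit two's-complement int and D == 2**K with 1 <= K <= 31
   (illustrative only; the helper names are not part of GCC).  Both return the
   truncated remainder x % (1 << k) without a conditional branch.  */
#if 0
static int
smod_pow2_xor_variant (int x, int k)         /* 2 XORs, 2 SUBs and an AND.  */
{
  unsigned int ux = (unsigned int) x;
  unsigned int s = -(unsigned int) (x < 0);  /* all ones when X is negative */
  unsigned int m = ((unsigned int) 1 << k) - 1;
  unsigned int t = (ux ^ s) - s;             /* |x|, computed modulo 2^32 */
  t &= m;                                    /* |x| mod 2**k */
  return (int) ((t ^ s) - s);                /* restore the original sign */
}

static int
smod_pow2_shift_variant (int x, int k)       /* LSHIFTRT, ADD, SUB and AND.  */
{
  unsigned int ux = (unsigned int) x;
  unsigned int s = -(unsigned int) (x < 0);
  unsigned int m = ((unsigned int) 1 << k) - 1;
  unsigned int t = s >> (32 - k);            /* 2**k - 1 when X < 0, else 0 */
  return (int) (((ux + t) & m) - t);
}
#endif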
3709
3710 /* Expand signed division of OP0 by a power of two D in mode MODE.
3711 This routine is only called for positive values of D. */
3712
3713 static rtx
3714 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3715 {
3716 rtx temp, label;
3717 tree shift;
3718 int logd;
3719
3720 logd = floor_log2 (d);
3721 shift = build_int_cst (NULL_TREE, logd);
3722
3723 if (d == 2 && BRANCH_COST >= 1)
3724 {
3725 temp = gen_reg_rtx (mode);
3726 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3727 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3728 0, OPTAB_LIB_WIDEN);
3729 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3730 }
3731
3732 #ifdef HAVE_conditional_move
3733 if (BRANCH_COST >= 2)
3734 {
3735 rtx temp2;
3736
3737 /* ??? emit_conditional_move forces a stack adjustment via
3738 compare_from_rtx, so if the sequence is discarded, it will
3739 be lost. Do it now instead. */
3740 do_pending_stack_adjust ();
3741
3742 start_sequence ();
3743 temp2 = copy_to_mode_reg (mode, op0);
3744 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3745 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3746 temp = force_reg (mode, temp);
3747
3748 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3749 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3750 mode, temp, temp2, mode, 0);
3751 if (temp2)
3752 {
3753 rtx seq = get_insns ();
3754 end_sequence ();
3755 emit_insn (seq);
3756 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3757 }
3758 end_sequence ();
3759 }
3760 #endif
3761
3762 if (BRANCH_COST >= 2)
3763 {
3764 int ushift = GET_MODE_BITSIZE (mode) - logd;
3765
3766 temp = gen_reg_rtx (mode);
3767 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3768 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3769 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3770 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3771 else
3772 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3773 build_int_cst (NULL_TREE, ushift),
3774 NULL_RTX, 1);
3775 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3776 0, OPTAB_LIB_WIDEN);
3777 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3778 }
3779
3780 label = gen_label_rtx ();
3781 temp = copy_to_mode_reg (mode, op0);
3782 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3783 expand_inc (temp, GEN_INT (d - 1));
3784 emit_label (label);
3785 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3786 }
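/* A host-side sketch of the branch-free form emitted above when BRANCH_COST
   allows it (illustrative only; the helper name is not GCC code, and a 32-bit
   two's-complement int with an arithmetic >> is assumed, 1 <= K <= 31).
   Truncating a signed division by 2**K toward zero amounts to adding
   2**K - 1 to negative dividends before the arithmetic shift.  */
#if 0
static int
sdiv_pow2_branchless (int x, int k)
{
  /* BIAS is 2**k - 1 when X is negative and 0 otherwise; the AND variant in
     the code above computes the same value as (-(x < 0)) & ((1 << k) - 1).  */
  int bias = (int) ((unsigned int) -(unsigned int) (x < 0) >> (32 - k));
  return (x + bias) >> k;
}
#endif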
3787 \f
3788 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3789 if that is convenient, and returning where the result is.
3790 You may request either the quotient or the remainder as the result;
3791 specify REM_FLAG nonzero to get the remainder.
3792
3793 CODE is the expression code for which kind of division this is;
3794 it controls how rounding is done. MODE is the machine mode to use.
3795 UNSIGNEDP nonzero means do unsigned division. */
3796
3797 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3798 and then correct it by or'ing in missing high bits
3799 if result of ANDI is nonzero.
3800 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3801 This could optimize to a bfexts instruction.
3802 But C doesn't use these operations, so their optimizations are
3803 left for later. */
3804 /* ??? For modulo, we don't actually need the highpart of the first product,
3805 the low part will do nicely. And for small divisors, the second multiply
3806 can also be a low-part only multiply or even be completely left out.
3807 E.g. to calculate the remainder of a division by 3 with a 32 bit
3808 multiply, multiply with 0x55555556 and extract the upper two bits;
3809 the result is exact for inputs up to 0x1fffffff.
3810 The input range can be reduced by using cross-sum rules.
3811 For odd divisors >= 3, the following table gives right shift counts
3812 so that if a number is shifted by an integer multiple of the given
3813 amount, the remainder stays the same:
3814 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3815 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3816 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3817 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3818 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3819
3820 Cross-sum rules for even numbers can be derived by leaving as many bits
3821 to the right alone as the divisor has zeros to the right.
3822 E.g. if x is an unsigned 32 bit number:
3823 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3824 */
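/* A sketch of the multiply trick in the note above (illustrative only; the
   helper name is not part of GCC): the top two bits of the truncated 32-bit
   product x * 0x55555556 are x % 3, exact for x <= 0x1fffffff as stated.  */
#if 0
static unsigned int
urem3_by_multiply (unsigned int x)     /* valid for x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;      /* 0x55555556 == ceil (2^32 / 3) */
}
#endif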
3825
3826 rtx
3827 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3828 rtx op0, rtx op1, rtx target, int unsignedp)
3829 {
3830 enum machine_mode compute_mode;
3831 rtx tquotient;
3832 rtx quotient = 0, remainder = 0;
3833 rtx last;
3834 int size;
3835 rtx insn, set;
3836 optab optab1, optab2;
3837 int op1_is_constant, op1_is_pow2 = 0;
3838 int max_cost, extra_cost;
3839 static HOST_WIDE_INT last_div_const = 0;
3840 static HOST_WIDE_INT ext_op1;
3841
3842 op1_is_constant = GET_CODE (op1) == CONST_INT;
3843 if (op1_is_constant)
3844 {
3845 ext_op1 = INTVAL (op1);
3846 if (unsignedp)
3847 ext_op1 &= GET_MODE_MASK (mode);
3848 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3849 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3850 }
3851
3852 /*
3853 This is the structure of expand_divmod:
3854
3855 First comes code to fix up the operands so we can perform the operations
3856 correctly and efficiently.
3857
3858 Second comes a switch statement with code specific for each rounding mode.
3859 For some special operands this code emits all RTL for the desired
3860 operation, for other cases, it generates only a quotient and stores it in
3861 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3862 to indicate that it has not done anything.
3863
3864 Last comes code that finishes the operation. If QUOTIENT is set and
3865 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3866 QUOTIENT is not set, it is computed using trunc rounding.
3867
3868 We try to generate special code for division and remainder when OP1 is a
3869 constant. If |OP1| = 2**n we can use shifts and some other fast
3870 operations. For other values of OP1, we compute a carefully selected
3871 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3872 by m.
3873
3874 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3875 half of the product. Different strategies for generating the product are
3876 implemented in expand_mult_highpart.
3877
3878 If what we actually want is the remainder, we generate that by another
3879 by-constant multiplication and a subtraction. */
3880
3881 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3882 code below will malfunction if we are, so check here and handle
3883 the special case if so. */
3884 if (op1 == const1_rtx)
3885 return rem_flag ? const0_rtx : op0;
3886
3887 /* When dividing by -1, we could get an overflow.
3888 negv_optab can handle overflows. */
3889 if (! unsignedp && op1 == constm1_rtx)
3890 {
3891 if (rem_flag)
3892 return const0_rtx;
3893 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3894 ? negv_optab : neg_optab, op0, target, 0);
3895 }
3896
3897 if (target
3898 /* Don't use the function value register as a target
3899 since we have to read it as well as write it,
3900 and function-inlining gets confused by this. */
3901 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3902 /* Don't clobber an operand while doing a multi-step calculation. */
3903 || ((rem_flag || op1_is_constant)
3904 && (reg_mentioned_p (target, op0)
3905 || (MEM_P (op0) && MEM_P (target))))
3906 || reg_mentioned_p (target, op1)
3907 || (MEM_P (op1) && MEM_P (target))))
3908 target = 0;
3909
3910 /* Get the mode in which to perform this computation. Normally it will
3911 be MODE, but sometimes we can't do the desired operation in MODE.
3912 If so, pick a wider mode in which we can do the operation. Convert
3913 to that mode at the start to avoid repeated conversions.
3914
3915 First see what operations we need. These depend on the expression
3916 we are evaluating. (We assume that divxx3 insns exist under the
3917 same conditions as modxx3 insns, and that these insns don't normally
3918 fail. If these assumptions are not correct, we may generate less
3919 efficient code in some cases.)
3920
3921 Then see if we find a mode in which we can open-code that operation
3922 (either a division, modulus, or shift). Finally, check for the smallest
3923 mode for which we can do the operation with a library call. */
3924
3925 /* We might want to refine this now that we have division-by-constant
3926 optimization. Since expand_mult_highpart tries so many variants, it is
3927 not straightforward to generalize this. Maybe we should make an array
3928 of possible modes in init_expmed? Save this for GCC 2.7. */
3929
3930 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3931 ? (unsignedp ? lshr_optab : ashr_optab)
3932 : (unsignedp ? udiv_optab : sdiv_optab));
3933 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3934 ? optab1
3935 : (unsignedp ? udivmod_optab : sdivmod_optab));
3936
3937 for (compute_mode = mode; compute_mode != VOIDmode;
3938 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3939 if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3940 || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3941 break;
3942
3943 if (compute_mode == VOIDmode)
3944 for (compute_mode = mode; compute_mode != VOIDmode;
3945 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3946 if (optab_libfunc (optab1, compute_mode)
3947 || optab_libfunc (optab2, compute_mode))
3948 break;
3949
3950 /* If we still couldn't find a mode, use MODE, but expand_binop will
3951 probably die. */
3952 if (compute_mode == VOIDmode)
3953 compute_mode = mode;
3954
3955 if (target && GET_MODE (target) == compute_mode)
3956 tquotient = target;
3957 else
3958 tquotient = gen_reg_rtx (compute_mode);
3959
3960 size = GET_MODE_BITSIZE (compute_mode);
3961 #if 0
3962 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3963 (mode), and thereby get better code when OP1 is a constant. Do that
3964 later. It will require going over all usages of SIZE below. */
3965 size = GET_MODE_BITSIZE (mode);
3966 #endif
3967
3968 /* Only deduct something for a REM if the last divide done was
3969 for a different constant. Then set the constant of the last
3970 divide. */
3971 max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3972 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3973 && INTVAL (op1) == last_div_const))
3974 max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3975
3976 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3977
3978 /* Now convert to the best mode to use. */
3979 if (compute_mode != mode)
3980 {
3981 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3982 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3983
3984 /* convert_modes may have placed op1 into a register, so we
3985 must recompute the following. */
3986 op1_is_constant = GET_CODE (op1) == CONST_INT;
3987 op1_is_pow2 = (op1_is_constant
3988 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3989 || (! unsignedp
3990 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3991 }
3992
3993 /* If one of the operands is a volatile MEM, copy it into a register. */
3994
3995 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3996 op0 = force_reg (compute_mode, op0);
3997 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3998 op1 = force_reg (compute_mode, op1);
3999
4000 /* If we need the remainder or if OP1 is constant, we need to
4001 put OP0 in a register in case it has any queued subexpressions. */
4002 if (rem_flag || op1_is_constant)
4003 op0 = force_reg (compute_mode, op0);
4004
4005 last = get_last_insn ();
4006
4007 /* Promote floor rounding to trunc rounding for unsigned operations. */
4008 if (unsignedp)
4009 {
4010 if (code == FLOOR_DIV_EXPR)
4011 code = TRUNC_DIV_EXPR;
4012 if (code == FLOOR_MOD_EXPR)
4013 code = TRUNC_MOD_EXPR;
4014 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4015 code = TRUNC_DIV_EXPR;
4016 }
4017
4018 if (op1 != const0_rtx)
4019 switch (code)
4020 {
4021 case TRUNC_MOD_EXPR:
4022 case TRUNC_DIV_EXPR:
4023 if (op1_is_constant)
4024 {
4025 if (unsignedp)
4026 {
4027 unsigned HOST_WIDE_INT mh;
4028 int pre_shift, post_shift;
4029 int dummy;
4030 rtx ml;
4031 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4032 & GET_MODE_MASK (compute_mode));
4033
4034 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4035 {
4036 pre_shift = floor_log2 (d);
4037 if (rem_flag)
4038 {
4039 remainder
4040 = expand_binop (compute_mode, and_optab, op0,
4041 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4042 remainder, 1,
4043 OPTAB_LIB_WIDEN);
4044 if (remainder)
4045 return gen_lowpart (mode, remainder);
4046 }
4047 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4048 build_int_cst (NULL_TREE,
4049 pre_shift),
4050 tquotient, 1);
4051 }
4052 else if (size <= HOST_BITS_PER_WIDE_INT)
4053 {
4054 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4055 {
4056 /* Most significant bit of divisor is set; emit an scc
4057 insn. */
4058 quotient = emit_store_flag (tquotient, GEU, op0, op1,
4059 compute_mode, 1, 1);
4060 if (quotient == 0)
4061 goto fail1;
4062 }
4063 else
4064 {
4065 /* Find a suitable multiplier and right shift count
4066 instead of multiplying with D. */
4067
4068 mh = choose_multiplier (d, size, size,
4069 &ml, &post_shift, &dummy);
4070
4071 /* If the suggested multiplier is more than SIZE bits,
4072 we can do better for even divisors, using an
4073 initial right shift. */
4074 if (mh != 0 && (d & 1) == 0)
4075 {
4076 pre_shift = floor_log2 (d & -d);
4077 mh = choose_multiplier (d >> pre_shift, size,
4078 size - pre_shift,
4079 &ml, &post_shift, &dummy);
4080 gcc_assert (!mh);
4081 }
4082 else
4083 pre_shift = 0;
4084
4085 if (mh != 0)
4086 {
4087 rtx t1, t2, t3, t4;
4088
4089 if (post_shift - 1 >= BITS_PER_WORD)
4090 goto fail1;
4091
4092 extra_cost
4093 = (shift_cost[compute_mode][post_shift - 1]
4094 + shift_cost[compute_mode][1]
4095 + 2 * add_cost[compute_mode]);
4096 t1 = expand_mult_highpart (compute_mode, op0, ml,
4097 NULL_RTX, 1,
4098 max_cost - extra_cost);
4099 if (t1 == 0)
4100 goto fail1;
4101 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4102 op0, t1),
4103 NULL_RTX);
4104 t3 = expand_shift
4105 (RSHIFT_EXPR, compute_mode, t2,
4106 build_int_cst (NULL_TREE, 1),
4107 NULL_RTX,1);
4108 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4109 t1, t3),
4110 NULL_RTX);
4111 quotient = expand_shift
4112 (RSHIFT_EXPR, compute_mode, t4,
4113 build_int_cst (NULL_TREE, post_shift - 1),
4114 tquotient, 1);
4115 }
4116 else
4117 {
4118 rtx t1, t2;
4119
4120 if (pre_shift >= BITS_PER_WORD
4121 || post_shift >= BITS_PER_WORD)
4122 goto fail1;
4123
4124 t1 = expand_shift
4125 (RSHIFT_EXPR, compute_mode, op0,
4126 build_int_cst (NULL_TREE, pre_shift),
4127 NULL_RTX, 1);
4128 extra_cost
4129 = (shift_cost[compute_mode][pre_shift]
4130 + shift_cost[compute_mode][post_shift]);
4131 t2 = expand_mult_highpart (compute_mode, t1, ml,
4132 NULL_RTX, 1,
4133 max_cost - extra_cost);
4134 if (t2 == 0)
4135 goto fail1;
4136 quotient = expand_shift
4137 (RSHIFT_EXPR, compute_mode, t2,
4138 build_int_cst (NULL_TREE, post_shift),
4139 tquotient, 1);
4140 }
4141 }
4142 }
4143 else /* Too wide mode to use tricky code */
4144 break;
4145
4146 insn = get_last_insn ();
4147 if (insn != last
4148 && (set = single_set (insn)) != 0
4149 && SET_DEST (set) == quotient)
4150 set_unique_reg_note (insn,
4151 REG_EQUAL,
4152 gen_rtx_UDIV (compute_mode, op0, op1));
4153 }
4154 else /* TRUNC_DIV, signed */
4155 {
4156 unsigned HOST_WIDE_INT ml;
4157 int lgup, post_shift;
4158 rtx mlr;
4159 HOST_WIDE_INT d = INTVAL (op1);
4160 unsigned HOST_WIDE_INT abs_d;
4161
4162 /* Since d might be INT_MIN, we have to cast to
4163 unsigned HOST_WIDE_INT before negating to avoid
4164 undefined signed overflow. */
4165 abs_d = (d >= 0
4166 ? (unsigned HOST_WIDE_INT) d
4167 : - (unsigned HOST_WIDE_INT) d);
4168
4169 /* n rem d = n rem -d */
4170 if (rem_flag && d < 0)
4171 {
4172 d = abs_d;
4173 op1 = gen_int_mode (abs_d, compute_mode);
4174 }
4175
4176 if (d == 1)
4177 quotient = op0;
4178 else if (d == -1)
4179 quotient = expand_unop (compute_mode, neg_optab, op0,
4180 tquotient, 0);
4181 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4182 {
4183 /* This case is not handled correctly below. */
4184 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4185 compute_mode, 1, 1);
4186 if (quotient == 0)
4187 goto fail1;
4188 }
4189 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4190 && (rem_flag ? smod_pow2_cheap[compute_mode]
4191 : sdiv_pow2_cheap[compute_mode])
4192 /* We assume that the cheap metric is true if the
4193 optab has an expander for this mode. */
4194 && ((optab_handler ((rem_flag ? smod_optab
4195 : sdiv_optab),
4196 compute_mode)->insn_code
4197 != CODE_FOR_nothing)
4198 || (optab_handler(sdivmod_optab,
4199 compute_mode)
4200 ->insn_code != CODE_FOR_nothing)))
4201 ;
4202 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4203 {
4204 if (rem_flag)
4205 {
4206 remainder = expand_smod_pow2 (compute_mode, op0, d);
4207 if (remainder)
4208 return gen_lowpart (mode, remainder);
4209 }
4210
4211 if (sdiv_pow2_cheap[compute_mode]
4212 && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4213 != CODE_FOR_nothing)
4214 || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4215 != CODE_FOR_nothing)))
4216 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4217 compute_mode, op0,
4218 gen_int_mode (abs_d,
4219 compute_mode),
4220 NULL_RTX, 0);
4221 else
4222 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4223
4224 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4225 negate the quotient. */
4226 if (d < 0)
4227 {
4228 insn = get_last_insn ();
4229 if (insn != last
4230 && (set = single_set (insn)) != 0
4231 && SET_DEST (set) == quotient
4232 && abs_d < ((unsigned HOST_WIDE_INT) 1
4233 << (HOST_BITS_PER_WIDE_INT - 1)))
4234 set_unique_reg_note (insn,
4235 REG_EQUAL,
4236 gen_rtx_DIV (compute_mode,
4237 op0,
4238 GEN_INT
4239 (trunc_int_for_mode
4240 (abs_d,
4241 compute_mode))));
4242
4243 quotient = expand_unop (compute_mode, neg_optab,
4244 quotient, quotient, 0);
4245 }
4246 }
4247 else if (size <= HOST_BITS_PER_WIDE_INT)
4248 {
4249 choose_multiplier (abs_d, size, size - 1,
4250 &mlr, &post_shift, &lgup);
4251 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4252 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4253 {
4254 rtx t1, t2, t3;
4255
4256 if (post_shift >= BITS_PER_WORD
4257 || size - 1 >= BITS_PER_WORD)
4258 goto fail1;
4259
4260 extra_cost = (shift_cost[compute_mode][post_shift]
4261 + shift_cost[compute_mode][size - 1]
4262 + add_cost[compute_mode]);
4263 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4264 NULL_RTX, 0,
4265 max_cost - extra_cost);
4266 if (t1 == 0)
4267 goto fail1;
4268 t2 = expand_shift
4269 (RSHIFT_EXPR, compute_mode, t1,
4270 build_int_cst (NULL_TREE, post_shift),
4271 NULL_RTX, 0);
4272 t3 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, op0,
4274 build_int_cst (NULL_TREE, size - 1),
4275 NULL_RTX, 0);
4276 if (d < 0)
4277 quotient
4278 = force_operand (gen_rtx_MINUS (compute_mode,
4279 t3, t2),
4280 tquotient);
4281 else
4282 quotient
4283 = force_operand (gen_rtx_MINUS (compute_mode,
4284 t2, t3),
4285 tquotient);
4286 }
4287 else
4288 {
4289 rtx t1, t2, t3, t4;
4290
4291 if (post_shift >= BITS_PER_WORD
4292 || size - 1 >= BITS_PER_WORD)
4293 goto fail1;
4294
4295 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4296 mlr = gen_int_mode (ml, compute_mode);
4297 extra_cost = (shift_cost[compute_mode][post_shift]
4298 + shift_cost[compute_mode][size - 1]
4299 + 2 * add_cost[compute_mode]);
4300 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4301 NULL_RTX, 0,
4302 max_cost - extra_cost);
4303 if (t1 == 0)
4304 goto fail1;
4305 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4306 t1, op0),
4307 NULL_RTX);
4308 t3 = expand_shift
4309 (RSHIFT_EXPR, compute_mode, t2,
4310 build_int_cst (NULL_TREE, post_shift),
4311 NULL_RTX, 0);
4312 t4 = expand_shift
4313 (RSHIFT_EXPR, compute_mode, op0,
4314 build_int_cst (NULL_TREE, size - 1),
4315 NULL_RTX, 0);
4316 if (d < 0)
4317 quotient
4318 = force_operand (gen_rtx_MINUS (compute_mode,
4319 t4, t3),
4320 tquotient);
4321 else
4322 quotient
4323 = force_operand (gen_rtx_MINUS (compute_mode,
4324 t3, t4),
4325 tquotient);
4326 }
4327 }
4328 else /* Too wide mode to use tricky code */
4329 break;
4330
4331 insn = get_last_insn ();
4332 if (insn != last
4333 && (set = single_set (insn)) != 0
4334 && SET_DEST (set) == quotient)
4335 set_unique_reg_note (insn,
4336 REG_EQUAL,
4337 gen_rtx_DIV (compute_mode, op0, op1));
4338 }
4339 break;
4340 }
4341 fail1:
4342 delete_insns_since (last);
4343 break;
4344
4345 case FLOOR_DIV_EXPR:
4346 case FLOOR_MOD_EXPR:
4347 /* We will come here only for signed operations. */
4348 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4349 {
4350 unsigned HOST_WIDE_INT mh;
4351 int pre_shift, lgup, post_shift;
4352 HOST_WIDE_INT d = INTVAL (op1);
4353 rtx ml;
4354
4355 if (d > 0)
4356 {
4357 /* We could just as easily deal with negative constants here,
4358 but it does not seem worth the trouble for GCC 2.6. */
4359 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4360 {
4361 pre_shift = floor_log2 (d);
4362 if (rem_flag)
4363 {
4364 remainder = expand_binop (compute_mode, and_optab, op0,
4365 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4366 remainder, 0, OPTAB_LIB_WIDEN);
4367 if (remainder)
4368 return gen_lowpart (mode, remainder);
4369 }
4370 quotient = expand_shift
4371 (RSHIFT_EXPR, compute_mode, op0,
4372 build_int_cst (NULL_TREE, pre_shift),
4373 tquotient, 0);
4374 }
4375 else
4376 {
4377 rtx t1, t2, t3, t4;
4378
4379 mh = choose_multiplier (d, size, size - 1,
4380 &ml, &post_shift, &lgup);
4381 gcc_assert (!mh);
4382
4383 if (post_shift < BITS_PER_WORD
4384 && size - 1 < BITS_PER_WORD)
4385 {
4386 t1 = expand_shift
4387 (RSHIFT_EXPR, compute_mode, op0,
4388 build_int_cst (NULL_TREE, size - 1),
4389 NULL_RTX, 0);
4390 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4391 NULL_RTX, 0, OPTAB_WIDEN);
4392 extra_cost = (shift_cost[compute_mode][post_shift]
4393 + shift_cost[compute_mode][size - 1]
4394 + 2 * add_cost[compute_mode]);
4395 t3 = expand_mult_highpart (compute_mode, t2, ml,
4396 NULL_RTX, 1,
4397 max_cost - extra_cost);
4398 if (t3 != 0)
4399 {
4400 t4 = expand_shift
4401 (RSHIFT_EXPR, compute_mode, t3,
4402 build_int_cst (NULL_TREE, post_shift),
4403 NULL_RTX, 1);
4404 quotient = expand_binop (compute_mode, xor_optab,
4405 t4, t1, tquotient, 0,
4406 OPTAB_WIDEN);
4407 }
4408 }
4409 }
4410 }
4411 else
4412 {
4413 rtx nsign, t1, t2, t3, t4;
4414 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4415 op0, constm1_rtx), NULL_RTX);
4416 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4417 0, OPTAB_WIDEN);
4418 nsign = expand_shift
4419 (RSHIFT_EXPR, compute_mode, t2,
4420 build_int_cst (NULL_TREE, size - 1),
4421 NULL_RTX, 0);
4422 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4423 NULL_RTX);
4424 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4425 NULL_RTX, 0);
4426 if (t4)
4427 {
4428 rtx t5;
4429 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4430 NULL_RTX, 0);
4431 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4432 t4, t5),
4433 tquotient);
4434 }
4435 }
4436 }
4437
4438 if (quotient != 0)
4439 break;
4440 delete_insns_since (last);
4441
4442 /* Try using an instruction that produces both the quotient and
4443 remainder, using truncation. We can easily compensate the quotient
4444 or remainder to get floor rounding, once we have the remainder.
4445 Notice that we also compute the final remainder value here,
4446 and return the result right away. */
4447 if (target == 0 || GET_MODE (target) != compute_mode)
4448 target = gen_reg_rtx (compute_mode);
4449
4450 if (rem_flag)
4451 {
4452 remainder
4453 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4454 quotient = gen_reg_rtx (compute_mode);
4455 }
4456 else
4457 {
4458 quotient
4459 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4460 remainder = gen_reg_rtx (compute_mode);
4461 }
4462
4463 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4464 quotient, remainder, 0))
4465 {
4466 /* This could be computed with a branch-less sequence.
4467 Save that for later. */
4468 rtx tem;
4469 rtx label = gen_label_rtx ();
4470 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4471 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4472 NULL_RTX, 0, OPTAB_WIDEN);
4473 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4474 expand_dec (quotient, const1_rtx);
4475 expand_inc (remainder, op1);
4476 emit_label (label);
4477 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4478 }
4479
4480 /* No luck with division elimination or divmod. Have to do it
4481 by conditionally adjusting op0 *and* the result. */
4482 {
4483 rtx label1, label2, label3, label4, label5;
4484 rtx adjusted_op0;
4485 rtx tem;
4486
4487 quotient = gen_reg_rtx (compute_mode);
4488 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4489 label1 = gen_label_rtx ();
4490 label2 = gen_label_rtx ();
4491 label3 = gen_label_rtx ();
4492 label4 = gen_label_rtx ();
4493 label5 = gen_label_rtx ();
4494 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4495 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4496 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4497 quotient, 0, OPTAB_LIB_WIDEN);
4498 if (tem != quotient)
4499 emit_move_insn (quotient, tem);
4500 emit_jump_insn (gen_jump (label5));
4501 emit_barrier ();
4502 emit_label (label1);
4503 expand_inc (adjusted_op0, const1_rtx);
4504 emit_jump_insn (gen_jump (label4));
4505 emit_barrier ();
4506 emit_label (label2);
4507 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4508 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4509 quotient, 0, OPTAB_LIB_WIDEN);
4510 if (tem != quotient)
4511 emit_move_insn (quotient, tem);
4512 emit_jump_insn (gen_jump (label5));
4513 emit_barrier ();
4514 emit_label (label3);
4515 expand_dec (adjusted_op0, const1_rtx);
4516 emit_label (label4);
4517 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4518 quotient, 0, OPTAB_LIB_WIDEN);
4519 if (tem != quotient)
4520 emit_move_insn (quotient, tem);
4521 expand_dec (quotient, const1_rtx);
4522 emit_label (label5);
4523 }
4524 break;
4525
4526 case CEIL_DIV_EXPR:
4527 case CEIL_MOD_EXPR:
4528 if (unsignedp)
4529 {
4530 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4531 {
4532 rtx t1, t2, t3;
4533 unsigned HOST_WIDE_INT d = INTVAL (op1);
4534 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4535 build_int_cst (NULL_TREE, floor_log2 (d)),
4536 tquotient, 1);
4537 t2 = expand_binop (compute_mode, and_optab, op0,
4538 GEN_INT (d - 1),
4539 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4540 t3 = gen_reg_rtx (compute_mode);
4541 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4542 compute_mode, 1, 1);
4543 if (t3 == 0)
4544 {
4545 rtx lab;
4546 lab = gen_label_rtx ();
4547 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4548 expand_inc (t1, const1_rtx);
4549 emit_label (lab);
4550 quotient = t1;
4551 }
4552 else
4553 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4554 t1, t3),
4555 tquotient);
4556 break;
4557 }
4558
4559 /* Try using an instruction that produces both the quotient and
4560 remainder, using truncation. We can easily compensate the
4561 quotient or remainder to get ceiling rounding, once we have the
4562 remainder. Notice that we also compute the final remainder
4563 value here, and return the result right away. */
4564 if (target == 0 || GET_MODE (target) != compute_mode)
4565 target = gen_reg_rtx (compute_mode);
4566
4567 if (rem_flag)
4568 {
4569 remainder = (REG_P (target)
4570 ? target : gen_reg_rtx (compute_mode));
4571 quotient = gen_reg_rtx (compute_mode);
4572 }
4573 else
4574 {
4575 quotient = (REG_P (target)
4576 ? target : gen_reg_rtx (compute_mode));
4577 remainder = gen_reg_rtx (compute_mode);
4578 }
4579
4580 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4581 remainder, 1))
4582 {
4583 /* This could be computed with a branch-less sequence.
4584 Save that for later. */
4585 rtx label = gen_label_rtx ();
4586 do_cmp_and_jump (remainder, const0_rtx, EQ,
4587 compute_mode, label);
4588 expand_inc (quotient, const1_rtx);
4589 expand_dec (remainder, op1);
4590 emit_label (label);
4591 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4592 }
4593
4594 /* No luck with division elimination or divmod. Have to do it
4595 by conditionally adjusting op0 *and* the result. */
4596 {
4597 rtx label1, label2;
4598 rtx adjusted_op0, tem;
4599
4600 quotient = gen_reg_rtx (compute_mode);
4601 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4602 label1 = gen_label_rtx ();
4603 label2 = gen_label_rtx ();
4604 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4605 compute_mode, label1);
4606 emit_move_insn (quotient, const0_rtx);
4607 emit_jump_insn (gen_jump (label2));
4608 emit_barrier ();
4609 emit_label (label1);
4610 expand_dec (adjusted_op0, const1_rtx);
4611 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4612 quotient, 1, OPTAB_LIB_WIDEN);
4613 if (tem != quotient)
4614 emit_move_insn (quotient, tem);
4615 expand_inc (quotient, const1_rtx);
4616 emit_label (label2);
4617 }
4618 }
4619 else /* signed */
4620 {
4621 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4622 && INTVAL (op1) >= 0)
4623 {
4624 /* This is extremely similar to the code for the unsigned case
4625 above. For 2.7 we should merge these variants, but for
4626 2.6.1 I don't want to touch the code for unsigned since that
4627 gets used in C. The signed case will only be used by other
4628 languages (Ada). */
4629
4630 rtx t1, t2, t3;
4631 unsigned HOST_WIDE_INT d = INTVAL (op1);
4632 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4633 build_int_cst (NULL_TREE, floor_log2 (d)),
4634 tquotient, 0);
4635 t2 = expand_binop (compute_mode, and_optab, op0,
4636 GEN_INT (d - 1),
4637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4638 t3 = gen_reg_rtx (compute_mode);
4639 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4640 compute_mode, 1, 1);
4641 if (t3 == 0)
4642 {
4643 rtx lab;
4644 lab = gen_label_rtx ();
4645 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4646 expand_inc (t1, const1_rtx);
4647 emit_label (lab);
4648 quotient = t1;
4649 }
4650 else
4651 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4652 t1, t3),
4653 tquotient);
4654 break;
4655 }
4656
4657 /* Try using an instruction that produces both the quotient and
4658 remainder, using truncation. We can easily compensate the
4659 quotient or remainder to get ceiling rounding, once we have the
4660 remainder. Notice that we also compute the final remainder
4661 value here, and return the result right away. */
4662 if (target == 0 || GET_MODE (target) != compute_mode)
4663 target = gen_reg_rtx (compute_mode);
4664 if (rem_flag)
4665 {
4666 remainder = (REG_P (target)
4667 ? target : gen_reg_rtx (compute_mode));
4668 quotient = gen_reg_rtx (compute_mode);
4669 }
4670 else
4671 {
4672 quotient = (REG_P (target)
4673 ? target : gen_reg_rtx (compute_mode));
4674 remainder = gen_reg_rtx (compute_mode);
4675 }
4676
4677 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4678 remainder, 0))
4679 {
4680 /* This could be computed with a branch-less sequence.
4681 Save that for later. */
4682 rtx tem;
4683 rtx label = gen_label_rtx ();
4684 do_cmp_and_jump (remainder, const0_rtx, EQ,
4685 compute_mode, label);
4686 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4687 NULL_RTX, 0, OPTAB_WIDEN);
4688 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4689 expand_inc (quotient, const1_rtx);
4690 expand_dec (remainder, op1);
4691 emit_label (label);
4692 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4693 }
4694
4695 /* No luck with division elimination or divmod. Have to do it
4696 by conditionally adjusting op0 *and* the result. */
4697 {
4698 rtx label1, label2, label3, label4, label5;
4699 rtx adjusted_op0;
4700 rtx tem;
4701
4702 quotient = gen_reg_rtx (compute_mode);
4703 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4704 label1 = gen_label_rtx ();
4705 label2 = gen_label_rtx ();
4706 label3 = gen_label_rtx ();
4707 label4 = gen_label_rtx ();
4708 label5 = gen_label_rtx ();
4709 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4710 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4711 compute_mode, label1);
4712 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4713 quotient, 0, OPTAB_LIB_WIDEN);
4714 if (tem != quotient)
4715 emit_move_insn (quotient, tem);
4716 emit_jump_insn (gen_jump (label5));
4717 emit_barrier ();
4718 emit_label (label1);
4719 expand_dec (adjusted_op0, const1_rtx);
4720 emit_jump_insn (gen_jump (label4));
4721 emit_barrier ();
4722 emit_label (label2);
4723 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4724 compute_mode, label3);
4725 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4726 quotient, 0, OPTAB_LIB_WIDEN);
4727 if (tem != quotient)
4728 emit_move_insn (quotient, tem);
4729 emit_jump_insn (gen_jump (label5));
4730 emit_barrier ();
4731 emit_label (label3);
4732 expand_inc (adjusted_op0, const1_rtx);
4733 emit_label (label4);
4734 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4735 quotient, 0, OPTAB_LIB_WIDEN);
4736 if (tem != quotient)
4737 emit_move_insn (quotient, tem);
4738 expand_inc (quotient, const1_rtx);
4739 emit_label (label5);
4740 }
4741 }
4742 break;
4743
4744 case EXACT_DIV_EXPR:
4745 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4746 {
4747 HOST_WIDE_INT d = INTVAL (op1);
4748 unsigned HOST_WIDE_INT ml;
4749 int pre_shift;
4750 rtx t1;
4751
4752 pre_shift = floor_log2 (d & -d);
4753 ml = invert_mod2n (d >> pre_shift, size);
4754 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4755 build_int_cst (NULL_TREE, pre_shift),
4756 NULL_RTX, unsignedp);
4757 quotient = expand_mult (compute_mode, t1,
4758 gen_int_mode (ml, compute_mode),
4759 NULL_RTX, 1);
4760
4761 insn = get_last_insn ();
4762 set_unique_reg_note (insn,
4763 REG_EQUAL,
4764 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765 compute_mode,
4766 op0, op1));
4767 }
4768 break;
4769
4770 case ROUND_DIV_EXPR:
4771 case ROUND_MOD_EXPR:
4772 if (unsignedp)
4773 {
4774 rtx tem;
4775 rtx label;
4776 label = gen_label_rtx ();
4777 quotient = gen_reg_rtx (compute_mode);
4778 remainder = gen_reg_rtx (compute_mode);
4779 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4780 {
4781 rtx tem;
4782 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783 quotient, 1, OPTAB_LIB_WIDEN);
4784 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786 remainder, 1, OPTAB_LIB_WIDEN);
4787 }
4788 tem = plus_constant (op1, -1);
4789 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4790 build_int_cst (NULL_TREE, 1),
4791 NULL_RTX, 1);
4792 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4793 expand_inc (quotient, const1_rtx);
4794 expand_dec (remainder, op1);
4795 emit_label (label);
4796 }
4797 else
4798 {
4799 rtx abs_rem, abs_op1, tem, mask;
4800 rtx label;
4801 label = gen_label_rtx ();
4802 quotient = gen_reg_rtx (compute_mode);
4803 remainder = gen_reg_rtx (compute_mode);
4804 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4805 {
4806 rtx tem;
4807 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4808 quotient, 0, OPTAB_LIB_WIDEN);
4809 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4810 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4811 remainder, 0, OPTAB_LIB_WIDEN);
4812 }
4813 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4814 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4815 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4816 build_int_cst (NULL_TREE, 1),
4817 NULL_RTX, 1);
4818 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4819 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4820 NULL_RTX, 0, OPTAB_WIDEN);
4821 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4822 build_int_cst (NULL_TREE, size - 1),
4823 NULL_RTX, 0);
4824 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4825 NULL_RTX, 0, OPTAB_WIDEN);
4826 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4827 NULL_RTX, 0, OPTAB_WIDEN);
4828 expand_inc (quotient, tem);
4829 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4830 NULL_RTX, 0, OPTAB_WIDEN);
4831 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4832 NULL_RTX, 0, OPTAB_WIDEN);
4833 expand_dec (remainder, tem);
4834 emit_label (label);
4835 }
4836 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4837
4838 default:
4839 gcc_unreachable ();
4840 }
4841
4842 if (quotient == 0)
4843 {
4844 if (target && GET_MODE (target) != compute_mode)
4845 target = 0;
4846
4847 if (rem_flag)
4848 {
4849 /* Try to produce the remainder without producing the quotient.
4850 If we seem to have a divmod pattern that does not require widening,
4851 don't try widening here. We should really have a WIDEN argument
4852 to expand_twoval_binop, since what we'd really like to do here is
4853 1) try a mod insn in compute_mode
4854 2) try a divmod insn in compute_mode
4855 3) try a div insn in compute_mode and multiply-subtract to get
4856 remainder
4857 4) try the same things with widening allowed. */
4858 remainder
4859 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4860 op0, op1, target,
4861 unsignedp,
4862 ((optab_handler (optab2, compute_mode)->insn_code
4863 != CODE_FOR_nothing)
4864 ? OPTAB_DIRECT : OPTAB_WIDEN));
4865 if (remainder == 0)
4866 {
4867 /* No luck there. Can we do remainder and divide at once
4868 without a library call? */
4869 remainder = gen_reg_rtx (compute_mode);
4870 if (! expand_twoval_binop ((unsignedp
4871 ? udivmod_optab
4872 : sdivmod_optab),
4873 op0, op1,
4874 NULL_RTX, remainder, unsignedp))
4875 remainder = 0;
4876 }
4877
4878 if (remainder)
4879 return gen_lowpart (mode, remainder);
4880 }
4881
4882 /* Produce the quotient. Try a quotient insn, but not a library call.
4883 If we have a divmod in this mode, use it in preference to widening
4884 the div (for this test we assume it will not fail). Note that optab2
4885 is set to the one of the two optabs that the call below will use. */
4886 quotient
4887 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4888 op0, op1, rem_flag ? NULL_RTX : target,
4889 unsignedp,
4890 ((optab_handler (optab2, compute_mode)->insn_code
4891 != CODE_FOR_nothing)
4892 ? OPTAB_DIRECT : OPTAB_WIDEN));
4893
4894 if (quotient == 0)
4895 {
4896 /* No luck there. Try a quotient-and-remainder insn,
4897 keeping the quotient alone. */
4898 quotient = gen_reg_rtx (compute_mode);
4899 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4900 op0, op1,
4901 quotient, NULL_RTX, unsignedp))
4902 {
4903 quotient = 0;
4904 if (! rem_flag)
4905 /* Still no luck. If we are not computing the remainder,
4906 use a library call for the quotient. */
4907 quotient = sign_expand_binop (compute_mode,
4908 udiv_optab, sdiv_optab,
4909 op0, op1, target,
4910 unsignedp, OPTAB_LIB_WIDEN);
4911 }
4912 }
4913 }
4914
4915 if (rem_flag)
4916 {
4917 if (target && GET_MODE (target) != compute_mode)
4918 target = 0;
4919
4920 if (quotient == 0)
4921 {
4922 /* No divide instruction either. Use library for remainder. */
4923 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4924 op0, op1, target,
4925 unsignedp, OPTAB_LIB_WIDEN);
4926 /* No remainder function. Try a quotient-and-remainder
4927 function, keeping the remainder. */
4928 if (!remainder)
4929 {
4930 remainder = gen_reg_rtx (compute_mode);
4931 if (!expand_twoval_binop_libfunc
4932 (unsignedp ? udivmod_optab : sdivmod_optab,
4933 op0, op1,
4934 NULL_RTX, remainder,
4935 unsignedp ? UMOD : MOD))
4936 remainder = NULL_RTX;
4937 }
4938 }
4939 else
4940 {
4941 /* We divided. Now finish doing X - Y * (X / Y). */
4942 remainder = expand_mult (compute_mode, quotient, op1,
4943 NULL_RTX, unsignedp);
4944 remainder = expand_binop (compute_mode, sub_optab, op0,
4945 remainder, target, unsignedp,
4946 OPTAB_LIB_WIDEN);
4947 }
4948 }
4949
4950 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4951 }
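/* A host-side sketch of the rounding compensation used above when only a
   truncating division or divmod is available (illustrative only; the helper
   names are not part of GCC, OP1 is assumed nonzero, and C's "/" and "%"
   truncate toward zero just like TRUNC_DIV_EXPR).  */
#if 0
static void
floor_divmod (int op0, int op1, int *quot, int *rem)
{
  int q = op0 / op1, r = op0 % op1;
  if (r != 0 && (op0 ^ op1) < 0)   /* inexact and the signs differ */
    {
      q -= 1;                      /* round the quotient downward */
      r += op1;                    /* and fix the remainder to match */
    }
  *quot = q;
  *rem = r;
}

static unsigned int
ceil_udiv (unsigned int op0, unsigned int op1)
{
  return op0 / op1 + (op0 % op1 != 0);  /* bump when the division is inexact */
}
#endif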
4952 \f
4953 /* Return a tree node with data type TYPE, describing the value of X.
4954 Usually this is a VAR_DECL, if there is no obvious better choice.
4955 X may be an expression, however we only support those expressions
4956 generated by loop.c. */
4957
4958 tree
4959 make_tree (tree type, rtx x)
4960 {
4961 tree t;
4962
4963 switch (GET_CODE (x))
4964 {
4965 case CONST_INT:
4966 {
4967 HOST_WIDE_INT hi = 0;
4968
4969 if (INTVAL (x) < 0
4970 && !(TYPE_UNSIGNED (type)
4971 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4972 < HOST_BITS_PER_WIDE_INT)))
4973 hi = -1;
4974
4975 t = build_int_cst_wide (type, INTVAL (x), hi);
4976
4977 return t;
4978 }
4979
4980 case CONST_DOUBLE:
4981 if (GET_MODE (x) == VOIDmode)
4982 t = build_int_cst_wide (type,
4983 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4984 else
4985 {
4986 REAL_VALUE_TYPE d;
4987
4988 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4989 t = build_real (type, d);
4990 }
4991
4992 return t;
4993
4994 case CONST_VECTOR:
4995 {
4996 int units = CONST_VECTOR_NUNITS (x);
4997 tree itype = TREE_TYPE (type);
4998 tree t = NULL_TREE;
4999 int i;
5000
5001
5002 /* Build a tree with vector elements. */
5003 for (i = units - 1; i >= 0; --i)
5004 {
5005 rtx elt = CONST_VECTOR_ELT (x, i);
5006 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5007 }
5008
5009 return build_vector (type, t);
5010 }
5011
5012 case PLUS:
5013 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5014 make_tree (type, XEXP (x, 1)));
5015
5016 case MINUS:
5017 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5018 make_tree (type, XEXP (x, 1)));
5019
5020 case NEG:
5021 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5022
5023 case MULT:
5024 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 make_tree (type, XEXP (x, 1)));
5026
5027 case ASHIFT:
5028 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5029 make_tree (type, XEXP (x, 1)));
5030
5031 case LSHIFTRT:
5032 t = unsigned_type_for (type);
5033 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5034 make_tree (t, XEXP (x, 0)),
5035 make_tree (type, XEXP (x, 1))));
5036
5037 case ASHIFTRT:
5038 t = signed_type_for (type);
5039 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5040 make_tree (t, XEXP (x, 0)),
5041 make_tree (type, XEXP (x, 1))));
5042
5043 case DIV:
5044 if (TREE_CODE (type) != REAL_TYPE)
5045 t = signed_type_for (type);
5046 else
5047 t = type;
5048
5049 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5050 make_tree (t, XEXP (x, 0)),
5051 make_tree (t, XEXP (x, 1))));
5052 case UDIV:
5053 t = unsigned_type_for (type);
5054 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5055 make_tree (t, XEXP (x, 0)),
5056 make_tree (t, XEXP (x, 1))));
5057
5058 case SIGN_EXTEND:
5059 case ZERO_EXTEND:
5060 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5061 GET_CODE (x) == ZERO_EXTEND);
5062 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5063
5064 case CONST:
5065 return make_tree (type, XEXP (x, 0));
5066
5067 case SYMBOL_REF:
5068 t = SYMBOL_REF_DECL (x);
5069 if (t)
5070 return fold_convert (type, build_fold_addr_expr (t));
5071 /* else fall through. */
5072
5073 default:
5074 t = build_decl (VAR_DECL, NULL_TREE, type);
5075
5076 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5077 ptr_mode. So convert. */
5078 if (POINTER_TYPE_P (type))
5079 x = convert_memory_address (TYPE_MODE (type), x);
5080
5081 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5082 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5083 t->decl_with_rtl.rtl = x;
5084
5085 return t;
5086 }
5087 }
5088 \f
5089 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5090 and returning TARGET.
5091
5092 If TARGET is 0, a pseudo-register or constant is returned. */
5093
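/* For example, a call such as expand_and (SImode, op0, GEN_INT (0xff),
   NULL_RTX) returns an rtx computing OP0 & 0xff in SImode, emitting
   insns when the operands are not both constant. */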
5094 rtx
5095 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5096 {
5097 rtx tem = 0;
5098
5099 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5100 tem = simplify_binary_operation (AND, mode, op0, op1);
5101 if (tem == 0)
5102 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5103
5104 if (target == 0)
5105 target = tem;
5106 else if (tem != target)
5107 emit_move_insn (target, tem);
5108 return target;
5109 }
5110 \f
5111 /* Helper function for emit_store_flag. */
5112 static rtx
5113 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5114 int normalizep)
5115 {
5116 rtx op0;
5117 enum machine_mode target_mode = GET_MODE (target);
5118
5119 /* If we are converting to a wider mode, first convert to
5120 TARGET_MODE, then normalize. This produces better combining
5121 opportunities on machines that have a SIGN_EXTRACT when we are
5122 testing a single bit. This mostly benefits the 68k.
5123
5124 If STORE_FLAG_VALUE does not have the sign bit set when
5125 interpreted in MODE, we can do this conversion as unsigned, which
5126 is usually more efficient. */
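/* For example, with STORE_FLAG_VALUE == 1 and MODE == QImode the sign
   bit (0x80) is clear, so the widening conversion below can use
   zero-extension, which is usually cheaper than sign-extension. */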
5127 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5128 {
5129 convert_move (target, subtarget,
5130 (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5131 && 0 == (STORE_FLAG_VALUE
5132 & ((HOST_WIDE_INT) 1
5133 << (GET_MODE_BITSIZE (mode) -1))));
5134 op0 = target;
5135 mode = target_mode;
5136 }
5137 else
5138 op0 = subtarget;
5139
5140 /* If we want to keep subexpressions around, don't reuse our last
5141 target. */
5142 if (optimize)
5143 subtarget = 0;
5144
5145 /* Now normalize to the proper value in MODE. Sometimes we don't
5146 have to do anything. */
5147 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5148 ;
5149 /* STORE_FLAG_VALUE might be the most negative number, so write
5150 the comparison this way to avoid a compile-time warning. */
5151 else if (- normalizep == STORE_FLAG_VALUE)
5152 op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5153
5154 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5155 it hard to use a value of just the sign bit due to ANSI integer
5156 constant typing rules. */
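/* For instance, if STORE_FLAG_VALUE is just the sign bit (0x80 in QImode)
   and NORMALIZEP is 1, the logical right shift below by
   GET_MODE_BITSIZE (mode) - 1 maps the raw value to 0 or 1; with
   NORMALIZEP == -1 an arithmetic shift produces 0 or -1 instead. */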
5157 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5158 && (STORE_FLAG_VALUE
5159 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5160 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5161 size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5162 normalizep == 1);
5163 else
5164 {
5165 gcc_assert (STORE_FLAG_VALUE & 1);
5166
5167 op0 = expand_and (mode, op0, const1_rtx, subtarget);
5168 if (normalizep == -1)
5169 op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5170 }
5171
5172 /* If we were converting to a smaller mode, do the conversion now. */
5173 if (target_mode != mode)
5174 {
5175 convert_move (target, op0, 0);
5176 return target;
5177 }
5178 else
5179 return op0;
5180 }
5181
5182 /* Emit a store-flag instruction for comparison CODE on OP0 and OP1,
5183 storing the result in TARGET. Normally return TARGET.
5184 Return 0 if that cannot be done.
5185
5186 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5187 it is VOIDmode, they cannot both be CONST_INT.
5188
5189 UNSIGNEDP is for the case where we have to widen the operands
5190 to perform the operation. It says to use zero-extension.
5191
5192 NORMALIZEP is 1 if we should convert the result to be either zero
5193 or one. NORMALIZEP is -1 if we should convert the result to be
5194 either zero or -1. If NORMALIZEP is zero, the result will be left
5195 "raw" out of the scc insn. */
5196
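/* As an illustration, a call such as
   emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1) tries to emit
   a sequence that sets TARGET to 1 when X == 0 and to 0 otherwise, and
   returns 0 if no such sequence can be found. */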
5197 rtx
5198 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5199 enum machine_mode mode, int unsignedp, int normalizep)
5200 {
5201 rtx subtarget;
5202 enum insn_code icode;
5203 enum machine_mode compare_mode;
5204 enum machine_mode target_mode = GET_MODE (target);
5205 rtx tem;
5206 rtx last = get_last_insn ();
5207 rtx pattern, comparison;
5208
5209 if (unsignedp)
5210 code = unsigned_condition (code);
5211
5212 /* If one operand is constant, make it the second one. Only do this
5213 if the other operand is not constant as well. */
5214
5215 if (swap_commutative_operands_p (op0, op1))
5216 {
5217 tem = op0;
5218 op0 = op1;
5219 op1 = tem;
5220 code = swap_condition (code);
5221 }
5222
5223 if (mode == VOIDmode)
5224 mode = GET_MODE (op0);
5225
5226 /* For some comparisons with 1 and -1, we can convert them to
5227 comparisons with zero. This will often produce more opportunities for
5228 store-flag insns. */
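/* Concretely, as the switch below implements: X < 1 becomes X <= 0,
   X <= -1 becomes X < 0, X >= 1 becomes X > 0, X > -1 becomes X >= 0,
   and for unsigned comparisons X >= 1 becomes X != 0 and X < 1 becomes
   X == 0. */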
5229
5230 switch (code)
5231 {
5232 case LT:
5233 if (op1 == const1_rtx)
5234 op1 = const0_rtx, code = LE;
5235 break;
5236 case LE:
5237 if (op1 == constm1_rtx)
5238 op1 = const0_rtx, code = LT;
5239 break;
5240 case GE:
5241 if (op1 == const1_rtx)
5242 op1 = const0_rtx, code = GT;
5243 break;
5244 case GT:
5245 if (op1 == constm1_rtx)
5246 op1 = const0_rtx, code = GE;
5247 break;
5248 case GEU:
5249 if (op1 == const1_rtx)
5250 op1 = const0_rtx, code = NE;
5251 break;
5252 case LTU:
5253 if (op1 == const1_rtx)
5254 op1 = const0_rtx, code = EQ;
5255 break;
5256 default:
5257 break;
5258 }
5259
5260 /* If we are comparing a double-word integer with zero or -1, we can
5261 convert the comparison into one involving a single word. */
5262 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5263 && GET_MODE_CLASS (mode) == MODE_INT
5264 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5265 {
5266 if ((code == EQ || code == NE)
5267 && (op1 == const0_rtx || op1 == constm1_rtx))
5268 {
5269 rtx op00, op01, op0both;
5270
5271 /* Do a logical OR or AND of the two words and compare the
5272 result. */
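/* For instance, on a 32-bit-word target a DImode test X == 0 becomes
   (low word | high word) == 0, while X == -1 becomes
   (low word & high word) == -1, both handled in word_mode by the
   recursive call below. */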
5273 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5274 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5275 op0both = expand_binop (word_mode,
5276 op1 == const0_rtx ? ior_optab : and_optab,
5277 op00, op01, NULL_RTX, unsignedp,
5278 OPTAB_DIRECT);
5279
5280 if (op0both != 0)
5281 return emit_store_flag (target, code, op0both, op1, word_mode,
5282 unsignedp, normalizep);
5283 }
5284 else if ((code == LT || code == GE) && op1 == const0_rtx)
5285 {
5286 rtx op0h;
5287
5288 /* If testing the sign bit, we can just test the high word. */
5289 op0h = simplify_gen_subreg (word_mode, op0, mode,
5290 subreg_highpart_offset (word_mode,
5291 mode));
5292 return emit_store_flag (target, code, op0h, op1, word_mode,
5293 unsignedp, normalizep);
5294 }
5295 }
5296
5297 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5298 complement of A (for GE) and shifting the sign bit to the low bit. */
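/* Illustration in QImode with STORE_FLAG_VALUE == 1: A < 0 becomes the
   logical shift (unsigned) A >> 7, which yields 1 for A == -3 (0xFD) and
   0 for A == 5; A >= 0 instead shifts ~A, giving the opposite values. */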
5299 if (op1 == const0_rtx && (code == LT || code == GE)
5300 && GET_MODE_CLASS (mode) == MODE_INT
5301 && (normalizep || STORE_FLAG_VALUE == 1
5302 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5303 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5304 == ((unsigned HOST_WIDE_INT) 1
5305 << (GET_MODE_BITSIZE (mode) - 1))))))
5306 {
5307 subtarget = target;
5308
5309 /* If the result is to be wider than OP0, it is best to convert it
5310 first. If it is to be narrower, it is *incorrect* to convert it
5311 first. */
5312 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5313 {
5314 op0 = convert_modes (target_mode, mode, op0, 0);
5315 mode = target_mode;
5316 }
5317
5318 if (target_mode != mode)
5319 subtarget = 0;
5320
5321 if (code == GE)
5322 op0 = expand_unop (mode, one_cmpl_optab, op0,
5323 ((STORE_FLAG_VALUE == 1 || normalizep)
5324 ? 0 : subtarget), 0);
5325
5326 if (STORE_FLAG_VALUE == 1 || normalizep)
5327 /* If we are supposed to produce a 0/1 value, we want to do
5328 a logical shift from the sign bit to the low-order bit; for
5329 a -1/0 value, we do an arithmetic shift. */
5330 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5331 size_int (GET_MODE_BITSIZE (mode) - 1),
5332 subtarget, normalizep != -1);
5333
5334 if (mode != target_mode)
5335 op0 = convert_modes (target_mode, mode, op0, 0);
5336
5337 return op0;
5338 }
5339
5340 icode = setcc_gen_code[(int) code];
5341
5342 if (icode != CODE_FOR_nothing)
5343 {
5344 insn_operand_predicate_fn pred;
5345
5346 /* We think we may be able to do this with an scc insn. Emit the
5347 comparison and then the scc insn. */
5348
5349 do_pending_stack_adjust ();
5350 last = get_last_insn ();
5351
5352 comparison
5353 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5354 if (CONSTANT_P (comparison))
5355 {
5356 switch (GET_CODE (comparison))
5357 {
5358 case CONST_INT:
5359 if (comparison == const0_rtx)
5360 return const0_rtx;
5361 break;
5362
5363 #ifdef FLOAT_STORE_FLAG_VALUE
5364 case CONST_DOUBLE:
5365 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5366 return const0_rtx;
5367 break;
5368 #endif
5369 default:
5370 gcc_unreachable ();
5371 }
5372
5373 if (normalizep == 1)
5374 return const1_rtx;
5375 if (normalizep == -1)
5376 return constm1_rtx;
5377 return const_true_rtx;
5378 }
5379
5380 /* The code of COMPARISON may not match CODE if compare_from_rtx
5381 decided to swap its operands and reverse the original code.
5382
5383 We know that compare_from_rtx returns either a CONST_INT or
5384 a new comparison code, so it is safe to just extract the
5385 code from COMPARISON. */
5386 code = GET_CODE (comparison);
5387
5388 /* Get a reference to the target in the proper mode for this insn. */
5389 compare_mode = insn_data[(int) icode].operand[0].mode;
5390 subtarget = target;
5391 pred = insn_data[(int) icode].operand[0].predicate;
5392 if (optimize || ! (*pred) (subtarget, compare_mode))
5393 subtarget = gen_reg_rtx (compare_mode);
5394
5395 pattern = GEN_FCN (icode) (subtarget);
5396 if (pattern)
5397 {
5398 emit_insn (pattern);
5399 return emit_store_flag_1 (target, subtarget, compare_mode,
5400 normalizep);
5401 }
5402 }
5403 else
5404 {
5405 /* We don't have an scc insn, so try a cstore insn. */
5406
5407 for (compare_mode = mode; compare_mode != VOIDmode;
5408 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5409 {
5410 icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5411 if (icode != CODE_FOR_nothing)
5412 break;
5413 }
5414
5415 if (icode != CODE_FOR_nothing)
5416 {
5417 enum machine_mode result_mode
5418 = insn_data[(int) icode].operand[0].mode;
5419 rtx cstore_op0 = op0;
5420 rtx cstore_op1 = op1;
5421
5422 do_pending_stack_adjust ();
5423 last = get_last_insn ();
5424
5425 if (compare_mode != mode)
5426 {
5427 cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5428 unsignedp);
5429 cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5430 unsignedp);
5431 }
5432
5433 if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5434 compare_mode))
5435 cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5436
5437 if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5438 compare_mode))
5439 cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5440
5441 comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5442 cstore_op1);
5443 subtarget = target;
5444
5445 if (optimize || !(insn_data[(int) icode].operand[0].predicate
5446 (subtarget, result_mode)))
5447 subtarget = gen_reg_rtx (result_mode);
5448
5449 pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5450 cstore_op1);
5451
5452 if (pattern)
5453 {
5454 emit_insn (pattern);
5455 return emit_store_flag_1 (target, subtarget, result_mode,
5456 normalizep);
5457 }
5458 }
5459 }
5460
5461 delete_insns_since (last);
5462
5463 /* If optimizing, use different pseudo registers for each insn, instead
5464 of reusing the same pseudo. This leads to better CSE, but slows
5465 down the compiler, since there are more pseudos. */
5466 subtarget = (!optimize
5467 && (target_mode == mode)) ? target : NULL_RTX;
5468
5469 /* If we reached here, we can't do this with an scc insn. However, there
5470 are some comparisons that can be done directly. For example, if
5471 this is an equality comparison of integers, we can try to exclusive-or
5472 (or subtract) the two operands and use a recursive call to try the
5473 comparison with zero. Don't do any of these cases if branches are
5474 very cheap. */
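/* E.g. A == B can be rewritten as (A ^ B) == 0 (or (A - B) == 0 if no
   xor sequence is available), and the comparison with zero is then
   handled by the recursive emit_store_flag call below. */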
5475
5476 if (BRANCH_COST > 0
5477 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5478 && op1 != const0_rtx)
5479 {
5480 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5481 OPTAB_WIDEN);
5482
5483 if (tem == 0)
5484 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5485 OPTAB_WIDEN);
5486 if (tem != 0)
5487 tem = emit_store_flag (target, code, tem, const0_rtx,
5488 mode, unsignedp, normalizep);
5489 if (tem == 0)
5490 delete_insns_since (last);
5491 return tem;
5492 }
5493
5494 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5495 the constant zero. Reject all other comparisons at this point. Only
5496 do LE and GT if branches are expensive since they are expensive on
5497 2-operand machines. */
5498
5499 if (BRANCH_COST == 0
5500 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5501 || (code != EQ && code != NE
5502 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5503 return 0;
5504
5505 /* See what we need to return. We can only return a 1, -1, or the
5506 sign bit. */
5507
5508 if (normalizep == 0)
5509 {
5510 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5511 normalizep = STORE_FLAG_VALUE;
5512
5513 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5514 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5515 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5516 ;
5517 else
5518 return 0;
5519 }
5520
5521 /* Try to put the result of the comparison in the sign bit. Assume we can't
5522 do the necessary operation below. */
5523
5524 tem = 0;
5525
5526 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5527 the sign bit set. */
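/* Numeric check in QImode: A == 5 gives 5 | 4 == 5 (sign bit clear,
   since 5 > 0); A == 0 gives 0 | 0xFF == 0xFF (sign bit set); A == -3
   (0xFD) gives 0xFD | 0xFC == 0xFD (sign bit set). */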
5528
5529 if (code == LE)
5530 {
5531 /* This is destructive, so SUBTARGET can't be OP0. */
5532 if (rtx_equal_p (subtarget, op0))
5533 subtarget = 0;
5534
5535 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5536 OPTAB_WIDEN);
5537 if (tem)
5538 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5539 OPTAB_WIDEN);
5540 }
5541
5542 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5543 number of bits in the mode of OP0, minus one. */
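/* Numeric check in QImode (BITS == 7): A == 5 gives (5 >> 7) - 5 == -5
   (sign bit set, so A > 0); A == 0 gives 0 (sign bit clear); A == -3
   gives -1 - (-3) == 2 (sign bit clear). */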
5544
5545 if (code == GT)
5546 {
5547 if (rtx_equal_p (subtarget, op0))
5548 subtarget = 0;
5549
5550 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5551 size_int (GET_MODE_BITSIZE (mode) - 1),
5552 subtarget, 0);
5553 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5554 OPTAB_WIDEN);
5555 }
5556
5557 if (code == EQ || code == NE)
5558 {
5559 /* For EQ or NE, one way to do the comparison is to apply an operation
5560 that converts the operand into a positive number if it is nonzero
5561 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5562 for NE we negate. This puts the result in the sign bit. Then we
5563 normalize with a shift, if needed.
5564
5565 Two operations that can do the above actions are ABS and FFS, so try
5566 them. If that doesn't work, and MODE is smaller than a full word,
5567 we can use zero-extension to the wider mode (an unsigned conversion)
5568 as the operation. */
5569
5570 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5571 that is compensated by the subsequent overflow when subtracting
5572 one / negating. */
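/* Illustration with ABS in QImode: for A == 3 the absolute value is 3,
   so EQ yields 3 - 1 == 2 (sign bit clear) and NE yields -3 (sign bit
   set); for A == 0, EQ yields -1 (sign bit set) and NE yields 0 (sign
   bit clear). */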
5573
5574 if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5575 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5576 else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5577 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5578 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5579 {
5580 tem = convert_modes (word_mode, mode, op0, 1);
5581 mode = word_mode;
5582 }
5583
5584 if (tem != 0)
5585 {
5586 if (code == EQ)
5587 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5588 0, OPTAB_WIDEN);
5589 else
5590 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5591 }
5592
5593 /* If we couldn't do it that way, for NE we can "or" the two's complement
5594 of the value with itself. For EQ, we take the one's complement of
5595 that "or", which is an extra insn, so we only handle EQ if branches
5596 are expensive. */
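/* Numeric check in QImode: for A == 5, -A | A == 0xFB | 0x05 == 0xFF
   (sign bit set, so A != 0); for A == 0 the result is 0 (sign bit
   clear); taking the one's complement of that result then gives the EQ
   answer. */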
5597
5598 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5599 {
5600 if (rtx_equal_p (subtarget, op0))
5601 subtarget = 0;
5602
5603 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5604 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5605 OPTAB_WIDEN);
5606
5607 if (tem && code == EQ)
5608 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5609 }
5610 }
5611
5612 if (tem && normalizep)
5613 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5614 size_int (GET_MODE_BITSIZE (mode) - 1),
5615 subtarget, normalizep == 1);
5616
5617 if (tem)
5618 {
5619 if (GET_MODE (tem) != target_mode)
5620 {
5621 convert_move (target, tem, 0);
5622 tem = target;
5623 }
5624 else if (!subtarget)
5625 {
5626 emit_move_insn (target, tem);
5627 tem = target;
5628 }
5629 }
5630 else
5631 delete_insns_since (last);
5632
5633 return tem;
5634 }
5635
5636 /* Like emit_store_flag, but always succeeds. */
5637
5638 rtx
5639 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5640 enum machine_mode mode, int unsignedp, int normalizep)
5641 {
5642 rtx tem, label;
5643
5644 /* First see if emit_store_flag can do the job. */
5645 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5646 if (tem != 0)
5647 return tem;
5648
5649 if (normalizep == 0)
5650 normalizep = 1;
5651
5652 /* If this failed, we have to do this with set/compare/jump/set code. */
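/* The sequence emitted below is essentially:
   TARGET = 1; if (OP0 CODE OP1) goto LABEL; TARGET = 0; LABEL: */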
5653
5654 if (!REG_P (target)
5655 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5656 target = gen_reg_rtx (GET_MODE (target));
5657
5658 emit_move_insn (target, const1_rtx);
5659 label = gen_label_rtx ();
5660 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5661 NULL_RTX, label);
5662
5663 emit_move_insn (target, const0_rtx);
5664 emit_label (label);
5665
5666 return target;
5667 }
5668 \f
5669 /* Perform possibly multi-word comparison and conditional jump to LABEL
5670 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5671 now a thin wrapper around do_compare_rtx_and_jump. */
5672
5673 static void
5674 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5675 rtx label)
5676 {
5677 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5678 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5679 NULL_RTX, NULL_RTX, label);
5680 }