gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost[2];
 102 static int add_cost[2][NUM_MACHINE_MODES];
 103 static int neg_cost[2][NUM_MACHINE_MODES];
 104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[2][NUM_MACHINE_MODES];
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];
 109 static int udiv_cost[2][NUM_MACHINE_MODES];
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140   int speed;
 141
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (speed = 0; speed < 2; speed++)
 209     {
 210       crtl->maybe_hot_insn_p = speed;
 211       zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
 212
 213       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 214            mode != VOIDmode;
 215            mode = GET_MODE_WIDER_MODE (mode))
 216         {
 217           PUT_MODE (&all.reg, mode);
 218           PUT_MODE (&all.plus, mode);
 219           PUT_MODE (&all.neg, mode);
 220           PUT_MODE (&all.mult, mode);
 221           PUT_MODE (&all.sdiv, mode);
 222           PUT_MODE (&all.udiv, mode);
 223           PUT_MODE (&all.sdiv_32, mode);
 224           PUT_MODE (&all.smod_32, mode);
 225           PUT_MODE (&all.wide_trunc, mode);
 226           PUT_MODE (&all.shift, mode);
 227           PUT_MODE (&all.shift_mult, mode);
 228           PUT_MODE (&all.shift_add, mode);
 229           PUT_MODE (&all.shift_sub, mode);
 230
 231           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 232           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 233           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 234           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 235           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 236
 237           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 238                                           <= 2 * add_cost[speed][mode]);
 239           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 240                                           <= 4 * add_cost[speed][mode]);
 241
 242           wider_mode = GET_MODE_WIDER_MODE (mode);
 243           if (wider_mode != VOIDmode)
 244             {
 245               PUT_MODE (&all.zext, wider_mode);
 246               PUT_MODE (&all.wide_mult, wider_mode);
 247               PUT_MODE (&all.wide_lshr, wider_mode);
 248               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 249
 250               mul_widen_cost[speed][wider_mode]
 251                 = rtx_cost (&all.wide_mult, SET, speed);
 252               mul_highpart_cost[speed][mode]
 253                 = rtx_cost (&all.wide_trunc, SET, speed);
 254             }
 255
 256           shift_cost[speed][mode][0] = 0;
 257           shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
 258             = add_cost[speed][mode];
 259
 260           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 261           for (m = 1; m < n; m++)
 262             {
 263               XEXP (&all.shift, 1) = cint[m];
 264               XEXP (&all.shift_mult, 1) = pow2[m];
 265
 266               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 267               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 268               shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
 269             }
 270         }
 271     }
 272   default_rtl_profile ();
 273 }
 274
 275 /* Return an rtx representing minus the value of X.
 276    MODE is the intended mode of the result,
 277    useful if X is a CONST_INT.  */
 278
 279 rtx
 280 negate_rtx (enum machine_mode mode, rtx x)
 281 {
 282   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 283
 284   if (result == 0)
 285     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 286
 287   return result;
 288 }
 289
 290 /* Report on the availability of insv/extv/extzv and the desired mode
 291    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 292    is false; else the mode of the specified operand.  If OPNO is -1,
 293    all the caller cares about is whether the insn is available.  */
 294 enum machine_mode
 295 mode_for_extraction (enum extraction_pattern pattern, int opno)
 296 {
 297   const struct insn_data *data;
 298
 299   switch (pattern)
 300     {
 301     case EP_insv:
 302       if (HAVE_insv)
 303         {
 304           data = &insn_data[CODE_FOR_insv];
 305           break;
 306         }
 307       return MAX_MACHINE_MODE;
 308
 309     case EP_extv:
 310       if (HAVE_extv)
 311         {
 312           data = &insn_data[CODE_FOR_extv];
 313           break;
 314         }
 315       return MAX_MACHINE_MODE;
 316
 317     case EP_extzv:
 318       if (HAVE_extzv)
 319         {
 320           data = &insn_data[CODE_FOR_extzv];
 321           break;
 322         }
 323       return MAX_MACHINE_MODE;
 324
 325     default:
 326       gcc_unreachable ();
 327     }
 328
 329   if (opno == -1)
 330     return VOIDmode;
 331
 332   /* Everyone who uses this function used to follow it with
 333      if (result == VOIDmode) result = word_mode; */
 334   if (data->operand[opno].mode == VOIDmode)
 335     return word_mode;
 336   return data->operand[opno].mode;
 337 }
 338
 339 /* Return true if X, of mode MODE, matches the predicate for operand
 340    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 341    the ambient volatile_ok setting.  */
 342
 343 static bool
 344 check_predicate_volatile_ok (enum insn_code icode, int opno,
 345                              rtx x, enum machine_mode mode)
 346 {
 347   bool save_volatile_ok, result;
 348
 349   save_volatile_ok = volatile_ok;
 350   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 351   volatile_ok = save_volatile_ok;
 352   return result;
 353 }
 354 \f
 355 /* A subroutine of store_bit_field, with the same arguments.  Return true
 356    if the operation could be implemented.
 357
 358    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 359    no other way of implementing the operation.  If FALLBACK_P is false,
 360    return false instead.  */
 361
 362 static bool
 363 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 364                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 365                    rtx value, bool fallback_p)
 366 {
 367   unsigned int unit
 368     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 369   unsigned HOST_WIDE_INT offset, bitpos;
 370   rtx op0 = str_rtx;
 371   int byte_offset;
 372   rtx orig_value;
 373
 374   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 375
 376   while (GET_CODE (op0) == SUBREG)
 377     {
 378       /* The following line once was done only if WORDS_BIG_ENDIAN,
 379          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 380          meaningful at a much higher level; when structures are copied
 381          between memory and regs, the higher-numbered regs
 382          always get higher addresses.  */
 383       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 384       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 385
 386       byte_offset = 0;
 387
 388       /* Paradoxical subregs need special handling on big endian machines.  */
 389       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 390         {
 391           int difference = inner_mode_size - outer_mode_size;
 392
 393           if (WORDS_BIG_ENDIAN)
 394             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 395           if (BYTES_BIG_ENDIAN)
 396             byte_offset += difference % UNITS_PER_WORD;
 397         }
 398       else
 399         byte_offset = SUBREG_BYTE (op0);
 400
 401       bitnum += byte_offset * BITS_PER_UNIT;
 402       op0 = SUBREG_REG (op0);
 403     }
 404
 405   /* No action is needed if the target is a register and if the field
 406      lies completely outside that register.  This can occur if the source
 407      code contains an out-of-bounds access to a small array.  */
 408   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 409     return true;
 410
 411   /* Use vec_set patterns for inserting parts of vectors whenever
 412      available.  */
 413   if (VECTOR_MODE_P (GET_MODE (op0))
 414       && !MEM_P (op0)
 415       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 416           != CODE_FOR_nothing)
 417       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 418       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 419       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 420     {
 421       enum machine_mode outermode = GET_MODE (op0);
 422       enum machine_mode innermode = GET_MODE_INNER (outermode);
 423       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 424       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 425       rtx rtxpos = GEN_INT (pos);
 426       rtx src = value;
 427       rtx dest = op0;
 428       rtx pat, seq;
 429       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 430       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 431       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 432
 433       start_sequence ();
 434
 435       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 436         src = copy_to_mode_reg (mode1, src);
 437
 438       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 439         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 440
 441       /* We could handle this, but we should always be called with a pseudo
 442          for our targets and all insns should take them as outputs.  */
 443       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 444                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 445                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 446       pat = GEN_FCN (icode) (dest, src, rtxpos);
 447       seq = get_insns ();
 448       end_sequence ();
 449       if (pat)
 450         {
 451           emit_insn (seq);
 452           emit_insn (pat);
 453           return true;
 454         }
 455     }
 456
 457   /* If the target is a register, overwriting the entire object, or storing
 458      a full-word or multi-word field can be done with just a SUBREG.
 459
 460      If the target is memory, storing any naturally aligned field can be
 461      done with a simple store.  For targets that support fast unaligned
 462      memory, any naturally sized, unit aligned field can be done directly.  */
 463
 464   offset = bitnum / unit;
 465   bitpos = bitnum % unit;
 466   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 467                 + (offset * UNITS_PER_WORD);
 468
 469   if (bitpos == 0
 470       && bitsize == GET_MODE_BITSIZE (fieldmode)
 471       && (!MEM_P (op0)
 472           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 473              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 474              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 475           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 476              || (offset * BITS_PER_UNIT % bitsize == 0
 477                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 478     {
 479       if (MEM_P (op0))
 480         op0 = adjust_address (op0, fieldmode, offset);
 481       else if (GET_MODE (op0) != fieldmode)
 482         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 483                                    byte_offset);
 484       emit_move_insn (op0, value);
 485       return true;
 486     }
 487
 488   /* Make sure we are playing with integral modes.  Pun with subregs
 489      if we aren't.  This must come after the entire register case above,
 490      since that case is valid for any mode.  The following cases are only
 491      valid for integral modes.  */
 492   {
 493     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 494     if (imode != GET_MODE (op0))
 495       {
 496         if (MEM_P (op0))
 497           op0 = adjust_address (op0, imode, 0);
 498         else
 499           {
 500             gcc_assert (imode != BLKmode);
 501             op0 = gen_lowpart (imode, op0);
 502           }
 503       }
 504   }
 505
 506   /* We may be accessing data outside the field, which means
 507      we can alias adjacent data.  */
 508   if (MEM_P (op0))
 509     {
 510       op0 = shallow_copy_rtx (op0);
 511       set_mem_alias_set (op0, 0);
 512       set_mem_expr (op0, 0);
 513     }
 514
 515   /* If OP0 is a register, BITPOS must count within a word.
 516      But as we have it, it counts within whatever size OP0 now has.
 517      On a bigendian machine, these are not the same, so convert.  */
 518   if (BYTES_BIG_ENDIAN
 519       && !MEM_P (op0)
 520       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 521     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 522
 523   /* Storing an lsb-aligned field in a register
 524      can be done with a movestrict instruction.  */
 525
 526   if (!MEM_P (op0)
 527       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 528       && bitsize == GET_MODE_BITSIZE (fieldmode)
 529       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 530           != CODE_FOR_nothing))
 531     {
 532       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 533       rtx insn;
 534       rtx start = get_last_insn ();
 535
 536       /* Get appropriate low part of the value being stored.  */
 537       if (GET_CODE (value) == CONST_INT || REG_P (value))
 538         value = gen_lowpart (fieldmode, value);
 539       else if (!(GET_CODE (value) == SYMBOL_REF
 540                  || GET_CODE (value) == LABEL_REF
 541                  || GET_CODE (value) == CONST))
 542         value = convert_to_mode (fieldmode, value, 0);
 543
 544       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 545         value = copy_to_mode_reg (fieldmode, value);
 546
 547       if (GET_CODE (op0) == SUBREG)
 548         {
 549           /* Else we've got some float mode source being extracted into
 550              a different float mode destination -- this combination of
 551              subregs results in Severe Tire Damage.  */
 552           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 553                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 554                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 555           op0 = SUBREG_REG (op0);
 556         }
 557
 558       insn = (GEN_FCN (icode)
 559                  (gen_rtx_SUBREG (fieldmode, op0,
 560                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 561                                   + (offset * UNITS_PER_WORD)),
 562                                   value));
 563       if (insn)
 564         {
 565           emit_insn (insn);
 566           return true;
 567         }
 568       delete_insns_since (start);
 569     }
 570
 571   /* Handle fields bigger than a word.  */
 572
 573   if (bitsize > BITS_PER_WORD)
 574     {
 575       /* Here we transfer the words of the field
 576          in the order least significant first.
 577          This is because the most significant word is the one which may
 578          be less than full.
 579          However, only do that if the value is not BLKmode.  */
 580
 581       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 582       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 583       unsigned int i;
 584       rtx last;
 585
 586       /* This is the mode we must force value to, so that there will be enough
 587          subwords to extract.  Note that fieldmode will often (always?) be
 588          VOIDmode, because that is what store_field uses to indicate that this
 589          is a bit field, but passing VOIDmode to operand_subword_force
 590          is not allowed.  */
 591       fieldmode = GET_MODE (value);
 592       if (fieldmode == VOIDmode)
 593         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 594
 595       last = get_last_insn ();
 596       for (i = 0; i < nwords; i++)
 597         {
 598           /* If I is 0, use the low-order word in both field and target;
 599              if I is 1, use the next to lowest word; and so on.  */
 600           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 601           unsigned int bit_offset = (backwards
 602                                      ? MAX ((int) bitsize - ((int) i + 1)
 603                                             * BITS_PER_WORD,
 604                                             0)
 605                                      : (int) i * BITS_PER_WORD);
 606           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 607
 608           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 609                                             bitsize - i * BITS_PER_WORD),
 610                                   bitnum + bit_offset, word_mode,
 611                                   value_word, fallback_p))
 612             {
 613               delete_insns_since (last);
 614               return false;
 615             }
 616         }
 617       return true;
 618     }
 619
 620   /* From here on we can assume that the field to be stored in is
 621      a full-word (whatever type that is), since it is shorter than a word.  */
 622
 623   /* OFFSET is the number of words or bytes (UNIT says which)
 624      from STR_RTX to the first word or byte containing part of the field.  */
 625
 626   if (!MEM_P (op0))
 627     {
 628       if (offset != 0
 629           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 630         {
 631           if (!REG_P (op0))
 632             {
 633               /* Since this is a destination (lvalue), we can't copy
 634                  it to a pseudo.  We can remove a SUBREG that does not
 635                  change the size of the operand.  Such a SUBREG may
 636                  have been added above.  */
 637               gcc_assert (GET_CODE (op0) == SUBREG
 638                           && (GET_MODE_SIZE (GET_MODE (op0))
 639                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 640               op0 = SUBREG_REG (op0);
 641             }
 642           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 643                                 op0, (offset * UNITS_PER_WORD));
 644         }
 645       offset = 0;
 646     }
 647
 648   /* If VALUE has a floating-point or complex mode, access it as an
 649      integer of the corresponding size.  This can occur on a machine
 650      with 64 bit registers that uses SFmode for float.  It can also
 651      occur for unaligned float or complex fields.  */
 652   orig_value = value;
 653   if (GET_MODE (value) != VOIDmode
 654       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 655       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 656     {
 657       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 658       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 659     }
 660
 661   /* Now OFFSET is nonzero only if OP0 is memory
 662      and is therefore always measured in bytes.  */
 663
 664   if (HAVE_insv
 665       && GET_MODE (value) != BLKmode
 666       && bitsize > 0
 667       && GET_MODE_BITSIZE (op_mode) >= bitsize
 668       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 669             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 670       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 671                                                         VOIDmode)
 672       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 673     {
 674       int xbitpos = bitpos;
 675       rtx value1;
 676       rtx xop0 = op0;
 677       rtx last = get_last_insn ();
 678       rtx pat;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in OP_MODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 691         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (op_mode);
 705
 706       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != op_mode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (op_mode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (op_mode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), op_mode);
 733           else
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             gcc_assert (CONSTANT_P (value));
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, op_mode)))
 745         value1 = force_reg (op_mode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         {
 750           emit_insn (pat);
 751           return true;
 752         }
 753       delete_insns_since (last);
 754     }
 755
 756   /* If OP0 is a memory, try copying it to a register and seeing if a
 757      cheap register alternative is available.  */
 758   if (HAVE_insv && MEM_P (op0))
 759     {
 760       enum machine_mode bestmode;
 761
 762       /* Get the mode to use for inserting into this field.  If OP0 is
 763          BLKmode, get the smallest mode consistent with the alignment. If
 764          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 765          mode. Otherwise, use the smallest mode containing the field.  */
 766
 767       if (GET_MODE (op0) == BLKmode
 768           || (op_mode != MAX_MACHINE_MODE
 769               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 770         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 771                                   (op_mode == MAX_MACHINE_MODE
 772                                    ? VOIDmode : op_mode),
 773                                   MEM_VOLATILE_P (op0));
 774       else
 775         bestmode = GET_MODE (op0);
 776
 777       if (bestmode != VOIDmode
 778           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 779           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 780                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 781         {
 782           rtx last, tempreg, xop0;
 783           unsigned HOST_WIDE_INT xoffset, xbitpos;
 784
 785           last = get_last_insn ();
 786
 787           /* Adjust address to point to the containing unit of
 788              that mode.  Compute the offset as a multiple of this unit,
 789              counting in bytes.  */
 790           unit = GET_MODE_BITSIZE (bestmode);
 791           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 792           xbitpos = bitnum % unit;
 793           xop0 = adjust_address (op0, bestmode, xoffset);
 794
 795           /* Fetch that unit, store the bitfield in it, then store
 796              the unit.  */
 797           tempreg = copy_to_reg (xop0);
 798           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 799                                  fieldmode, orig_value, false))
 800             {
 801               emit_move_insn (xop0, tempreg);
 802               return true;
 803             }
 804           delete_insns_since (last);
 805         }
 806     }
 807
 808   if (!fallback_p)
 809     return false;
 810
 811   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 812   return true;
 813 }
 814
 815 /* Generate code to store value from rtx VALUE
 816    into a bit-field within structure STR_RTX
 817    containing BITSIZE bits starting at bit BITNUM.
 818    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 819
 820 void
 821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 822                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 823                  rtx value)
 824 {
 825   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 826     gcc_unreachable ();
 827 }
 828 \f
 829 /* Use shifts and boolean operations to store VALUE
 830    into a bit field of width BITSIZE
 831    in a memory location specified by OP0 except offset by OFFSET bytes.
 832      (OFFSET must be 0 if OP0 is a register.)
 833    The field starts at position BITPOS within the byte.
 834     (If OP0 is a register, it may be a full word or a narrower mode,
 835      but BITPOS still counts within a full word,
 836      which is significant on bigendian machines.)  */
 837
 838 static void
 839 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 840                        unsigned HOST_WIDE_INT bitsize,
 841                        unsigned HOST_WIDE_INT bitpos, rtx value)
 842 {
 843   enum machine_mode mode;
 844   unsigned int total_bits = BITS_PER_WORD;
 845   rtx temp;
 846   int all_zero = 0;
 847   int all_one = 0;
 848
 849   /* There is a case not handled here:
 850      a structure with a known alignment of just a halfword
 851      and a field split across two aligned halfwords within the structure.
 852      Or likewise a structure with a known alignment of just a byte
 853      and a field split across two bytes.
 854      Such cases are not supposed to be able to occur.  */
 855
 856   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 857     {
 858       gcc_assert (!offset);
 859       /* Special treatment for a bit field split across two registers.  */
 860       if (bitsize + bitpos > BITS_PER_WORD)
 861         {
 862           store_split_bit_field (op0, bitsize, bitpos, value);
 863           return;
 864         }
 865     }
 866   else
 867     {
 868       /* Get the proper mode to use for this field.  We want a mode that
 869          includes the entire field.  If such a mode would be larger than
 870          a word, we won't be doing the extraction the normal way.
 871          We don't want a mode bigger than the destination.  */
 872
 873       mode = GET_MODE (op0);
 874       if (GET_MODE_BITSIZE (mode) == 0
 875           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 876         mode = word_mode;
 877       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 878                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 879
 880       if (mode == VOIDmode)
 881         {
 882           /* The only way this should occur is if the field spans word
 883              boundaries.  */
 884           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 885                                  value);
 886           return;
 887         }
 888
 889       total_bits = GET_MODE_BITSIZE (mode);
 890
 891       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 892          be in the range 0 to total_bits-1, and put any excess bytes in
 893          OFFSET.  */
 894       if (bitpos >= total_bits)
 895         {
 896           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 897           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 898                      * BITS_PER_UNIT);
 899         }
 900
 901       /* Get ref to an aligned byte, halfword, or word containing the field.
 902          Adjust BITPOS to be position within a word,
 903          and OFFSET to be the offset of that word.
 904          Then alter OP0 to refer to that word.  */
 905       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 906       offset -= (offset % (total_bits / BITS_PER_UNIT));
 907       op0 = adjust_address (op0, mode, offset);
 908     }
 909
 910   mode = GET_MODE (op0);
 911
 912   /* Now MODE is either some integral mode for a MEM as OP0,
 913      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 914      The bit field is contained entirely within OP0.
 915      BITPOS is the starting bit number within OP0.
 916      (OP0's mode may actually be narrower than MODE.)  */
 917
 918   if (BYTES_BIG_ENDIAN)
 919       /* BITPOS is the distance between our msb
 920          and that of the containing datum.
 921          Convert it to the distance from the lsb.  */
 922       bitpos = total_bits - bitsize - bitpos;
 923
 924   /* Now BITPOS is always the distance between our lsb
 925      and that of OP0.  */
 926
 927   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 928      we must first convert its mode to MODE.  */
 929
 930   if (GET_CODE (value) == CONST_INT)
 931     {
 932       HOST_WIDE_INT v = INTVAL (value);
 933
 934       if (bitsize < HOST_BITS_PER_WIDE_INT)
 935         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 936
 937       if (v == 0)
 938         all_zero = 1;
 939       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 940                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 941                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 942         all_one = 1;
 943
 944       value = lshift_value (mode, value, bitpos, bitsize);
 945     }
 946   else
 947     {
 948       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 949                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 950
 951       if (GET_MODE (value) != mode)
 952         value = convert_to_mode (mode, value, 1);
 953
 954       if (must_and)
 955         value = expand_binop (mode, and_optab, value,
 956                               mask_rtx (mode, 0, bitsize, 0),
 957                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 958       if (bitpos > 0)
 959         value = expand_shift (LSHIFT_EXPR, mode, value,
 960                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 961     }
 962
 963   /* Now clear the chosen bits in OP0,
 964      except that if VALUE is -1 we need not bother.  */
 965   /* We keep the intermediates in registers to allow CSE to combine
 966      consecutive bitfield assignments.  */
 967
 968   temp = force_reg (mode, op0);
 969
 970   if (! all_one)
 971     {
 972       temp = expand_binop (mode, and_optab, temp,
 973                            mask_rtx (mode, bitpos, bitsize, 1),
 974                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 975       temp = force_reg (mode, temp);
 976     }
 977
 978   /* Now logical-or VALUE into OP0, unless it is zero.  */
 979
 980   if (! all_zero)
 981     {
 982       temp = expand_binop (mode, ior_optab, temp, value,
 983                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 984       temp = force_reg (mode, temp);
 985     }
 986
 987   if (op0 != temp)
 988     {
 989       op0 = copy_rtx (op0);
 990       emit_move_insn (op0, temp);
 991     }
 992 }
 993 \f
 994 /* Store a bit field that is split across multiple accessible memory objects.
 995
 996    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 997    BITSIZE is the field width; BITPOS the position of its first bit
 998    (within the word).
 999    VALUE is the value to store.
1000
1001    This does not yet handle fields wider than BITS_PER_WORD.  */
1002
1003 static void
1004 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1005                        unsigned HOST_WIDE_INT bitpos, rtx value)
1006 {
1007   unsigned int unit;
1008   unsigned int bitsdone = 0;
1009
1010   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1011      much at a time.  */
1012   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1013     unit = BITS_PER_WORD;
1014   else
1015     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1016
1017   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1018      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1019      that VALUE might be a floating-point constant.  */
1020   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1021     {
1022       rtx word = gen_lowpart_common (word_mode, value);
1023
1024       if (word && (value != word))
1025         value = word;
1026       else
1027         value = gen_lowpart_common (word_mode,
1028                                     force_reg (GET_MODE (value) != VOIDmode
1029                                                ? GET_MODE (value)
1030                                                : word_mode, value));
1031     }
1032
1033   while (bitsdone < bitsize)
1034     {
1035       unsigned HOST_WIDE_INT thissize;
1036       rtx part, word;
1037       unsigned HOST_WIDE_INT thispos;
1038       unsigned HOST_WIDE_INT offset;
1039
1040       offset = (bitpos + bitsdone) / unit;
1041       thispos = (bitpos + bitsdone) % unit;
1042
1043       /* THISSIZE must not overrun a word boundary.  Otherwise,
1044          store_fixed_bit_field will call us again, and we will mutually
1045          recurse forever.  */
1046       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1047       thissize = MIN (thissize, unit - thispos);
1048
1049       if (BYTES_BIG_ENDIAN)
1050         {
1051           int total_bits;
1052
1053           /* We must do an endian conversion exactly the same way as it is
1054              done in extract_bit_field, so that the two calls to
1055              extract_fixed_bit_field will have comparable arguments.  */
1056           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1057             total_bits = BITS_PER_WORD;
1058           else
1059             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1060
1061           /* Fetch successively less significant portions.  */
1062           if (GET_CODE (value) == CONST_INT)
1063             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1064                              >> (bitsize - bitsdone - thissize))
1065                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1066           else
1067             /* The args are chosen so that the last part includes the
1068                lsb.  Give extract_bit_field the value it needs (with
1069                endianness compensation) to fetch the piece we want.  */
1070             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1071                                             total_bits - bitsize + bitsdone,
1072                                             NULL_RTX, 1);
1073         }
1074       else
1075         {
1076           /* Fetch successively more significant portions.  */
1077           if (GET_CODE (value) == CONST_INT)
1078             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1079                              >> bitsdone)
1080                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1081           else
1082             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1083                                             bitsdone, NULL_RTX, 1);
1084         }
1085
1086       /* If OP0 is a register, then handle OFFSET here.
1087
1088          When handling multiword bitfields, extract_bit_field may pass
1089          down a word_mode SUBREG of a larger REG for a bitfield that actually
1090          crosses a word boundary.  Thus, for a SUBREG, we must find
1091          the current word starting from the base register.  */
1092       if (GET_CODE (op0) == SUBREG)
1093         {
1094           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1095           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1096                                         GET_MODE (SUBREG_REG (op0)));
1097           offset = 0;
1098         }
1099       else if (REG_P (op0))
1100         {
1101           word = operand_subword_force (op0, offset, GET_MODE (op0));
1102           offset = 0;
1103         }
1104       else
1105         word = op0;
1106
1107       /* OFFSET is in UNITs, and UNIT is in bits.
1108          store_fixed_bit_field wants offset in bytes.  */
1109       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1110                              thispos, part);
1111       bitsdone += thissize;
1112     }
1113 }
1114 \f
1115 /* A subroutine of extract_bit_field_1 that converts return value X
1116    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1117    to extract_bit_field.  */
1118
1119 static rtx
1120 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1121                              enum machine_mode tmode, bool unsignedp)
1122 {
1123   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1124     return x;
1125
1126   /* If the x mode is not a scalar integral, first convert to the
1127      integer mode of that size and then access it as a floating-point
1128      value via a SUBREG.  */
1129   if (!SCALAR_INT_MODE_P (tmode))
1130     {
1131       enum machine_mode smode;
1132
1133       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1134       x = convert_to_mode (smode, x, unsignedp);
1135       x = force_reg (smode, x);
1136       return gen_lowpart (tmode, x);
1137     }
1138
1139   return convert_to_mode (tmode, x, unsignedp);
1140 }
1141
1142 /* A subroutine of extract_bit_field, with the same arguments.
1143    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1144    if we can find no other means of implementing the operation.
1145    if FALLBACK_P is false, return NULL instead.  */
1146
1147 static rtx
1148 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1149                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1150                      enum machine_mode mode, enum machine_mode tmode,
1151                      bool fallback_p)
1152 {
1153   unsigned int unit
1154     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1155   unsigned HOST_WIDE_INT offset, bitpos;
1156   rtx op0 = str_rtx;
1157   enum machine_mode int_mode;
1158   enum machine_mode ext_mode;
1159   enum machine_mode mode1;
1160   enum insn_code icode;
1161   int byte_offset;
1162
1163   if (tmode == VOIDmode)
1164     tmode = mode;
1165
1166   while (GET_CODE (op0) == SUBREG)
1167     {
1168       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1169       op0 = SUBREG_REG (op0);
1170     }
1171
1172   /* If we have an out-of-bounds access to a register, just return an
1173      uninitialized register of the required mode.  This can occur if the
1174      source code contains an out-of-bounds access to a small array.  */
1175   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1176     return gen_reg_rtx (tmode);
1177
1178   if (REG_P (op0)
1179       && mode == GET_MODE (op0)
1180       && bitnum == 0
1181       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1182     {
1183       /* We're trying to extract a full register from itself.  */
1184       return op0;
1185     }
1186
1187   /* See if we can get a better vector mode before extracting.  */
1188   if (VECTOR_MODE_P (GET_MODE (op0))
1189       && !MEM_P (op0)
1190       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1191     {
1192       enum machine_mode new_mode;
1193       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1194
1195       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1196         new_mode = MIN_MODE_VECTOR_FLOAT;
1197       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1198         new_mode = MIN_MODE_VECTOR_FRACT;
1199       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1200         new_mode = MIN_MODE_VECTOR_UFRACT;
1201       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1202         new_mode = MIN_MODE_VECTOR_ACCUM;
1203       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1204         new_mode = MIN_MODE_VECTOR_UACCUM;
1205       else
1206         new_mode = MIN_MODE_VECTOR_INT;
1207
1208       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1209         if (GET_MODE_NUNITS (new_mode) == nunits
1210             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1211             && targetm.vector_mode_supported_p (new_mode))
1212           break;
1213       if (new_mode != VOIDmode)
1214         op0 = gen_lowpart (new_mode, op0);
1215     }
1216
1217   /* Use vec_extract patterns for extracting parts of vectors whenever
1218      available.  */
1219   if (VECTOR_MODE_P (GET_MODE (op0))
1220       && !MEM_P (op0)
1221       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1222           != CODE_FOR_nothing)
1223       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1224           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1225     {
1226       enum machine_mode outermode = GET_MODE (op0);
1227       enum machine_mode innermode = GET_MODE_INNER (outermode);
1228       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1229       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1230       rtx rtxpos = GEN_INT (pos);
1231       rtx src = op0;
1232       rtx dest = NULL, pat, seq;
1233       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1234       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1235       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1236
1237       if (innermode == tmode || innermode == mode)
1238         dest = target;
1239
1240       if (!dest)
1241         dest = gen_reg_rtx (innermode);
1242
1243       start_sequence ();
1244
1245       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1246         dest = copy_to_mode_reg (mode0, dest);
1247
1248       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1249         src = copy_to_mode_reg (mode1, src);
1250
1251       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1252         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1253
1254       /* We could handle this, but we should always be called with a pseudo
1255          for our targets and all insns should take them as outputs.  */
1256       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1257                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1258                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1259
1260       pat = GEN_FCN (icode) (dest, src, rtxpos);
1261       seq = get_insns ();
1262       end_sequence ();
1263       if (pat)
1264         {
1265           emit_insn (seq);
1266           emit_insn (pat);
1267           if (mode0 != mode)
1268             return gen_lowpart (tmode, dest);
1269           return dest;
1270         }
1271     }
1272
1273   /* Make sure we are playing with integral modes.  Pun with subregs
1274      if we aren't.  */
1275   {
1276     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1277     if (imode != GET_MODE (op0))
1278       {
1279         if (MEM_P (op0))
1280           op0 = adjust_address (op0, imode, 0);
1281         else
1282           {
1283             gcc_assert (imode != BLKmode);
1284             op0 = gen_lowpart (imode, op0);
1285
1286             /* If we got a SUBREG, force it into a register since we
1287                aren't going to be able to do another SUBREG on it.  */
1288             if (GET_CODE (op0) == SUBREG)
1289               op0 = force_reg (imode, op0);
1290           }
1291       }
1292   }
1293
1294   /* We may be accessing data outside the field, which means
1295      we can alias adjacent data.  */
1296   if (MEM_P (op0))
1297     {
1298       op0 = shallow_copy_rtx (op0);
1299       set_mem_alias_set (op0, 0);
1300       set_mem_expr (op0, 0);
1301     }
1302
1303   /* Extraction of a full-word or multi-word value from a structure
1304      in a register or aligned memory can be done with just a SUBREG.
1305      A subword value in the least significant part of a register
1306      can also be extracted with a SUBREG.  For this, we need the
1307      byte offset of the value in op0.  */
1308
1309   bitpos = bitnum % unit;
1310   offset = bitnum / unit;
1311   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1312
1313   /* If OP0 is a register, BITPOS must count within a word.
1314      But as we have it, it counts within whatever size OP0 now has.
1315      On a bigendian machine, these are not the same, so convert.  */
1316   if (BYTES_BIG_ENDIAN
1317       && !MEM_P (op0)
1318       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1319     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1320
1321   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1322      If that's wrong, the solution is to test for it and set TARGET to 0
1323      if needed.  */
1324
1325   /* Only scalar integer modes can be converted via subregs.  There is an
1326      additional problem for FP modes here in that they can have a precision
1327      which is different from the size.  mode_for_size uses precision, but
1328      we want a mode based on the size, so we must avoid calling it for FP
1329      modes.  */
1330   mode1  = (SCALAR_INT_MODE_P (tmode)
1331             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1332             : mode);
1333
1334   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1335         && bitpos % BITS_PER_WORD == 0)
1336        || (mode1 != BLKmode
1337            /* ??? The big endian test here is wrong.  This is correct
1338               if the value is in a register, and if mode_for_size is not
1339               the same mode as op0.  This causes us to get unnecessarily
1340               inefficient code from the Thumb port when -mbig-endian.  */
1341            && (BYTES_BIG_ENDIAN
1342                ? bitpos + bitsize == BITS_PER_WORD
1343                : bitpos == 0)))
1344       && ((!MEM_P (op0)
1345            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1346                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1347            && GET_MODE_SIZE (mode1) != 0
1348            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1349           || (MEM_P (op0)
1350               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1351                   || (offset * BITS_PER_UNIT % bitsize == 0
1352                       && MEM_ALIGN (op0) % bitsize == 0)))))
1353     {
1354       if (MEM_P (op0))
1355         op0 = adjust_address (op0, mode1, offset);
1356       else if (mode1 != GET_MODE (op0))
1357         {
1358           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1359                                          byte_offset);
1360           if (sub == NULL)
1361             goto no_subreg_mode_swap;
1362           op0 = sub;
1363         }
1364       if (mode1 != mode)
1365         return convert_to_mode (tmode, op0, unsignedp);
1366       return op0;
1367     }
1368  no_subreg_mode_swap:
1369
1370   /* Handle fields bigger than a word.  */
1371
1372   if (bitsize > BITS_PER_WORD)
1373     {
1374       /* Here we transfer the words of the field
1375          in the order least significant first.
1376          This is because the most significant word is the one which may
1377          be less than full.  */
1378
1379       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1380       unsigned int i;
1381
1382       if (target == 0 || !REG_P (target))
1383         target = gen_reg_rtx (mode);
1384
1385       /* Indicate for flow that the entire target reg is being set.  */
1386       emit_clobber (target);
1387
1388       for (i = 0; i < nwords; i++)
1389         {
1390           /* If I is 0, use the low-order word in both field and target;
1391              if I is 1, use the next to lowest word; and so on.  */
1392           /* Word number in TARGET to use.  */
1393           unsigned int wordnum
1394             = (WORDS_BIG_ENDIAN
1395                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1396                : i);
1397           /* Offset from start of field in OP0.  */
1398           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1399                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1400                                                 * (int) BITS_PER_WORD))
1401                                      : (int) i * BITS_PER_WORD);
1402           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1403           rtx result_part
1404             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1405                                            bitsize - i * BITS_PER_WORD),
1406                                  bitnum + bit_offset, 1, target_part, mode,
1407                                  word_mode);
1408
1409           gcc_assert (target_part);
1410
1411           if (result_part != target_part)
1412             emit_move_insn (target_part, result_part);
1413         }
1414
1415       if (unsignedp)
1416         {
1417           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1418              need to be zero'd out.  */
1419           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1420             {
1421               unsigned int i, total_words;
1422
1423               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1424               for (i = nwords; i < total_words; i++)
1425                 emit_move_insn
1426                   (operand_subword (target,
1427                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1428                                     1, VOIDmode),
1429                    const0_rtx);
1430             }
1431           return target;
1432         }
1433
1434       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1435       target = expand_shift (LSHIFT_EXPR, mode, target,
1436                              build_int_cst (NULL_TREE,
1437                                             GET_MODE_BITSIZE (mode) - bitsize),
1438                              NULL_RTX, 0);
1439       return expand_shift (RSHIFT_EXPR, mode, target,
1440                            build_int_cst (NULL_TREE,
1441                                           GET_MODE_BITSIZE (mode) - bitsize),
1442                            NULL_RTX, 0);
1443     }
1444
1445   /* From here on we know the desired field is smaller than a word.  */
1446
1447   /* Check if there is a correspondingly-sized integer field, so we can
1448      safely extract it as one size of integer, if necessary; then
1449      truncate or extend to the size that is wanted; then use SUBREGs or
1450      convert_to_mode to get one of the modes we really wanted.  */
1451
1452   int_mode = int_mode_for_mode (tmode);
1453   if (int_mode == BLKmode)
1454     int_mode = int_mode_for_mode (mode);
1455   /* Should probably push op0 out to memory and then do a load.  */
1456   gcc_assert (int_mode != BLKmode);
1457
1458   /* OFFSET is the number of words or bytes (UNIT says which)
1459      from STR_RTX to the first word or byte containing part of the field.  */
1460   if (!MEM_P (op0))
1461     {
1462       if (offset != 0
1463           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1464         {
1465           if (!REG_P (op0))
1466             op0 = copy_to_reg (op0);
1467           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1468                                 op0, (offset * UNITS_PER_WORD));
1469         }
1470       offset = 0;
1471     }
1472
1473   /* Now OFFSET is nonzero only for memory operands.  */
1474   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1475   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1476   if (ext_mode != MAX_MACHINE_MODE
1477       && bitsize > 0
1478       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1479       /* If op0 is a register, we need it in EXT_MODE to make it
1480          acceptable to the format of ext(z)v.  */
1481       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1482       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1483            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1484       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1485     {
1486       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1487       rtx bitsize_rtx, bitpos_rtx;
1488       rtx last = get_last_insn ();
1489       rtx xop0 = op0;
1490       rtx xtarget = target;
1491       rtx xspec_target = target;
1492       rtx xspec_target_subreg = 0;
1493       rtx pat;
1494
1495       /* If op0 is a register, we need it in EXT_MODE to make it
1496          acceptable to the format of ext(z)v.  */
1497       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1498         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1499       if (MEM_P (xop0))
1500         /* Get ref to first byte containing part of the field.  */
1501         xop0 = adjust_address (xop0, byte_mode, xoffset);
1502
1503       /* On big-endian machines, we count bits from the most significant.
1504          If the bit field insn does not, we must invert.  */
1505       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1506         xbitpos = unit - bitsize - xbitpos;
1507
1508       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1509       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1510         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1511
1512       unit = GET_MODE_BITSIZE (ext_mode);
1513
1514       if (xtarget == 0)
1515         xtarget = xspec_target = gen_reg_rtx (tmode);
1516
1517       if (GET_MODE (xtarget) != ext_mode)
1518         {
1519           if (REG_P (xtarget))
1520             {
1521               xtarget = gen_lowpart (ext_mode, xtarget);
1522               if (GET_MODE_SIZE (ext_mode)
1523                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1524                 xspec_target_subreg = xtarget;
1525             }
1526           else
1527             xtarget = gen_reg_rtx (ext_mode);
1528         }
1529
1530       /* If this machine's ext(z)v insists on a register target,
1531          make sure we have one.  */
1532       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1533         xtarget = gen_reg_rtx (ext_mode);
1534
1535       bitsize_rtx = GEN_INT (bitsize);
1536       bitpos_rtx = GEN_INT (xbitpos);
1537
1538       pat = (unsignedp
1539              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1540              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1541       if (pat)
1542         {
1543           emit_insn (pat);
1544           if (xtarget == xspec_target)
1545             return xtarget;
1546           if (xtarget == xspec_target_subreg)
1547             return xspec_target;
1548           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1549         }
1550       delete_insns_since (last);
1551     }
1552
1553   /* If OP0 is a memory, try copying it to a register and seeing if a
1554      cheap register alternative is available.  */
1555   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1556     {
1557       enum machine_mode bestmode;
1558
1559       /* Get the mode to use for inserting into this field.  If
1560          OP0 is BLKmode, get the smallest mode consistent with the
1561          alignment. If OP0 is a non-BLKmode object that is no
1562          wider than EXT_MODE, use its mode. Otherwise, use the
1563          smallest mode containing the field.  */
1564
1565       if (GET_MODE (op0) == BLKmode
1566           || (ext_mode != MAX_MACHINE_MODE
1567               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1568         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1569                                   (ext_mode == MAX_MACHINE_MODE
1570                                    ? VOIDmode : ext_mode),
1571                                   MEM_VOLATILE_P (op0));
1572       else
1573         bestmode = GET_MODE (op0);
1574
1575       if (bestmode != VOIDmode
1576           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1577                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1578         {
1579           unsigned HOST_WIDE_INT xoffset, xbitpos;
1580
1581           /* Compute the offset as a multiple of this unit,
1582              counting in bytes.  */
1583           unit = GET_MODE_BITSIZE (bestmode);
1584           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1585           xbitpos = bitnum % unit;
1586
1587           /* Make sure the register is big enough for the whole field.  */
1588           if (xoffset * BITS_PER_UNIT + unit
1589               >= offset * BITS_PER_UNIT + bitsize)
1590             {
1591               rtx last, result, xop0;
1592
1593               last = get_last_insn ();
1594
1595               /* Fetch it to a register in that size.  */
1596               xop0 = adjust_address (op0, bestmode, xoffset);
1597               xop0 = force_reg (bestmode, xop0);
1598               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1599                                             unsignedp, target,
1600                                             mode, tmode, false);
1601               if (result)
1602                 return result;
1603
1604               delete_insns_since (last);
1605             }
1606         }
1607     }
1608
1609   if (!fallback_p)
1610     return NULL;
1611
1612   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1613                                     bitpos, target, unsignedp);
1614   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1615 }
1616
1617 /* Generate code to extract a byte-field from STR_RTX
1618    containing BITSIZE bits, starting at BITNUM,
1619    and put it in TARGET if possible (if TARGET is nonzero).
1620    Regardless of TARGET, we return the rtx for where the value is placed.
1621
1622    STR_RTX is the structure containing the byte (a REG or MEM).
1623    UNSIGNEDP is nonzero if this is an unsigned bit field.
1624    MODE is the natural mode of the field value once extracted.
1625    TMODE is the mode the caller would like the value to have;
1626    but the value may be returned with type MODE instead.
1627
1628    If a TARGET is specified and we can store in it at no extra cost,
1629    we do so, and return TARGET.
1630    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1631    if they are equally easy.  */
1632
1633 rtx
1634 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1635                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1636                    enum machine_mode mode, enum machine_mode tmode)
1637 {
1638   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1639                               target, mode, tmode, true);
1640 }
1641 \f
1642 /* Extract a bit field using shifts and boolean operations
1643    Returns an rtx to represent the value.
1644    OP0 addresses a register (word) or memory (byte).
1645    BITPOS says which bit within the word or byte the bit field starts in.
1646    OFFSET says how many bytes farther the bit field starts;
1647     it is 0 if OP0 is a register.
1648    BITSIZE says how many bits long the bit field is.
1649     (If OP0 is a register, it may be narrower than a full word,
1650      but BITPOS still counts within a full word,
1651      which is significant on bigendian machines.)
1652
1653    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1654    If TARGET is nonzero, attempts to store the value there
1655    and return TARGET, but this is not guaranteed.
1656    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1657
1658 static rtx
1659 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1660                          unsigned HOST_WIDE_INT offset,
1661                          unsigned HOST_WIDE_INT bitsize,
1662                          unsigned HOST_WIDE_INT bitpos, rtx target,
1663                          int unsignedp)
1664 {
1665   unsigned int total_bits = BITS_PER_WORD;
1666   enum machine_mode mode;
1667
1668   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1669     {
1670       /* Special treatment for a bit field split across two registers.  */
1671       if (bitsize + bitpos > BITS_PER_WORD)
1672         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1673     }
1674   else
1675     {
1676       /* Get the proper mode to use for this field.  We want a mode that
1677          includes the entire field.  If such a mode would be larger than
1678          a word, we won't be doing the extraction the normal way.  */
1679
1680       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1681                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1682
1683       if (mode == VOIDmode)
1684         /* The only way this should occur is if the field spans word
1685            boundaries.  */
1686         return extract_split_bit_field (op0, bitsize,
1687                                         bitpos + offset * BITS_PER_UNIT,
1688                                         unsignedp);
1689
1690       total_bits = GET_MODE_BITSIZE (mode);
1691
1692       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1693          be in the range 0 to total_bits-1, and put any excess bytes in
1694          OFFSET.  */
1695       if (bitpos >= total_bits)
1696         {
1697           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1698           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1699                      * BITS_PER_UNIT);
1700         }
1701
1702       /* Get ref to an aligned byte, halfword, or word containing the field.
1703          Adjust BITPOS to be position within a word,
1704          and OFFSET to be the offset of that word.
1705          Then alter OP0 to refer to that word.  */
1706       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1707       offset -= (offset % (total_bits / BITS_PER_UNIT));
1708       op0 = adjust_address (op0, mode, offset);
1709     }
1710
1711   mode = GET_MODE (op0);
1712
1713   if (BYTES_BIG_ENDIAN)
1714     /* BITPOS is the distance between our msb and that of OP0.
1715        Convert it to the distance from the lsb.  */
1716     bitpos = total_bits - bitsize - bitpos;
1717
1718   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1719      We have reduced the big-endian case to the little-endian case.  */
1720
1721   if (unsignedp)
1722     {
1723       if (bitpos)
1724         {
1725           /* If the field does not already start at the lsb,
1726              shift it so it does.  */
1727           tree amount = build_int_cst (NULL_TREE, bitpos);
1728           /* Maybe propagate the target for the shift.  */
1729           /* But not if we will return it--could confuse integrate.c.  */
1730           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1731           if (tmode != mode) subtarget = 0;
1732           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1733         }
1734       /* Convert the value to the desired mode.  */
1735       if (mode != tmode)
1736         op0 = convert_to_mode (tmode, op0, 1);
1737
1738       /* Unless the msb of the field used to be the msb when we shifted,
1739          mask out the upper bits.  */
1740
1741       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1742         return expand_binop (GET_MODE (op0), and_optab, op0,
1743                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1744                              target, 1, OPTAB_LIB_WIDEN);
1745       return op0;
1746     }
1747
1748   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1749      then arithmetic-shift its lsb to the lsb of the word.  */
1750   op0 = force_reg (mode, op0);
1751   if (mode != tmode)
1752     target = 0;
1753
1754   /* Find the narrowest integer mode that contains the field.  */
1755
1756   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1757        mode = GET_MODE_WIDER_MODE (mode))
1758     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1759       {
1760         op0 = convert_to_mode (mode, op0, 0);
1761         break;
1762       }
1763
1764   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1765     {
1766       tree amount
1767         = build_int_cst (NULL_TREE,
1768                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1769       /* Maybe propagate the target for the shift.  */
1770       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1772     }
1773
1774   return expand_shift (RSHIFT_EXPR, mode, op0,
1775                        build_int_cst (NULL_TREE,
1776                                       GET_MODE_BITSIZE (mode) - bitsize),
1777                        target, 0);
1778 }
1779 \f
1780 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1781    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1782    complement of that if COMPLEMENT.  The mask is truncated if
1783    necessary to the width of mode MODE.  The mask is zero-extended if
1784    BITSIZE+BITPOS is too small for MODE.  */
1785
1786 static rtx
1787 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1788 {
1789   HOST_WIDE_INT masklow, maskhigh;
1790
1791   if (bitsize == 0)
1792     masklow = 0;
1793   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1794     masklow = (HOST_WIDE_INT) -1 << bitpos;
1795   else
1796     masklow = 0;
1797
1798   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1799     masklow &= ((unsigned HOST_WIDE_INT) -1
1800                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1801
1802   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1803     maskhigh = -1;
1804   else
1805     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1806
1807   if (bitsize == 0)
1808     maskhigh = 0;
1809   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1810     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1811                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1812   else
1813     maskhigh = 0;
1814
1815   if (complement)
1816     {
1817       maskhigh = ~maskhigh;
1818       masklow = ~masklow;
1819     }
1820
1821   return immed_double_const (masklow, maskhigh, mode);
1822 }
1823
1824 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1825    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1826
1827 static rtx
1828 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1829 {
1830   unsigned HOST_WIDE_INT v = INTVAL (value);
1831   HOST_WIDE_INT low, high;
1832
1833   if (bitsize < HOST_BITS_PER_WIDE_INT)
1834     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1835
1836   if (bitpos < HOST_BITS_PER_WIDE_INT)
1837     {
1838       low = v << bitpos;
1839       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1840     }
1841   else
1842     {
1843       low = 0;
1844       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1845     }
1846
1847   return immed_double_const (low, high, mode);
1848 }
1849 \f
1850 /* Extract a bit field that is split across two words
1851    and return an RTX for the result.
1852
1853    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1854    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1855    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1856
1857 static rtx
1858 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1859                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1860 {
1861   unsigned int unit;
1862   unsigned int bitsdone = 0;
1863   rtx result = NULL_RTX;
1864   int first = 1;
1865
1866   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1867      much at a time.  */
1868   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1869     unit = BITS_PER_WORD;
1870   else
1871     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1872
1873   while (bitsdone < bitsize)
1874     {
1875       unsigned HOST_WIDE_INT thissize;
1876       rtx part, word;
1877       unsigned HOST_WIDE_INT thispos;
1878       unsigned HOST_WIDE_INT offset;
1879
1880       offset = (bitpos + bitsdone) / unit;
1881       thispos = (bitpos + bitsdone) % unit;
1882
1883       /* THISSIZE must not overrun a word boundary.  Otherwise,
1884          extract_fixed_bit_field will call us again, and we will mutually
1885          recurse forever.  */
1886       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1887       thissize = MIN (thissize, unit - thispos);
1888
1889       /* If OP0 is a register, then handle OFFSET here.
1890
1891          When handling multiword bitfields, extract_bit_field may pass
1892          down a word_mode SUBREG of a larger REG for a bitfield that actually
1893          crosses a word boundary.  Thus, for a SUBREG, we must find
1894          the current word starting from the base register.  */
1895       if (GET_CODE (op0) == SUBREG)
1896         {
1897           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1898           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1899                                         GET_MODE (SUBREG_REG (op0)));
1900           offset = 0;
1901         }
1902       else if (REG_P (op0))
1903         {
1904           word = operand_subword_force (op0, offset, GET_MODE (op0));
1905           offset = 0;
1906         }
1907       else
1908         word = op0;
1909
1910       /* Extract the parts in bit-counting order,
1911          whose meaning is determined by BYTES_PER_UNIT.
1912          OFFSET is in UNITs, and UNIT is in bits.
1913          extract_fixed_bit_field wants offset in bytes.  */
1914       part = extract_fixed_bit_field (word_mode, word,
1915                                       offset * unit / BITS_PER_UNIT,
1916                                       thissize, thispos, 0, 1);
1917       bitsdone += thissize;
1918
1919       /* Shift this part into place for the result.  */
1920       if (BYTES_BIG_ENDIAN)
1921         {
1922           if (bitsize != bitsdone)
1923             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1924                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1925                                  0, 1);
1926         }
1927       else
1928         {
1929           if (bitsdone != thissize)
1930             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1931                                  build_int_cst (NULL_TREE,
1932                                                 bitsdone - thissize), 0, 1);
1933         }
1934
1935       if (first)
1936         result = part;
1937       else
1938         /* Combine the parts with bitwise or.  This works
1939            because we extracted each part as an unsigned bit field.  */
1940         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1941                                OPTAB_LIB_WIDEN);
1942
1943       first = 0;
1944     }
1945
1946   /* Unsigned bit field: we are done.  */
1947   if (unsignedp)
1948     return result;
1949   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1950   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1951                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1952                          NULL_RTX, 0);
1953   return expand_shift (RSHIFT_EXPR, word_mode, result,
1954                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1955                        NULL_RTX, 0);
1956 }
1957 \f
1958 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1959    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1960    MODE, fill the upper bits with zeros.  Fail if the layout of either
1961    mode is unknown (as for CC modes) or if the extraction would involve
1962    unprofitable mode punning.  Return the value on success, otherwise
1963    return null.
1964
1965    This is different from gen_lowpart* in these respects:
1966
1967      - the returned value must always be considered an rvalue
1968
1969      - when MODE is wider than SRC_MODE, the extraction involves
1970        a zero extension
1971
1972      - when MODE is smaller than SRC_MODE, the extraction involves
1973        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1974
1975    In other words, this routine performs a computation, whereas the
1976    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1977    operations.  */
1978
1979 rtx
1980 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1981 {
1982   enum machine_mode int_mode, src_int_mode;
1983
1984   if (mode == src_mode)
1985     return src;
1986
1987   if (CONSTANT_P (src))
1988     {
1989       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1990          fails, it will happily create (subreg (symbol_ref)) or similar
1991          invalid SUBREGs.  */
1992       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1993       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1994       if (ret)
1995         return ret;
1996
1997       if (GET_MODE (src) == VOIDmode
1998           || !validate_subreg (mode, src_mode, src, byte))
1999         return NULL_RTX;
2000
2001       src = force_reg (GET_MODE (src), src);
2002       return gen_rtx_SUBREG (mode, src, byte);
2003     }
2004
2005   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2006     return NULL_RTX;
2007
2008   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2009       && MODES_TIEABLE_P (mode, src_mode))
2010     {
2011       rtx x = gen_lowpart_common (mode, src);
2012       if (x)
2013         return x;
2014     }
2015
2016   src_int_mode = int_mode_for_mode (src_mode);
2017   int_mode = int_mode_for_mode (mode);
2018   if (src_int_mode == BLKmode || int_mode == BLKmode)
2019     return NULL_RTX;
2020
2021   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2022     return NULL_RTX;
2023   if (!MODES_TIEABLE_P (int_mode, mode))
2024     return NULL_RTX;
2025
2026   src = gen_lowpart (src_int_mode, src);
2027   src = convert_modes (int_mode, src_int_mode, src, true);
2028   src = gen_lowpart (mode, src);
2029   return src;
2030 }
2031 \f
2032 /* Add INC into TARGET.  */
2033
2034 void
2035 expand_inc (rtx target, rtx inc)
2036 {
2037   rtx value = expand_binop (GET_MODE (target), add_optab,
2038                             target, inc,
2039                             target, 0, OPTAB_LIB_WIDEN);
2040   if (value != target)
2041     emit_move_insn (target, value);
2042 }
2043
2044 /* Subtract DEC from TARGET.  */
2045
2046 void
2047 expand_dec (rtx target, rtx dec)
2048 {
2049   rtx value = expand_binop (GET_MODE (target), sub_optab,
2050                             target, dec,
2051                             target, 0, OPTAB_LIB_WIDEN);
2052   if (value != target)
2053     emit_move_insn (target, value);
2054 }
2055 \f
2056 /* Output a shift instruction for expression code CODE,
2057    with SHIFTED being the rtx for the value to shift,
2058    and AMOUNT the tree for the amount to shift by.
2059    Store the result in the rtx TARGET, if that is convenient.
2060    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2061    Return the rtx for where the value is.  */
2062
2063 rtx
2064 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2065               tree amount, rtx target, int unsignedp)
2066 {
2067   rtx op1, temp = 0;
2068   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2069   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2070   optab lshift_optab = ashl_optab;
2071   optab rshift_arith_optab = ashr_optab;
2072   optab rshift_uns_optab = lshr_optab;
2073   optab lrotate_optab = rotl_optab;
2074   optab rrotate_optab = rotr_optab;
2075   enum machine_mode op1_mode;
2076   int attempt;
2077   bool speed = optimize_insn_for_speed_p ();
2078
2079   op1 = expand_normal (amount);
2080   op1_mode = GET_MODE (op1);
2081
2082   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2083      shift amount is a vector, use the vector/vector shift patterns.  */
2084   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2085     {
2086       lshift_optab = vashl_optab;
2087       rshift_arith_optab = vashr_optab;
2088       rshift_uns_optab = vlshr_optab;
2089       lrotate_optab = vrotl_optab;
2090       rrotate_optab = vrotr_optab;
2091     }
2092
2093   /* Previously detected shift-counts computed by NEGATE_EXPR
2094      and shifted in the other direction; but that does not work
2095      on all machines.  */
2096
2097   if (SHIFT_COUNT_TRUNCATED)
2098     {
2099       if (GET_CODE (op1) == CONST_INT
2100           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2101               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2102         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2103                        % GET_MODE_BITSIZE (mode));
2104       else if (GET_CODE (op1) == SUBREG
2105                && subreg_lowpart_p (op1))
2106         op1 = SUBREG_REG (op1);
2107     }
2108
2109   if (op1 == const0_rtx)
2110     return shifted;
2111
2112   /* Check whether its cheaper to implement a left shift by a constant
2113      bit count by a sequence of additions.  */
2114   if (code == LSHIFT_EXPR
2115       && GET_CODE (op1) == CONST_INT
2116       && INTVAL (op1) > 0
2117       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2118       && INTVAL (op1) < MAX_BITS_PER_WORD
2119       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2120       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2121     {
2122       int i;
2123       for (i = 0; i < INTVAL (op1); i++)
2124         {
2125           temp = force_reg (mode, shifted);
2126           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2127                                   unsignedp, OPTAB_LIB_WIDEN);
2128         }
2129       return shifted;
2130     }
2131
2132   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2133     {
2134       enum optab_methods methods;
2135
2136       if (attempt == 0)
2137         methods = OPTAB_DIRECT;
2138       else if (attempt == 1)
2139         methods = OPTAB_WIDEN;
2140       else
2141         methods = OPTAB_LIB_WIDEN;
2142
2143       if (rotate)
2144         {
2145           /* Widening does not work for rotation.  */
2146           if (methods == OPTAB_WIDEN)
2147             continue;
2148           else if (methods == OPTAB_LIB_WIDEN)
2149             {
2150               /* If we have been unable to open-code this by a rotation,
2151                  do it as the IOR of two shifts.  I.e., to rotate A
2152                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2153                  where C is the bitsize of A.
2154
2155                  It is theoretically possible that the target machine might
2156                  not be able to perform either shift and hence we would
2157                  be making two libcalls rather than just the one for the
2158                  shift (similarly if IOR could not be done).  We will allow
2159                  this extremely unlikely lossage to avoid complicating the
2160                  code below.  */
2161
2162               rtx subtarget = target == shifted ? 0 : target;
2163               tree new_amount, other_amount;
2164               rtx temp1;
2165               tree type = TREE_TYPE (amount);
2166               if (GET_MODE (op1) != TYPE_MODE (type)
2167                   && GET_MODE (op1) != VOIDmode)
2168                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2169               new_amount = make_tree (type, op1);
2170               other_amount
2171                 = fold_build2 (MINUS_EXPR, type,
2172                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2173                                new_amount);
2174
2175               shifted = force_reg (mode, shifted);
2176
2177               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2178                                    mode, shifted, new_amount, 0, 1);
2179               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2180                                     mode, shifted, other_amount, subtarget, 1);
2181               return expand_binop (mode, ior_optab, temp, temp1, target,
2182                                    unsignedp, methods);
2183             }
2184
2185           temp = expand_binop (mode,
2186                                left ? lrotate_optab : rrotate_optab,
2187                                shifted, op1, target, unsignedp, methods);
2188         }
2189       else if (unsignedp)
2190         temp = expand_binop (mode,
2191                              left ? lshift_optab : rshift_uns_optab,
2192                              shifted, op1, target, unsignedp, methods);
2193
2194       /* Do arithmetic shifts.
2195          Also, if we are going to widen the operand, we can just as well
2196          use an arithmetic right-shift instead of a logical one.  */
2197       if (temp == 0 && ! rotate
2198           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2199         {
2200           enum optab_methods methods1 = methods;
2201
2202           /* If trying to widen a log shift to an arithmetic shift,
2203              don't accept an arithmetic shift of the same size.  */
2204           if (unsignedp)
2205             methods1 = OPTAB_MUST_WIDEN;
2206
2207           /* Arithmetic shift */
2208
2209           temp = expand_binop (mode,
2210                                left ? lshift_optab : rshift_arith_optab,
2211                                shifted, op1, target, unsignedp, methods1);
2212         }
2213
2214       /* We used to try extzv here for logical right shifts, but that was
2215          only useful for one machine, the VAX, and caused poor code
2216          generation there for lshrdi3, so the code was deleted and a
2217          define_expand for lshrsi3 was added to vax.md.  */
2218     }
2219
2220   gcc_assert (temp);
2221   return temp;
2222 }
2223 \f
2224 enum alg_code {
2225   alg_unknown,
2226   alg_zero,
2227   alg_m, alg_shift,
2228   alg_add_t_m2,
2229   alg_sub_t_m2,
2230   alg_add_factor,
2231   alg_sub_factor,
2232   alg_add_t2_m,
2233   alg_sub_t2_m,
2234   alg_impossible
2235 };
2236
2237 /* This structure holds the "cost" of a multiply sequence.  The
2238    "cost" field holds the total rtx_cost of every operator in the
2239    synthetic multiplication sequence, hence cost(a op b) is defined
2240    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2241    The "latency" field holds the minimum possible latency of the
2242    synthetic multiply, on a hypothetical infinitely parallel CPU.
2243    This is the critical path, or the maximum height, of the expression
2244    tree which is the sum of rtx_costs on the most expensive path from
2245    any leaf to the root.  Hence latency(a op b) is defined as zero for
2246    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2247
2248 struct mult_cost {
2249   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2250   short latency;  /* The latency of the multiplication sequence.  */
2251 };
2252
2253 /* This macro is used to compare a pointer to a mult_cost against an
2254    single integer "rtx_cost" value.  This is equivalent to the macro
2255    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2256 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2257                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2258
2259 /* This macro is used to compare two pointers to mult_costs against
2260    each other.  The macro returns true if X is cheaper than Y.
2261    Currently, the cheaper of two mult_costs is the one with the
2262    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2263 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2264                                  || ((X)->cost == (Y)->cost     \
2265                                      && (X)->latency < (Y)->latency))
2266
2267 /* This structure records a sequence of operations.
2268    `ops' is the number of operations recorded.
2269    `cost' is their total cost.
2270    The operations are stored in `op' and the corresponding
2271    logarithms of the integer coefficients in `log'.
2272
2273    These are the operations:
2274    alg_zero             total := 0;
2275    alg_m                total := multiplicand;
2276    alg_shift            total := total * coeff
2277    alg_add_t_m2         total := total + multiplicand * coeff;
2278    alg_sub_t_m2         total := total - multiplicand * coeff;
2279    alg_add_factor       total := total * coeff + total;
2280    alg_sub_factor       total := total * coeff - total;
2281    alg_add_t2_m         total := total * coeff + multiplicand;
2282    alg_sub_t2_m         total := total * coeff - multiplicand;
2283
2284    The first operand must be either alg_zero or alg_m.  */
2285
2286 struct algorithm
2287 {
2288   struct mult_cost cost;
2289   short ops;
2290   /* The size of the OP and LOG fields are not directly related to the
2291      word size, but the worst-case algorithms will be if we have few
2292      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2293      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2294      in total wordsize operations.  */
2295   enum alg_code op[MAX_BITS_PER_WORD];
2296   char log[MAX_BITS_PER_WORD];
2297 };
2298
2299 /* The entry for our multiplication cache/hash table.  */
2300 struct alg_hash_entry {
2301   /* The number we are multiplying by.  */
2302   unsigned HOST_WIDE_INT t;
2303
2304   /* The mode in which we are multiplying something by T.  */
2305   enum machine_mode mode;
2306
2307   /* The best multiplication algorithm for t.  */
2308   enum alg_code alg;
2309
2310   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2311      Otherwise, the cost within which multiplication by T is
2312      impossible.  */
2313   struct mult_cost cost;
2314
2315   /* OPtimized for speed? */
2316   bool speed;
2317 };
2318
2319 /* The number of cache/hash entries.  */
2320 #if HOST_BITS_PER_WIDE_INT == 64
2321 #define NUM_ALG_HASH_ENTRIES 1031
2322 #else
2323 #define NUM_ALG_HASH_ENTRIES 307
2324 #endif
2325
2326 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2327    actually a hash table.  If we have a collision, that the older
2328    entry is kicked out.  */
2329 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2330
2331 /* Indicates the type of fixup needed after a constant multiplication.
2332    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2333    the result should be negated, and ADD_VARIANT means that the
2334    multiplicand should be added to the result.  */
2335 enum mult_variant {basic_variant, negate_variant, add_variant};
2336
2337 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2338                         const struct mult_cost *, enum machine_mode mode);
2339 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2340                                  struct algorithm *, enum mult_variant *, int);
2341 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2342                               const struct algorithm *, enum mult_variant);
2343 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2344                                                  int, rtx *, int *, int *);
2345 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2346 static rtx extract_high_half (enum machine_mode, rtx);
2347 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2348 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2349                                        int, int);
2350 /* Compute and return the best algorithm for multiplying by T.
2351    The algorithm must cost less than cost_limit
2352    If retval.cost >= COST_LIMIT, no algorithm was found and all
2353    other field of the returned struct are undefined.
2354    MODE is the machine mode of the multiplication.  */
2355
2356 static void
2357 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2358             const struct mult_cost *cost_limit, enum machine_mode mode)
2359 {
2360   int m;
2361   struct algorithm *alg_in, *best_alg;
2362   struct mult_cost best_cost;
2363   struct mult_cost new_limit;
2364   int op_cost, op_latency;
2365   unsigned HOST_WIDE_INT q;
2366   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2367   int hash_index;
2368   bool cache_hit = false;
2369   enum alg_code cache_alg = alg_zero;
2370   bool speed = optimize_insn_for_speed_p ();
2371
2372   /* Indicate that no algorithm is yet found.  If no algorithm
2373      is found, this value will be returned and indicate failure.  */
2374   alg_out->cost.cost = cost_limit->cost + 1;
2375   alg_out->cost.latency = cost_limit->latency + 1;
2376
2377   if (cost_limit->cost < 0
2378       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2379     return;
2380
2381   /* Restrict the bits of "t" to the multiplication's mode.  */
2382   t &= GET_MODE_MASK (mode);
2383
2384   /* t == 1 can be done in zero cost.  */
2385   if (t == 1)
2386     {
2387       alg_out->ops = 1;
2388       alg_out->cost.cost = 0;
2389       alg_out->cost.latency = 0;
2390       alg_out->op[0] = alg_m;
2391       return;
2392     }
2393
2394   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2395      fail now.  */
2396   if (t == 0)
2397     {
2398       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2399         return;
2400       else
2401         {
2402           alg_out->ops = 1;
2403           alg_out->cost.cost = zero_cost[speed];
2404           alg_out->cost.latency = zero_cost[speed];
2405           alg_out->op[0] = alg_zero;
2406           return;
2407         }
2408     }
2409
2410   /* We'll be needing a couple extra algorithm structures now.  */
2411
2412   alg_in = XALLOCA (struct algorithm);
2413   best_alg = XALLOCA (struct algorithm);
2414   best_cost = *cost_limit;
2415
2416   /* Compute the hash index.  */
2417   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2418
2419   /* See if we already know what to do for T.  */
2420   if (alg_hash[hash_index].t == t
2421       && alg_hash[hash_index].mode == mode
2422       && alg_hash[hash_index].mode == mode
2423       && alg_hash[hash_index].speed == speed
2424       && alg_hash[hash_index].alg != alg_unknown)
2425     {
2426       cache_alg = alg_hash[hash_index].alg;
2427
2428       if (cache_alg == alg_impossible)
2429         {
2430           /* The cache tells us that it's impossible to synthesize
2431              multiplication by T within alg_hash[hash_index].cost.  */
2432           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2433             /* COST_LIMIT is at least as restrictive as the one
2434                recorded in the hash table, in which case we have no
2435                hope of synthesizing a multiplication.  Just
2436                return.  */
2437             return;
2438
2439           /* If we get here, COST_LIMIT is less restrictive than the
2440              one recorded in the hash table, so we may be able to
2441              synthesize a multiplication.  Proceed as if we didn't
2442              have the cache entry.  */
2443         }
2444       else
2445         {
2446           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2447             /* The cached algorithm shows that this multiplication
2448                requires more cost than COST_LIMIT.  Just return.  This
2449                way, we don't clobber this cache entry with
2450                alg_impossible but retain useful information.  */
2451             return;
2452
2453           cache_hit = true;
2454
2455           switch (cache_alg)
2456             {
2457             case alg_shift:
2458               goto do_alg_shift;
2459
2460             case alg_add_t_m2:
2461             case alg_sub_t_m2:
2462               goto do_alg_addsub_t_m2;
2463
2464             case alg_add_factor:
2465             case alg_sub_factor:
2466               goto do_alg_addsub_factor;
2467
2468             case alg_add_t2_m:
2469               goto do_alg_add_t2_m;
2470
2471             case alg_sub_t2_m:
2472               goto do_alg_sub_t2_m;
2473
2474             default:
2475               gcc_unreachable ();
2476             }
2477         }
2478     }
2479
2480   /* If we have a group of zero bits at the low-order part of T, try
2481      multiplying by the remaining bits and then doing a shift.  */
2482
2483   if ((t & 1) == 0)
2484     {
2485     do_alg_shift:
2486       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2487       if (m < maxm)
2488         {
2489           q = t >> m;
2490           /* The function expand_shift will choose between a shift and
2491              a sequence of additions, so the observed cost is given as
2492              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2493           op_cost = m * add_cost[speed][mode];
2494           if (shift_cost[speed][mode][m] < op_cost)
2495             op_cost = shift_cost[speed][mode][m];
2496           new_limit.cost = best_cost.cost - op_cost;
2497           new_limit.latency = best_cost.latency - op_cost;
2498           synth_mult (alg_in, q, &new_limit, mode);
2499
2500           alg_in->cost.cost += op_cost;
2501           alg_in->cost.latency += op_cost;
2502           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2503             {
2504               struct algorithm *x;
2505               best_cost = alg_in->cost;
2506               x = alg_in, alg_in = best_alg, best_alg = x;
2507               best_alg->log[best_alg->ops] = m;
2508               best_alg->op[best_alg->ops] = alg_shift;
2509             }
2510         }
2511       if (cache_hit)
2512         goto done;
2513     }
2514
2515   /* If we have an odd number, add or subtract one.  */
2516   if ((t & 1) != 0)
2517     {
2518       unsigned HOST_WIDE_INT w;
2519
2520     do_alg_addsub_t_m2:
2521       for (w = 1; (w & t) != 0; w <<= 1)
2522         ;
2523       /* If T was -1, then W will be zero after the loop.  This is another
2524          case where T ends with ...111.  Handling this with (T + 1) and
2525          subtract 1 produces slightly better code and results in algorithm
2526          selection much faster than treating it like the ...0111 case
2527          below.  */
2528       if (w == 0
2529           || (w > 2
2530               /* Reject the case where t is 3.
2531                  Thus we prefer addition in that case.  */
2532               && t != 3))
2533         {
2534           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2535
2536           op_cost = add_cost[speed][mode];
2537           new_limit.cost = best_cost.cost - op_cost;
2538           new_limit.latency = best_cost.latency - op_cost;
2539           synth_mult (alg_in, t + 1, &new_limit, mode);
2540
2541           alg_in->cost.cost += op_cost;
2542           alg_in->cost.latency += op_cost;
2543           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2544             {
2545               struct algorithm *x;
2546               best_cost = alg_in->cost;
2547               x = alg_in, alg_in = best_alg, best_alg = x;
2548               best_alg->log[best_alg->ops] = 0;
2549               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2550             }
2551         }
2552       else
2553         {
2554           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2555
2556           op_cost = add_cost[speed][mode];
2557           new_limit.cost = best_cost.cost - op_cost;
2558           new_limit.latency = best_cost.latency - op_cost;
2559           synth_mult (alg_in, t - 1, &new_limit, mode);
2560
2561           alg_in->cost.cost += op_cost;
2562           alg_in->cost.latency += op_cost;
2563           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564             {
2565               struct algorithm *x;
2566               best_cost = alg_in->cost;
2567               x = alg_in, alg_in = best_alg, best_alg = x;
2568               best_alg->log[best_alg->ops] = 0;
2569               best_alg->op[best_alg->ops] = alg_add_t_m2;
2570             }
2571         }
2572       if (cache_hit)
2573         goto done;
2574     }
2575
2576   /* Look for factors of t of the form
2577      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2578      If we find such a factor, we can multiply by t using an algorithm that
2579      multiplies by q, shift the result by m and add/subtract it to itself.
2580
2581      We search for large factors first and loop down, even if large factors
2582      are less probable than small; if we find a large factor we will find a
2583      good sequence quickly, and therefore be able to prune (by decreasing
2584      COST_LIMIT) the search.  */
2585
2586  do_alg_addsub_factor:
2587   for (m = floor_log2 (t - 1); m >= 2; m--)
2588     {
2589       unsigned HOST_WIDE_INT d;
2590
2591       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2592       if (t % d == 0 && t > d && m < maxm
2593           && (!cache_hit || cache_alg == alg_add_factor))
2594         {
2595           /* If the target has a cheap shift-and-add instruction use
2596              that in preference to a shift insn followed by an add insn.
2597              Assume that the shift-and-add is "atomic" with a latency
2598              equal to its cost, otherwise assume that on superscalar
2599              hardware the shift may be executed concurrently with the
2600              earlier steps in the algorithm.  */
2601           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2602           if (shiftadd_cost[speed][mode][m] < op_cost)
2603             {
2604               op_cost = shiftadd_cost[speed][mode][m];
2605               op_latency = op_cost;
2606             }
2607           else
2608             op_latency = add_cost[speed][mode];
2609
2610           new_limit.cost = best_cost.cost - op_cost;
2611           new_limit.latency = best_cost.latency - op_latency;
2612           synth_mult (alg_in, t / d, &new_limit, mode);
2613
2614           alg_in->cost.cost += op_cost;
2615           alg_in->cost.latency += op_latency;
2616           if (alg_in->cost.latency < op_cost)
2617             alg_in->cost.latency = op_cost;
2618           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619             {
2620               struct algorithm *x;
2621               best_cost = alg_in->cost;
2622               x = alg_in, alg_in = best_alg, best_alg = x;
2623               best_alg->log[best_alg->ops] = m;
2624               best_alg->op[best_alg->ops] = alg_add_factor;
2625             }
2626           /* Other factors will have been taken care of in the recursion.  */
2627           break;
2628         }
2629
2630       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2631       if (t % d == 0 && t > d && m < maxm
2632           && (!cache_hit || cache_alg == alg_sub_factor))
2633         {
2634           /* If the target has a cheap shift-and-subtract insn use
2635              that in preference to a shift insn followed by a sub insn.
2636              Assume that the shift-and-sub is "atomic" with a latency
2637              equal to it's cost, otherwise assume that on superscalar
2638              hardware the shift may be executed concurrently with the
2639              earlier steps in the algorithm.  */
2640           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2641           if (shiftsub_cost[speed][mode][m] < op_cost)
2642             {
2643               op_cost = shiftsub_cost[speed][mode][m];
2644               op_latency = op_cost;
2645             }
2646           else
2647             op_latency = add_cost[speed][mode];
2648
2649           new_limit.cost = best_cost.cost - op_cost;
2650           new_limit.latency = best_cost.latency - op_latency;
2651           synth_mult (alg_in, t / d, &new_limit, mode);
2652
2653           alg_in->cost.cost += op_cost;
2654           alg_in->cost.latency += op_latency;
2655           if (alg_in->cost.latency < op_cost)
2656             alg_in->cost.latency = op_cost;
2657           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658             {
2659               struct algorithm *x;
2660               best_cost = alg_in->cost;
2661               x = alg_in, alg_in = best_alg, best_alg = x;
2662               best_alg->log[best_alg->ops] = m;
2663               best_alg->op[best_alg->ops] = alg_sub_factor;
2664             }
2665           break;
2666         }
2667     }
2668   if (cache_hit)
2669     goto done;
2670
2671   /* Try shift-and-add (load effective address) instructions,
2672      i.e. do a*3, a*5, a*9.  */
2673   if ((t & 1) != 0)
2674     {
2675     do_alg_add_t2_m:
2676       q = t - 1;
2677       q = q & -q;
2678       m = exact_log2 (q);
2679       if (m >= 0 && m < maxm)
2680         {
2681           op_cost = shiftadd_cost[speed][mode][m];
2682           new_limit.cost = best_cost.cost - op_cost;
2683           new_limit.latency = best_cost.latency - op_cost;
2684           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2685
2686           alg_in->cost.cost += op_cost;
2687           alg_in->cost.latency += op_cost;
2688           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2689             {
2690               struct algorithm *x;
2691               best_cost = alg_in->cost;
2692               x = alg_in, alg_in = best_alg, best_alg = x;
2693               best_alg->log[best_alg->ops] = m;
2694               best_alg->op[best_alg->ops] = alg_add_t2_m;
2695             }
2696         }
2697       if (cache_hit)
2698         goto done;
2699
2700     do_alg_sub_t2_m:
2701       q = t + 1;
2702       q = q & -q;
2703       m = exact_log2 (q);
2704       if (m >= 0 && m < maxm)
2705         {
2706           op_cost = shiftsub_cost[speed][mode][m];
2707           new_limit.cost = best_cost.cost - op_cost;
2708           new_limit.latency = best_cost.latency - op_cost;
2709           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2710
2711           alg_in->cost.cost += op_cost;
2712           alg_in->cost.latency += op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2720             }
2721         }
2722       if (cache_hit)
2723         goto done;
2724     }
2725
2726  done:
2727   /* If best_cost has not decreased, we have not found any algorithm.  */
2728   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2729     {
2730       /* We failed to find an algorithm.  Record alg_impossible for
2731          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2732          we are asked to find an algorithm for T within the same or
2733          lower COST_LIMIT, we can immediately return to the
2734          caller.  */
2735       alg_hash[hash_index].t = t;
2736       alg_hash[hash_index].mode = mode;
2737       alg_hash[hash_index].speed = speed;
2738       alg_hash[hash_index].alg = alg_impossible;
2739       alg_hash[hash_index].cost = *cost_limit;
2740       return;
2741     }
2742
2743   /* Cache the result.  */
2744   if (!cache_hit)
2745     {
2746       alg_hash[hash_index].t = t;
2747       alg_hash[hash_index].mode = mode;
2748       alg_hash[hash_index].speed = speed;
2749       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2750       alg_hash[hash_index].cost.cost = best_cost.cost;
2751       alg_hash[hash_index].cost.latency = best_cost.latency;
2752     }
2753
2754   /* If we are getting a too long sequence for `struct algorithm'
2755      to record, make this search fail.  */
2756   if (best_alg->ops == MAX_BITS_PER_WORD)
2757     return;
2758
2759   /* Copy the algorithm from temporary space to the space at alg_out.
2760      We avoid using structure assignment because the majority of
2761      best_alg is normally undefined, and this is a critical function.  */
2762   alg_out->ops = best_alg->ops + 1;
2763   alg_out->cost = best_cost;
2764   memcpy (alg_out->op, best_alg->op,
2765           alg_out->ops * sizeof *alg_out->op);
2766   memcpy (alg_out->log, best_alg->log,
2767           alg_out->ops * sizeof *alg_out->log);
2768 }
2769 \f
2770 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2771    Try three variations:
2772
2773        - a shift/add sequence based on VAL itself
2774        - a shift/add sequence based on -VAL, followed by a negation
2775        - a shift/add sequence based on VAL - 1, followed by an addition.
2776
2777    Return true if the cheapest of these cost less than MULT_COST,
2778    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2779
2780 static bool
2781 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2782                      struct algorithm *alg, enum mult_variant *variant,
2783                      int mult_cost)
2784 {
2785   struct algorithm alg2;
2786   struct mult_cost limit;
2787   int op_cost;
2788   bool speed = optimize_insn_for_speed_p ();
2789
2790   /* Fail quickly for impossible bounds.  */
2791   if (mult_cost < 0)
2792     return false;
2793
2794   /* Ensure that mult_cost provides a reasonable upper bound.
2795      Any constant multiplication can be performed with less
2796      than 2 * bits additions.  */
2797   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2798   if (mult_cost > op_cost)
2799     mult_cost = op_cost;
2800
2801   *variant = basic_variant;
2802   limit.cost = mult_cost;
2803   limit.latency = mult_cost;
2804   synth_mult (alg, val, &limit, mode);
2805
2806   /* This works only if the inverted value actually fits in an
2807      `unsigned int' */
2808   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2809     {
2810       op_cost = neg_cost[speed][mode];
2811       if (MULT_COST_LESS (&alg->cost, mult_cost))
2812         {
2813           limit.cost = alg->cost.cost - op_cost;
2814           limit.latency = alg->cost.latency - op_cost;
2815         }
2816       else
2817         {
2818           limit.cost = mult_cost - op_cost;
2819           limit.latency = mult_cost - op_cost;
2820         }
2821
2822       synth_mult (&alg2, -val, &limit, mode);
2823       alg2.cost.cost += op_cost;
2824       alg2.cost.latency += op_cost;
2825       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2826         *alg = alg2, *variant = negate_variant;
2827     }
2828
2829   /* This proves very useful for division-by-constant.  */
2830   op_cost = add_cost[speed][mode];
2831   if (MULT_COST_LESS (&alg->cost, mult_cost))
2832     {
2833       limit.cost = alg->cost.cost - op_cost;
2834       limit.latency = alg->cost.latency - op_cost;
2835     }
2836   else
2837     {
2838       limit.cost = mult_cost - op_cost;
2839       limit.latency = mult_cost - op_cost;
2840     }
2841
2842   synth_mult (&alg2, val - 1, &limit, mode);
2843   alg2.cost.cost += op_cost;
2844   alg2.cost.latency += op_cost;
2845   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2846     *alg = alg2, *variant = add_variant;
2847
2848   return MULT_COST_LESS (&alg->cost, mult_cost);
2849 }
2850
2851 /* A subroutine of expand_mult, used for constant multiplications.
2852    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2853    convenient.  Use the shift/add sequence described by ALG and apply
2854    the final fixup specified by VARIANT.  */
2855
2856 static rtx
2857 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2858                    rtx target, const struct algorithm *alg,
2859                    enum mult_variant variant)
2860 {
2861   HOST_WIDE_INT val_so_far;
2862   rtx insn, accum, tem;
2863   int opno;
2864   enum machine_mode nmode;
2865
2866   /* Avoid referencing memory over and over and invalid sharing
2867      on SUBREGs.  */
2868   op0 = force_reg (mode, op0);
2869
2870   /* ACCUM starts out either as OP0 or as a zero, depending on
2871      the first operation.  */
2872
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, const0_rtx);
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895
2896       switch (alg->op[opno])
2897         {
2898         case alg_shift:
2899           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2900                                 build_int_cst (NULL_TREE, log),
2901                                 NULL_RTX, 0);
2902           val_so_far <<= log;
2903           break;
2904
2905         case alg_add_t_m2:
2906           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2907                               build_int_cst (NULL_TREE, log),
2908                               NULL_RTX, 0);
2909           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2910                                  add_target ? add_target : accum_target);
2911           val_so_far += (HOST_WIDE_INT) 1 << log;
2912           break;
2913
2914         case alg_sub_t_m2:
2915           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2916                               build_int_cst (NULL_TREE, log),
2917                               NULL_RTX, 0);
2918           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2919                                  add_target ? add_target : accum_target);
2920           val_so_far -= (HOST_WIDE_INT) 1 << log;
2921           break;
2922
2923         case alg_add_t2_m:
2924           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2925                                 build_int_cst (NULL_TREE, log),
2926                                 shift_subtarget,
2927                                 0);
2928           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929                                  add_target ? add_target : accum_target);
2930           val_so_far = (val_so_far << log) + 1;
2931           break;
2932
2933         case alg_sub_t2_m:
2934           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935                                 build_int_cst (NULL_TREE, log),
2936                                 shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) - 1;
2940           break;
2941
2942         case alg_add_factor:
2943           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2944                               build_int_cst (NULL_TREE, log),
2945                               NULL_RTX, 0);
2946           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far += val_so_far << log;
2949           break;
2950
2951         case alg_sub_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2953                               build_int_cst (NULL_TREE, log),
2954                               NULL_RTX, 0);
2955           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2956                                  (add_target
2957                                   ? add_target : (optimize ? 0 : tem)));
2958           val_so_far = (val_so_far << log) - val_so_far;
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964
2965       /* Write a REG_EQUAL note on the last insn so that we can cse
2966          multiplication sequences.  Note that if ACCUM is a SUBREG,
2967          we've set the inner register and must properly indicate
2968          that.  */
2969
2970       tem = op0, nmode = mode;
2971       if (GET_CODE (accum) == SUBREG)
2972         {
2973           nmode = GET_MODE (SUBREG_REG (accum));
2974           tem = gen_lowpart (nmode, op0);
2975         }
2976
2977       insn = get_last_insn ();
2978       set_unique_reg_note (insn, REG_EQUAL,
2979                            gen_rtx_MULT (nmode, tem,
2980                                          GEN_INT (val_so_far)));
2981     }
2982
2983   if (variant == negate_variant)
2984     {
2985       val_so_far = -val_so_far;
2986       accum = expand_unop (mode, neg_optab, accum, target, 0);
2987     }
2988   else if (variant == add_variant)
2989     {
2990       val_so_far = val_so_far + 1;
2991       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2992     }
2993
2994   /* Compare only the bits of val and val_so_far that are significant
2995      in the result mode, to avoid sign-/zero-extension confusion.  */
2996   val &= GET_MODE_MASK (mode);
2997   val_so_far &= GET_MODE_MASK (mode);
2998   gcc_assert (val == val_so_far);
2999
3000   return accum;
3001 }
3002
3003 /* Perform a multiplication and return an rtx for the result.
3004    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3005    TARGET is a suggestion for where to store the result (an rtx).
3006
3007    We check specially for a constant integer as OP1.
3008    If you want this check for OP0 as well, then before calling
3009    you should swap the two operands if OP0 would be constant.  */
3010
3011 rtx
3012 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3013              int unsignedp)
3014 {
3015   enum mult_variant variant;
3016   struct algorithm algorithm;
3017   int max_cost;
3018   bool speed = optimize_insn_for_speed_p ();
3019
3020   /* Handling const0_rtx here allows us to use zero as a rogue value for
3021      coeff below.  */
3022   if (op1 == const0_rtx)
3023     return const0_rtx;
3024   if (op1 == const1_rtx)
3025     return op0;
3026   if (op1 == constm1_rtx)
3027     return expand_unop (mode,
3028                         GET_MODE_CLASS (mode) == MODE_INT
3029                         && !unsignedp && flag_trapv
3030                         ? negv_optab : neg_optab,
3031                         op0, target, 0);
3032
3033   /* These are the operations that are potentially turned into a sequence
3034      of shifts and additions.  */
3035   if (SCALAR_INT_MODE_P (mode)
3036       && (unsignedp || !flag_trapv))
3037     {
3038       HOST_WIDE_INT coeff = 0;
3039       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3040
3041       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3042          less than or equal in size to `unsigned int' this doesn't matter.
3043          If the mode is larger than `unsigned int', then synth_mult works
3044          only if the constant value exactly fits in an `unsigned int' without
3045          any truncation.  This means that multiplying by negative values does
3046          not work; results are off by 2^32 on a 32 bit machine.  */
3047
3048       if (GET_CODE (op1) == CONST_INT)
3049         {
3050           /* Attempt to handle multiplication of DImode values by negative
3051              coefficients, by performing the multiplication by a positive
3052              multiplier and then inverting the result.  */
3053           if (INTVAL (op1) < 0
3054               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3055             {
3056               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3057                  result is interpreted as an unsigned coefficient.
3058                  Exclude cost of op0 from max_cost to match the cost
3059                  calculation of the synth_mult.  */
3060               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3061                          - neg_cost[speed][mode];
3062               if (max_cost > 0
3063                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3064                                           &variant, max_cost))
3065                 {
3066                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3067                                                 NULL_RTX, &algorithm,
3068                                                 variant);
3069                   return expand_unop (mode, neg_optab, temp, target, 0);
3070                 }
3071             }
3072           else coeff = INTVAL (op1);
3073         }
3074       else if (GET_CODE (op1) == CONST_DOUBLE)
3075         {
3076           /* If we are multiplying in DImode, it may still be a win
3077              to try to work with shifts and adds.  */
3078           if (CONST_DOUBLE_HIGH (op1) == 0
3079               && CONST_DOUBLE_LOW (op1) > 0)
3080             coeff = CONST_DOUBLE_LOW (op1);
3081           else if (CONST_DOUBLE_LOW (op1) == 0
3082                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3083             {
3084               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3085                           + HOST_BITS_PER_WIDE_INT;
3086               return expand_shift (LSHIFT_EXPR, mode, op0,
3087                                    build_int_cst (NULL_TREE, shift),
3088                                    target, unsignedp);
3089             }
3090         }
3091
3092       /* We used to test optimize here, on the grounds that it's better to
3093          produce a smaller program when -O is not used.  But this causes
3094          such a terrible slowdown sometimes that it seems better to always
3095          use synth_mult.  */
3096       if (coeff != 0)
3097         {
3098           /* Special case powers of two.  */
3099           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3100             return expand_shift (LSHIFT_EXPR, mode, op0,
3101                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3102                                  target, unsignedp);
3103
3104           /* Exclude cost of op0 from max_cost to match the cost
3105              calculation of the synth_mult.  */
3106           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3107           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3108                                    max_cost))
3109             return expand_mult_const (mode, op0, coeff, target,
3110                                       &algorithm, variant);
3111         }
3112     }
3113
3114   if (GET_CODE (op0) == CONST_DOUBLE)
3115     {
3116       rtx temp = op0;
3117       op0 = op1;
3118       op1 = temp;
3119     }
3120
3121   /* Expand x*2.0 as x+x.  */
3122   if (GET_CODE (op1) == CONST_DOUBLE
3123       && SCALAR_FLOAT_MODE_P (mode))
3124     {
3125       REAL_VALUE_TYPE d;
3126       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3127
3128       if (REAL_VALUES_EQUAL (d, dconst2))
3129         {
3130           op0 = force_reg (GET_MODE (op0), op0);
3131           return expand_binop (mode, add_optab, op0, op0,
3132                                target, unsignedp, OPTAB_LIB_WIDEN);
3133         }
3134     }
3135
3136   /* This used to use umul_optab if unsigned, but for non-widening multiply
3137      there is no difference between signed and unsigned.  */
3138   op0 = expand_binop (mode,
3139                       ! unsignedp
3140                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3141                       ? smulv_optab : smul_optab,
3142                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3143   gcc_assert (op0);
3144   return op0;
3145 }
3146 \f
3147 /* Return the smallest n such that 2**n >= X.  */
3148
3149 int
3150 ceil_log2 (unsigned HOST_WIDE_INT x)
3151 {
3152   return floor_log2 (x - 1) + 1;
3153 }
3154
3155 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3156    replace division by D, and put the least significant N bits of the result
3157    in *MULTIPLIER_PTR and return the most significant bit.
3158
3159    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3160    needed precision is in PRECISION (should be <= N).
3161
3162    PRECISION should be as small as possible so this function can choose
3163    multiplier more freely.
3164
3165    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3166    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3167
3168    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3169    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3170
3171 static
3172 unsigned HOST_WIDE_INT
3173 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3174                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3175 {
3176   HOST_WIDE_INT mhigh_hi, mlow_hi;
3177   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3178   int lgup, post_shift;
3179   int pow, pow2;
3180   unsigned HOST_WIDE_INT nl, dummy1;
3181   HOST_WIDE_INT nh, dummy2;
3182
3183   /* lgup = ceil(log2(divisor)); */
3184   lgup = ceil_log2 (d);
3185
3186   gcc_assert (lgup <= n);
3187
3188   pow = n + lgup;
3189   pow2 = n + lgup - precision;
3190
3191   /* We could handle this with some effort, but this case is much
3192      better handled directly with a scc insn, so rely on caller using
3193      that.  */
3194   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3195
3196   /* mlow = 2^(N + lgup)/d */
3197  if (pow >= HOST_BITS_PER_WIDE_INT)
3198     {
3199       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3200       nl = 0;
3201     }
3202   else
3203     {
3204       nh = 0;
3205       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3206     }
3207   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3208                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3209
3210   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3211   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3212     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3213   else
3214     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3215   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3216                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3217
3218   gcc_assert (!mhigh_hi || nh - d < d);
3219   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3220   /* Assert that mlow < mhigh.  */
3221   gcc_assert (mlow_hi < mhigh_hi
3222               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3223
3224   /* If precision == N, then mlow, mhigh exceed 2^N
3225      (but they do not exceed 2^(N+1)).  */
3226
3227   /* Reduce to lowest terms.  */
3228   for (post_shift = lgup; post_shift > 0; post_shift--)
3229     {
3230       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3231       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3232       if (ml_lo >= mh_lo)
3233         break;
3234
3235       mlow_hi = 0;
3236       mlow_lo = ml_lo;
3237       mhigh_hi = 0;
3238       mhigh_lo = mh_lo;
3239     }
3240
3241   *post_shift_ptr = post_shift;
3242   *lgup_ptr = lgup;
3243   if (n < HOST_BITS_PER_WIDE_INT)
3244     {
3245       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3246       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3247       return mhigh_lo >= mask;
3248     }
3249   else
3250     {
3251       *multiplier_ptr = GEN_INT (mhigh_lo);
3252       return mhigh_hi;
3253     }
3254 }
3255
3256 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3257    congruent to 1 (mod 2**N).  */
3258
3259 static unsigned HOST_WIDE_INT
3260 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3261 {
3262   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3263
3264   /* The algorithm notes that the choice y = x satisfies
3265      x*y == 1 mod 2^3, since x is assumed odd.
3266      Each iteration doubles the number of bits of significance in y.  */
3267
3268   unsigned HOST_WIDE_INT mask;
3269   unsigned HOST_WIDE_INT y = x;
3270   int nbit = 3;
3271
3272   mask = (n == HOST_BITS_PER_WIDE_INT
3273           ? ~(unsigned HOST_WIDE_INT) 0
3274           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3275
3276   while (nbit < n)
3277     {
3278       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3279       nbit *= 2;
3280     }
3281   return y;
3282 }
3283
3284 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3285    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3286    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3287    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3288    become signed.
3289
3290    The result is put in TARGET if that is convenient.
3291
3292    MODE is the mode of operation.  */
3293
3294 rtx
3295 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3296                              rtx op1, rtx target, int unsignedp)
3297 {
3298   rtx tem;
3299   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3300
3301   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3302                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3303                       NULL_RTX, 0);
3304   tem = expand_and (mode, tem, op1, NULL_RTX);
3305   adj_operand
3306     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3307                      adj_operand);
3308
3309   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3310                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3311                       NULL_RTX, 0);
3312   tem = expand_and (mode, tem, op0, NULL_RTX);
3313   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3314                           target);
3315
3316   return target;
3317 }
3318
3319 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3320
3321 static rtx
3322 extract_high_half (enum machine_mode mode, rtx op)
3323 {
3324   enum machine_mode wider_mode;
3325
3326   if (mode == word_mode)
3327     return gen_highpart (mode, op);
3328
3329   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3330
3331   wider_mode = GET_MODE_WIDER_MODE (mode);
3332   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3333                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3334   return convert_modes (mode, wider_mode, op, 0);
3335 }
3336
3337 /* Like expand_mult_highpart, but only consider using a multiplication
3338    optab.  OP1 is an rtx for the constant operand.  */
3339
3340 static rtx
3341 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3342                             rtx target, int unsignedp, int max_cost)
3343 {
3344   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3345   enum machine_mode wider_mode;
3346   optab moptab;
3347   rtx tem;
3348   int size;
3349   bool speed = optimize_insn_for_speed_p ();
3350
3351   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3352
3353   wider_mode = GET_MODE_WIDER_MODE (mode);
3354   size = GET_MODE_BITSIZE (mode);
3355
3356   /* Firstly, try using a multiplication insn that only generates the needed
3357      high part of the product, and in the sign flavor of unsignedp.  */
3358   if (mul_highpart_cost[speed][mode] < max_cost)
3359     {
3360       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3361       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3362                           unsignedp, OPTAB_DIRECT);
3363       if (tem)
3364         return tem;
3365     }
3366
3367   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3368      Need to adjust the result after the multiplication.  */
3369   if (size - 1 < BITS_PER_WORD
3370       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3371           + 4 * add_cost[speed][mode] < max_cost))
3372     {
3373       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3374       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3375                           unsignedp, OPTAB_DIRECT);
3376       if (tem)
3377         /* We used the wrong signedness.  Adjust the result.  */
3378         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3379                                             tem, unsignedp);
3380     }
3381
3382   /* Try widening multiplication.  */
3383   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3384   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3385       && mul_widen_cost[speed][wider_mode] < max_cost)
3386     {
3387       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3388                           unsignedp, OPTAB_WIDEN);
3389       if (tem)
3390         return extract_high_half (mode, tem);
3391     }
3392
3393   /* Try widening the mode and perform a non-widening multiplication.  */
3394   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3395       && size - 1 < BITS_PER_WORD
3396       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3397     {
3398       rtx insns, wop0, wop1;
3399
3400       /* We need to widen the operands, for example to ensure the
3401          constant multiplier is correctly sign or zero extended.
3402          Use a sequence to clean-up any instructions emitted by
3403          the conversions if things don't work out.  */
3404       start_sequence ();
3405       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3406       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3407       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3408                           unsignedp, OPTAB_WIDEN);
3409       insns = get_insns ();
3410       end_sequence ();
3411
3412       if (tem)
3413         {
3414           emit_insn (insns);
3415           return extract_high_half (mode, tem);
3416         }
3417     }
3418
3419   /* Try widening multiplication of opposite signedness, and adjust.  */
3420   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3421   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3422       && size - 1 < BITS_PER_WORD
3423       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3424           + 4 * add_cost[speed][mode] < max_cost))
3425     {
3426       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3427                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3428       if (tem != 0)
3429         {
3430           tem = extract_high_half (mode, tem);
3431           /* We used the wrong signedness.  Adjust the result.  */
3432           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3433                                               target, unsignedp);
3434         }
3435     }
3436
3437   return 0;
3438 }
3439
3440 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3441    putting the high half of the result in TARGET if that is convenient,
3442    and return where the result is.  If the operation can not be performed,
3443    0 is returned.
3444
3445    MODE is the mode of operation and result.
3446
3447    UNSIGNEDP nonzero means unsigned multiply.
3448
3449    MAX_COST is the total allowed cost for the expanded RTL.  */
3450
3451 static rtx
3452 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3453                       rtx target, int unsignedp, int max_cost)
3454 {
3455   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3456   unsigned HOST_WIDE_INT cnst1;
3457   int extra_cost;
3458   bool sign_adjust = false;
3459   enum mult_variant variant;
3460   struct algorithm alg;
3461   rtx tem;
3462   bool speed = optimize_insn_for_speed_p ();
3463
3464   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3465   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3466   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3467
3468   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3469
3470   /* We can't optimize modes wider than BITS_PER_WORD.
3471      ??? We might be able to perform double-word arithmetic if
3472      mode == word_mode, however all the cost calculations in
3473      synth_mult etc. assume single-word operations.  */
3474   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3475     return expand_mult_highpart_optab (mode, op0, op1, target,
3476                                        unsignedp, max_cost);
3477
3478   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3479
3480   /* Check whether we try to multiply by a negative constant.  */
3481   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3482     {
3483       sign_adjust = true;
3484       extra_cost += add_cost[speed][mode];
3485     }
3486
3487   /* See whether shift/add multiplication is cheap enough.  */
3488   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3489                            max_cost - extra_cost))
3490     {
3491       /* See whether the specialized multiplication optabs are
3492          cheaper than the shift/add version.  */
3493       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3494                                         alg.cost.cost + extra_cost);
3495       if (tem)
3496         return tem;
3497
3498       tem = convert_to_mode (wider_mode, op0, unsignedp);
3499       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3500       tem = extract_high_half (mode, tem);
3501
3502       /* Adjust result for signedness.  */
3503       if (sign_adjust)
3504         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3505
3506       return tem;
3507     }
3508   return expand_mult_highpart_optab (mode, op0, op1, target,
3509                                      unsignedp, max_cost);
3510 }
3511
3512
3513 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3514
3515 static rtx
3516 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3517 {
3518   unsigned HOST_WIDE_INT masklow, maskhigh;
3519   rtx result, temp, shift, label;
3520   int logd;
3521
3522   logd = floor_log2 (d);
3523   result = gen_reg_rtx (mode);
3524
3525   /* Avoid conditional branches when they're expensive.  */
3526   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3527       && optimize_insn_for_speed_p ())
3528     {
3529       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3530                                       mode, 0, -1);
3531       if (signmask)
3532         {
3533           signmask = force_reg (mode, signmask);
3534           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3535           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3536
3537           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3538              which instruction sequence to use.  If logical right shifts
3539              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3540              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3541
3542           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3543           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3544               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3545             {
3546               temp = expand_binop (mode, xor_optab, op0, signmask,
3547                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3548               temp = expand_binop (mode, sub_optab, temp, signmask,
3549                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3550               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3551                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552               temp = expand_binop (mode, xor_optab, temp, signmask,
3553                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3554               temp = expand_binop (mode, sub_optab, temp, signmask,
3555                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3556             }
3557           else
3558             {
3559               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3560                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3561               signmask = force_reg (mode, signmask);
3562
3563               temp = expand_binop (mode, add_optab, op0, signmask,
3564                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3565               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3566                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3567               temp = expand_binop (mode, sub_optab, temp, signmask,
3568                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3569             }
3570           return temp;
3571         }
3572     }
3573
3574   /* Mask contains the mode's signbit and the significant bits of the
3575      modulus.  By including the signbit in the operation, many targets
3576      can avoid an explicit compare operation in the following comparison
3577      against zero.  */
3578
3579   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3580   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3581     {
3582       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3583       maskhigh = -1;
3584     }
3585   else
3586     maskhigh = (HOST_WIDE_INT) -1
3587                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3588
3589   temp = expand_binop (mode, and_optab, op0,
3590                        immed_double_const (masklow, maskhigh, mode),
3591                        result, 1, OPTAB_LIB_WIDEN);
3592   if (temp != result)
3593     emit_move_insn (result, temp);
3594
3595   label = gen_label_rtx ();
3596   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3597
3598   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3599                        0, OPTAB_LIB_WIDEN);
3600   masklow = (HOST_WIDE_INT) -1 << logd;
3601   maskhigh = -1;
3602   temp = expand_binop (mode, ior_optab, temp,
3603                        immed_double_const (masklow, maskhigh, mode),
3604                        result, 1, OPTAB_LIB_WIDEN);
3605   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3606                        0, OPTAB_LIB_WIDEN);
3607   if (temp != result)
3608     emit_move_insn (result, temp);
3609   emit_label (label);
3610   return result;
3611 }
3612
3613 /* Expand signed division of OP0 by a power of two D in mode MODE.
3614    This routine is only called for positive values of D.  */
3615
3616 static rtx
3617 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3618 {
3619   rtx temp, label;
3620   tree shift;
3621   int logd;
3622
3623   logd = floor_log2 (d);
3624   shift = build_int_cst (NULL_TREE, logd);
3625
3626   if (d == 2
3627       && BRANCH_COST (optimize_insn_for_speed_p (),
3628                       false) >= 1)
3629     {
3630       temp = gen_reg_rtx (mode);
3631       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3632       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3633                            0, OPTAB_LIB_WIDEN);
3634       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3635     }
3636
3637 #ifdef HAVE_conditional_move
3638   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3639       >= 2)
3640     {
3641       rtx temp2;
3642
3643       /* ??? emit_conditional_move forces a stack adjustment via
3644          compare_from_rtx so, if the sequence is discarded, it will
3645          be lost.  Do it now instead.  */
3646       do_pending_stack_adjust ();
3647
3648       start_sequence ();
3649       temp2 = copy_to_mode_reg (mode, op0);
3650       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3651                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3652       temp = force_reg (mode, temp);
3653
3654       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3655       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3656                                      mode, temp, temp2, mode, 0);
3657       if (temp2)
3658         {
3659           rtx seq = get_insns ();
3660           end_sequence ();
3661           emit_insn (seq);
3662           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3663         }
3664       end_sequence ();
3665     }
3666 #endif
3667
3668   if (BRANCH_COST (optimize_insn_for_speed_p (),
3669                    false) >= 2)
3670     {
3671       int ushift = GET_MODE_BITSIZE (mode) - logd;
3672
3673       temp = gen_reg_rtx (mode);
3674       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3675       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3676         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3677                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3678       else
3679         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3680                              build_int_cst (NULL_TREE, ushift),
3681                              NULL_RTX, 1);
3682       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3683                            0, OPTAB_LIB_WIDEN);
3684       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3685     }
3686
3687   label = gen_label_rtx ();
3688   temp = copy_to_mode_reg (mode, op0);
3689   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3690   expand_inc (temp, GEN_INT (d - 1));
3691   emit_label (label);
3692   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3693 }
3694 \f
3695 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3696    if that is convenient, and returning where the result is.
3697    You may request either the quotient or the remainder as the result;
3698    specify REM_FLAG nonzero to get the remainder.
3699
3700    CODE is the expression code for which kind of division this is;
3701    it controls how rounding is done.  MODE is the machine mode to use.
3702    UNSIGNEDP nonzero means do unsigned division.  */
3703
3704 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3705    and then correct it by or'ing in missing high bits
3706    if result of ANDI is nonzero.
3707    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3708    This could optimize to a bfexts instruction.
3709    But C doesn't use these operations, so their optimizations are
3710    left for later.  */
3711 /* ??? For modulo, we don't actually need the highpart of the first product,
3712    the low part will do nicely.  And for small divisors, the second multiply
3713    can also be a low-part only multiply or even be completely left out.
3714    E.g. to calculate the remainder of a division by 3 with a 32 bit
3715    multiply, multiply with 0x55555556 and extract the upper two bits;
3716    the result is exact for inputs up to 0x1fffffff.
3717    The input range can be reduced by using cross-sum rules.
3718    For odd divisors >= 3, the following table gives right shift counts
3719    so that if a number is shifted by an integer multiple of the given
3720    amount, the remainder stays the same:
3721    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3722    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3723    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3724    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3725    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3726
3727    Cross-sum rules for even numbers can be derived by leaving as many bits
3728    to the right alone as the divisor has zeros to the right.
3729    E.g. if x is an unsigned 32 bit number:
3730    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3731    */
3732
3733 rtx
3734 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3735                rtx op0, rtx op1, rtx target, int unsignedp)
3736 {
3737   enum machine_mode compute_mode;
3738   rtx tquotient;
3739   rtx quotient = 0, remainder = 0;
3740   rtx last;
3741   int size;
3742   rtx insn, set;
3743   optab optab1, optab2;
3744   int op1_is_constant, op1_is_pow2 = 0;
3745   int max_cost, extra_cost;
3746   static HOST_WIDE_INT last_div_const = 0;
3747   static HOST_WIDE_INT ext_op1;
3748   bool speed = optimize_insn_for_speed_p ();
3749
3750   op1_is_constant = GET_CODE (op1) == CONST_INT;
3751   if (op1_is_constant)
3752     {
3753       ext_op1 = INTVAL (op1);
3754       if (unsignedp)
3755         ext_op1 &= GET_MODE_MASK (mode);
3756       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3757                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3758     }
3759
3760   /*
3761      This is the structure of expand_divmod:
3762
3763      First comes code to fix up the operands so we can perform the operations
3764      correctly and efficiently.
3765
3766      Second comes a switch statement with code specific for each rounding mode.
3767      For some special operands this code emits all RTL for the desired
3768      operation, for other cases, it generates only a quotient and stores it in
3769      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3770      to indicate that it has not done anything.
3771
3772      Last comes code that finishes the operation.  If QUOTIENT is set and
3773      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3774      QUOTIENT is not set, it is computed using trunc rounding.
3775
3776      We try to generate special code for division and remainder when OP1 is a
3777      constant.  If |OP1| = 2**n we can use shifts and some other fast
3778      operations.  For other values of OP1, we compute a carefully selected
3779      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3780      by m.
3781
3782      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3783      half of the product.  Different strategies for generating the product are
3784      implemented in expand_mult_highpart.
3785
3786      If what we actually want is the remainder, we generate that by another
3787      by-constant multiplication and a subtraction.  */
3788
3789   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3790      code below will malfunction if we are, so check here and handle
3791      the special case if so.  */
3792   if (op1 == const1_rtx)
3793     return rem_flag ? const0_rtx : op0;
3794
3795     /* When dividing by -1, we could get an overflow.
3796      negv_optab can handle overflows.  */
3797   if (! unsignedp && op1 == constm1_rtx)
3798     {
3799       if (rem_flag)
3800         return const0_rtx;
3801       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3802                           ? negv_optab : neg_optab, op0, target, 0);
3803     }
3804
3805   if (target
3806       /* Don't use the function value register as a target
3807          since we have to read it as well as write it,
3808          and function-inlining gets confused by this.  */
3809       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3810           /* Don't clobber an operand while doing a multi-step calculation.  */
3811           || ((rem_flag || op1_is_constant)
3812               && (reg_mentioned_p (target, op0)
3813                   || (MEM_P (op0) && MEM_P (target))))
3814           || reg_mentioned_p (target, op1)
3815           || (MEM_P (op1) && MEM_P (target))))
3816     target = 0;
3817
3818   /* Get the mode in which to perform this computation.  Normally it will
3819      be MODE, but sometimes we can't do the desired operation in MODE.
3820      If so, pick a wider mode in which we can do the operation.  Convert
3821      to that mode at the start to avoid repeated conversions.
3822
3823      First see what operations we need.  These depend on the expression
3824      we are evaluating.  (We assume that divxx3 insns exist under the
3825      same conditions that modxx3 insns and that these insns don't normally
3826      fail.  If these assumptions are not correct, we may generate less
3827      efficient code in some cases.)
3828
3829      Then see if we find a mode in which we can open-code that operation
3830      (either a division, modulus, or shift).  Finally, check for the smallest
3831      mode for which we can do the operation with a library call.  */
3832
3833   /* We might want to refine this now that we have division-by-constant
3834      optimization.  Since expand_mult_highpart tries so many variants, it is
3835      not straightforward to generalize this.  Maybe we should make an array
3836      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3837
3838   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3839             ? (unsignedp ? lshr_optab : ashr_optab)
3840             : (unsignedp ? udiv_optab : sdiv_optab));
3841   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3842             ? optab1
3843             : (unsignedp ? udivmod_optab : sdivmod_optab));
3844
3845   for (compute_mode = mode; compute_mode != VOIDmode;
3846        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3847     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3848         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3849       break;
3850
3851   if (compute_mode == VOIDmode)
3852     for (compute_mode = mode; compute_mode != VOIDmode;
3853          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3854       if (optab_libfunc (optab1, compute_mode)
3855           || optab_libfunc (optab2, compute_mode))
3856         break;
3857
3858   /* If we still couldn't find a mode, use MODE, but expand_binop will
3859      probably die.  */
3860   if (compute_mode == VOIDmode)
3861     compute_mode = mode;
3862
3863   if (target && GET_MODE (target) == compute_mode)
3864     tquotient = target;
3865   else
3866     tquotient = gen_reg_rtx (compute_mode);
3867
3868   size = GET_MODE_BITSIZE (compute_mode);
3869 #if 0
3870   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3871      (mode), and thereby get better code when OP1 is a constant.  Do that
3872      later.  It will require going over all usages of SIZE below.  */
3873   size = GET_MODE_BITSIZE (mode);
3874 #endif
3875
3876   /* Only deduct something for a REM if the last divide done was
3877      for a different constant.   Then set the constant of the last
3878      divide.  */
3879   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3880   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3881                      && INTVAL (op1) == last_div_const))
3882     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3883
3884   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3885
3886   /* Now convert to the best mode to use.  */
3887   if (compute_mode != mode)
3888     {
3889       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3890       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3891
3892       /* convert_modes may have placed op1 into a register, so we
3893          must recompute the following.  */
3894       op1_is_constant = GET_CODE (op1) == CONST_INT;
3895       op1_is_pow2 = (op1_is_constant
3896                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3897                           || (! unsignedp
3898                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3899     }
3900
3901   /* If one of the operands is a volatile MEM, copy it into a register.  */
3902
3903   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3904     op0 = force_reg (compute_mode, op0);
3905   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3906     op1 = force_reg (compute_mode, op1);
3907
3908   /* If we need the remainder or if OP1 is constant, we need to
3909      put OP0 in a register in case it has any queued subexpressions.  */
3910   if (rem_flag || op1_is_constant)
3911     op0 = force_reg (compute_mode, op0);
3912
3913   last = get_last_insn ();
3914
3915   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3916   if (unsignedp)
3917     {
3918       if (code == FLOOR_DIV_EXPR)
3919         code = TRUNC_DIV_EXPR;
3920       if (code == FLOOR_MOD_EXPR)
3921         code = TRUNC_MOD_EXPR;
3922       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3923         code = TRUNC_DIV_EXPR;
3924     }
3925
3926   if (op1 != const0_rtx)
3927     switch (code)
3928       {
3929       case TRUNC_MOD_EXPR:
3930       case TRUNC_DIV_EXPR:
3931         if (op1_is_constant)
3932           {
3933             if (unsignedp)
3934               {
3935                 unsigned HOST_WIDE_INT mh;
3936                 int pre_shift, post_shift;
3937                 int dummy;
3938                 rtx ml;
3939                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3940                                             & GET_MODE_MASK (compute_mode));
3941
3942                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3943                   {
3944                     pre_shift = floor_log2 (d);
3945                     if (rem_flag)
3946                       {
3947                         remainder
3948                           = expand_binop (compute_mode, and_optab, op0,
3949                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3950                                           remainder, 1,
3951                                           OPTAB_LIB_WIDEN);
3952                         if (remainder)
3953                           return gen_lowpart (mode, remainder);
3954                       }
3955                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3956                                              build_int_cst (NULL_TREE,
3957                                                             pre_shift),
3958                                              tquotient, 1);
3959                   }
3960                 else if (size <= HOST_BITS_PER_WIDE_INT)
3961                   {
3962                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3963                       {
3964                         /* Most significant bit of divisor is set; emit an scc
3965                            insn.  */
3966                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3967                                                     compute_mode, 1, 1);
3968                         if (quotient == 0)
3969                           goto fail1;
3970                       }
3971                     else
3972                       {
3973                         /* Find a suitable multiplier and right shift count
3974                            instead of multiplying with D.  */
3975
3976                         mh = choose_multiplier (d, size, size,
3977                                                 &ml, &post_shift, &dummy);
3978
3979                         /* If the suggested multiplier is more than SIZE bits,
3980                            we can do better for even divisors, using an
3981                            initial right shift.  */
3982                         if (mh != 0 && (d & 1) == 0)
3983                           {
3984                             pre_shift = floor_log2 (d & -d);
3985                             mh = choose_multiplier (d >> pre_shift, size,
3986                                                     size - pre_shift,
3987                                                     &ml, &post_shift, &dummy);
3988                             gcc_assert (!mh);
3989                           }
3990                         else
3991                           pre_shift = 0;
3992
3993                         if (mh != 0)
3994                           {
3995                             rtx t1, t2, t3, t4;
3996
3997                             if (post_shift - 1 >= BITS_PER_WORD)
3998                               goto fail1;
3999
4000                             extra_cost
4001                               = (shift_cost[speed][compute_mode][post_shift - 1]
4002                                  + shift_cost[speed][compute_mode][1]
4003                                  + 2 * add_cost[speed][compute_mode]);
4004                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4005                                                        NULL_RTX, 1,
4006                                                        max_cost - extra_cost);
4007                             if (t1 == 0)
4008                               goto fail1;
4009                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4010                                                                op0, t1),
4011                                                 NULL_RTX);
4012                             t3 = expand_shift
4013                               (RSHIFT_EXPR, compute_mode, t2,
4014                                build_int_cst (NULL_TREE, 1),
4015                                NULL_RTX,1);
4016                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4017                                                               t1, t3),
4018                                                 NULL_RTX);
4019                             quotient = expand_shift
4020                               (RSHIFT_EXPR, compute_mode, t4,
4021                                build_int_cst (NULL_TREE, post_shift - 1),
4022                                tquotient, 1);
4023                           }
4024                         else
4025                           {
4026                             rtx t1, t2;
4027
4028                             if (pre_shift >= BITS_PER_WORD
4029                                 || post_shift >= BITS_PER_WORD)
4030                               goto fail1;
4031
4032                             t1 = expand_shift
4033                               (RSHIFT_EXPR, compute_mode, op0,
4034                                build_int_cst (NULL_TREE, pre_shift),
4035                                NULL_RTX, 1);
4036                             extra_cost
4037                               = (shift_cost[speed][compute_mode][pre_shift]
4038                                  + shift_cost[speed][compute_mode][post_shift]);
4039                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4040                                                        NULL_RTX, 1,
4041                                                        max_cost - extra_cost);
4042                             if (t2 == 0)
4043                               goto fail1;
4044                             quotient = expand_shift
4045                               (RSHIFT_EXPR, compute_mode, t2,
4046                                build_int_cst (NULL_TREE, post_shift),
4047                                tquotient, 1);
4048                           }
4049                       }
4050                   }
4051                 else            /* Too wide mode to use tricky code */
4052                   break;
4053
4054                 insn = get_last_insn ();
4055                 if (insn != last
4056                     && (set = single_set (insn)) != 0
4057                     && SET_DEST (set) == quotient)
4058                   set_unique_reg_note (insn,
4059                                        REG_EQUAL,
4060                                        gen_rtx_UDIV (compute_mode, op0, op1));
4061               }
4062             else                /* TRUNC_DIV, signed */
4063               {
4064                 unsigned HOST_WIDE_INT ml;
4065                 int lgup, post_shift;
4066                 rtx mlr;
4067                 HOST_WIDE_INT d = INTVAL (op1);
4068                 unsigned HOST_WIDE_INT abs_d;
4069
4070                 /* Since d might be INT_MIN, we have to cast to
4071                    unsigned HOST_WIDE_INT before negating to avoid
4072                    undefined signed overflow.  */
4073                 abs_d = (d >= 0
4074                          ? (unsigned HOST_WIDE_INT) d
4075                          : - (unsigned HOST_WIDE_INT) d);
4076
4077                 /* n rem d = n rem -d */
4078                 if (rem_flag && d < 0)
4079                   {
4080                     d = abs_d;
4081                     op1 = gen_int_mode (abs_d, compute_mode);
4082                   }
4083
4084                 if (d == 1)
4085                   quotient = op0;
4086                 else if (d == -1)
4087                   quotient = expand_unop (compute_mode, neg_optab, op0,
4088                                           tquotient, 0);
4089                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4090                   {
4091                     /* This case is not handled correctly below.  */
4092                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4093                                                 compute_mode, 1, 1);
4094                     if (quotient == 0)
4095                       goto fail1;
4096                   }
4097                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4098                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4099                                       : sdiv_pow2_cheap[speed][compute_mode])
4100                          /* We assume that cheap metric is true if the
4101                             optab has an expander for this mode.  */
4102                          && ((optab_handler ((rem_flag ? smod_optab
4103                                               : sdiv_optab),
4104                                               compute_mode)->insn_code
4105                               != CODE_FOR_nothing)
4106                              || (optab_handler(sdivmod_optab,
4107                                                compute_mode)
4108                                  ->insn_code != CODE_FOR_nothing)))
4109                   ;
4110                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4111                   {
4112                     if (rem_flag)
4113                       {
4114                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4115                         if (remainder)
4116                           return gen_lowpart (mode, remainder);
4117                       }
4118
4119                     if (sdiv_pow2_cheap[speed][compute_mode]
4120                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4121                              != CODE_FOR_nothing)
4122                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4123                                 != CODE_FOR_nothing)))
4124                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4125                                                 compute_mode, op0,
4126                                                 gen_int_mode (abs_d,
4127                                                               compute_mode),
4128                                                 NULL_RTX, 0);
4129                     else
4130                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4131
4132                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4133                        negate the quotient.  */
4134                     if (d < 0)
4135                       {
4136                         insn = get_last_insn ();
4137                         if (insn != last
4138                             && (set = single_set (insn)) != 0
4139                             && SET_DEST (set) == quotient
4140                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4141                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4142                           set_unique_reg_note (insn,
4143                                                REG_EQUAL,
4144                                                gen_rtx_DIV (compute_mode,
4145                                                             op0,
4146                                                             GEN_INT
4147                                                             (trunc_int_for_mode
4148                                                              (abs_d,
4149                                                               compute_mode))));
4150
4151                         quotient = expand_unop (compute_mode, neg_optab,
4152                                                 quotient, quotient, 0);
4153                       }
4154                   }
4155                 else if (size <= HOST_BITS_PER_WIDE_INT)
4156                   {
4157                     choose_multiplier (abs_d, size, size - 1,
4158                                        &mlr, &post_shift, &lgup);
4159                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4160                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4161                       {
4162                         rtx t1, t2, t3;
4163
4164                         if (post_shift >= BITS_PER_WORD
4165                             || size - 1 >= BITS_PER_WORD)
4166                           goto fail1;
4167
4168                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4169                                       + shift_cost[speed][compute_mode][size - 1]
4170                                       + add_cost[speed][compute_mode]);
4171                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4172                                                    NULL_RTX, 0,
4173                                                    max_cost - extra_cost);
4174                         if (t1 == 0)
4175                           goto fail1;
4176                         t2 = expand_shift
4177                           (RSHIFT_EXPR, compute_mode, t1,
4178                            build_int_cst (NULL_TREE, post_shift),
4179                            NULL_RTX, 0);
4180                         t3 = expand_shift
4181                           (RSHIFT_EXPR, compute_mode, op0,
4182                            build_int_cst (NULL_TREE, size - 1),
4183                            NULL_RTX, 0);
4184                         if (d < 0)
4185                           quotient
4186                             = force_operand (gen_rtx_MINUS (compute_mode,
4187                                                             t3, t2),
4188                                              tquotient);
4189                         else
4190                           quotient
4191                             = force_operand (gen_rtx_MINUS (compute_mode,
4192                                                             t2, t3),
4193                                              tquotient);
4194                       }
4195                     else
4196                       {
4197                         rtx t1, t2, t3, t4;
4198
4199                         if (post_shift >= BITS_PER_WORD
4200                             || size - 1 >= BITS_PER_WORD)
4201                           goto fail1;
4202
4203                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4204                         mlr = gen_int_mode (ml, compute_mode);
4205                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4206                                       + shift_cost[speed][compute_mode][size - 1]
4207                                       + 2 * add_cost[speed][compute_mode]);
4208                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4209                                                    NULL_RTX, 0,
4210                                                    max_cost - extra_cost);
4211                         if (t1 == 0)
4212                           goto fail1;
4213                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4214                                                           t1, op0),
4215                                             NULL_RTX);
4216                         t3 = expand_shift
4217                           (RSHIFT_EXPR, compute_mode, t2,
4218                            build_int_cst (NULL_TREE, post_shift),
4219                            NULL_RTX, 0);
4220                         t4 = expand_shift
4221                           (RSHIFT_EXPR, compute_mode, op0,
4222                            build_int_cst (NULL_TREE, size - 1),
4223                            NULL_RTX, 0);
4224                         if (d < 0)
4225                           quotient
4226                             = force_operand (gen_rtx_MINUS (compute_mode,
4227                                                             t4, t3),
4228                                              tquotient);
4229                         else
4230                           quotient
4231                             = force_operand (gen_rtx_MINUS (compute_mode,
4232                                                             t3, t4),
4233                                              tquotient);
4234                       }
4235                   }
4236                 else            /* Too wide mode to use tricky code */
4237                   break;
4238
4239                 insn = get_last_insn ();
4240                 if (insn != last
4241                     && (set = single_set (insn)) != 0
4242                     && SET_DEST (set) == quotient)
4243                   set_unique_reg_note (insn,
4244                                        REG_EQUAL,
4245                                        gen_rtx_DIV (compute_mode, op0, op1));
4246               }
4247             break;
4248           }
4249       fail1:
4250         delete_insns_since (last);
4251         break;
4252
4253       case FLOOR_DIV_EXPR:
4254       case FLOOR_MOD_EXPR:
4255       /* We will come here only for signed operations.  */
4256         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4257           {
4258             unsigned HOST_WIDE_INT mh;
4259             int pre_shift, lgup, post_shift;
4260             HOST_WIDE_INT d = INTVAL (op1);
4261             rtx ml;
4262
4263             if (d > 0)
4264               {
4265                 /* We could just as easily deal with negative constants here,
4266                    but it does not seem worth the trouble for GCC 2.6.  */
4267                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4268                   {
4269                     pre_shift = floor_log2 (d);
4270                     if (rem_flag)
4271                       {
4272                         remainder = expand_binop (compute_mode, and_optab, op0,
4273                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4274                                                   remainder, 0, OPTAB_LIB_WIDEN);
4275                         if (remainder)
4276                           return gen_lowpart (mode, remainder);
4277                       }
4278                     quotient = expand_shift
4279                       (RSHIFT_EXPR, compute_mode, op0,
4280                        build_int_cst (NULL_TREE, pre_shift),
4281                        tquotient, 0);
4282                   }
4283                 else
4284                   {
4285                     rtx t1, t2, t3, t4;
4286
4287                     mh = choose_multiplier (d, size, size - 1,
4288                                             &ml, &post_shift, &lgup);
4289                     gcc_assert (!mh);
4290
4291                     if (post_shift < BITS_PER_WORD
4292                         && size - 1 < BITS_PER_WORD)
4293                       {
4294                         t1 = expand_shift
4295                           (RSHIFT_EXPR, compute_mode, op0,
4296                            build_int_cst (NULL_TREE, size - 1),
4297                            NULL_RTX, 0);
4298                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4299                                            NULL_RTX, 0, OPTAB_WIDEN);
4300                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4301                                       + shift_cost[speed][compute_mode][size - 1]
4302                                       + 2 * add_cost[speed][compute_mode]);
4303                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4304                                                    NULL_RTX, 1,
4305                                                    max_cost - extra_cost);
4306                         if (t3 != 0)
4307                           {
4308                             t4 = expand_shift
4309                               (RSHIFT_EXPR, compute_mode, t3,
4310                                build_int_cst (NULL_TREE, post_shift),
4311                                NULL_RTX, 1);
4312                             quotient = expand_binop (compute_mode, xor_optab,
4313                                                      t4, t1, tquotient, 0,
4314                                                      OPTAB_WIDEN);
4315                           }
4316                       }
4317                   }
4318               }
4319             else
4320               {
4321                 rtx nsign, t1, t2, t3, t4;
4322                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4323                                                   op0, constm1_rtx), NULL_RTX);
4324                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4325                                    0, OPTAB_WIDEN);
4326                 nsign = expand_shift
4327                   (RSHIFT_EXPR, compute_mode, t2,
4328                    build_int_cst (NULL_TREE, size - 1),
4329                    NULL_RTX, 0);
4330                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4331                                     NULL_RTX);
4332                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4333                                     NULL_RTX, 0);
4334                 if (t4)
4335                   {
4336                     rtx t5;
4337                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4338                                       NULL_RTX, 0);
4339                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4340                                                             t4, t5),
4341                                               tquotient);
4342                   }
4343               }
4344           }
4345
4346         if (quotient != 0)
4347           break;
4348         delete_insns_since (last);
4349
4350         /* Try using an instruction that produces both the quotient and
4351            remainder, using truncation.  We can easily compensate the quotient
4352            or remainder to get floor rounding, once we have the remainder.
4353            Notice that we compute also the final remainder value here,
4354            and return the result right away.  */
4355         if (target == 0 || GET_MODE (target) != compute_mode)
4356           target = gen_reg_rtx (compute_mode);
4357
4358         if (rem_flag)
4359           {
4360             remainder
4361               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4362             quotient = gen_reg_rtx (compute_mode);
4363           }
4364         else
4365           {
4366             quotient
4367               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4368             remainder = gen_reg_rtx (compute_mode);
4369           }
4370
4371         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4372                                  quotient, remainder, 0))
4373           {
4374             /* This could be computed with a branch-less sequence.
4375                Save that for later.  */
4376             rtx tem;
4377             rtx label = gen_label_rtx ();
4378             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4379             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4380                                 NULL_RTX, 0, OPTAB_WIDEN);
4381             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4382             expand_dec (quotient, const1_rtx);
4383             expand_inc (remainder, op1);
4384             emit_label (label);
4385             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4386           }
4387
4388         /* No luck with division elimination or divmod.  Have to do it
4389            by conditionally adjusting op0 *and* the result.  */
4390         {
4391           rtx label1, label2, label3, label4, label5;
4392           rtx adjusted_op0;
4393           rtx tem;
4394
4395           quotient = gen_reg_rtx (compute_mode);
4396           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4397           label1 = gen_label_rtx ();
4398           label2 = gen_label_rtx ();
4399           label3 = gen_label_rtx ();
4400           label4 = gen_label_rtx ();
4401           label5 = gen_label_rtx ();
4402           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4403           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4404           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4405                               quotient, 0, OPTAB_LIB_WIDEN);
4406           if (tem != quotient)
4407             emit_move_insn (quotient, tem);
4408           emit_jump_insn (gen_jump (label5));
4409           emit_barrier ();
4410           emit_label (label1);
4411           expand_inc (adjusted_op0, const1_rtx);
4412           emit_jump_insn (gen_jump (label4));
4413           emit_barrier ();
4414           emit_label (label2);
4415           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4416           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4417                               quotient, 0, OPTAB_LIB_WIDEN);
4418           if (tem != quotient)
4419             emit_move_insn (quotient, tem);
4420           emit_jump_insn (gen_jump (label5));
4421           emit_barrier ();
4422           emit_label (label3);
4423           expand_dec (adjusted_op0, const1_rtx);
4424           emit_label (label4);
4425           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4426                               quotient, 0, OPTAB_LIB_WIDEN);
4427           if (tem != quotient)
4428             emit_move_insn (quotient, tem);
4429           expand_dec (quotient, const1_rtx);
4430           emit_label (label5);
4431         }
4432         break;
4433
4434       case CEIL_DIV_EXPR:
4435       case CEIL_MOD_EXPR:
4436         if (unsignedp)
4437           {
4438             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4439               {
4440                 rtx t1, t2, t3;
4441                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4442                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4443                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4444                                    tquotient, 1);
4445                 t2 = expand_binop (compute_mode, and_optab, op0,
4446                                    GEN_INT (d - 1),
4447                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4448                 t3 = gen_reg_rtx (compute_mode);
4449                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4450                                       compute_mode, 1, 1);
4451                 if (t3 == 0)
4452                   {
4453                     rtx lab;
4454                     lab = gen_label_rtx ();
4455                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4456                     expand_inc (t1, const1_rtx);
4457                     emit_label (lab);
4458                     quotient = t1;
4459                   }
4460                 else
4461                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4462                                                           t1, t3),
4463                                             tquotient);
4464                 break;
4465               }
4466
4467             /* Try using an instruction that produces both the quotient and
4468                remainder, using truncation.  We can easily compensate the
4469                quotient or remainder to get ceiling rounding, once we have the
4470                remainder.  Notice that we compute also the final remainder
4471                value here, and return the result right away.  */
4472             if (target == 0 || GET_MODE (target) != compute_mode)
4473               target = gen_reg_rtx (compute_mode);
4474
4475             if (rem_flag)
4476               {
4477                 remainder = (REG_P (target)
4478                              ? target : gen_reg_rtx (compute_mode));
4479                 quotient = gen_reg_rtx (compute_mode);
4480               }
4481             else
4482               {
4483                 quotient = (REG_P (target)
4484                             ? target : gen_reg_rtx (compute_mode));
4485                 remainder = gen_reg_rtx (compute_mode);
4486               }
4487
4488             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4489                                      remainder, 1))
4490               {
4491                 /* This could be computed with a branch-less sequence.
4492                    Save that for later.  */
4493                 rtx label = gen_label_rtx ();
4494                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4495                                  compute_mode, label);
4496                 expand_inc (quotient, const1_rtx);
4497                 expand_dec (remainder, op1);
4498                 emit_label (label);
4499                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4500               }
4501
4502             /* No luck with division elimination or divmod.  Have to do it
4503                by conditionally adjusting op0 *and* the result.  */
4504             {
4505               rtx label1, label2;
4506               rtx adjusted_op0, tem;
4507
4508               quotient = gen_reg_rtx (compute_mode);
4509               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4510               label1 = gen_label_rtx ();
4511               label2 = gen_label_rtx ();
4512               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4513                                compute_mode, label1);
4514               emit_move_insn  (quotient, const0_rtx);
4515               emit_jump_insn (gen_jump (label2));
4516               emit_barrier ();
4517               emit_label (label1);
4518               expand_dec (adjusted_op0, const1_rtx);
4519               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4520                                   quotient, 1, OPTAB_LIB_WIDEN);
4521               if (tem != quotient)
4522                 emit_move_insn (quotient, tem);
4523               expand_inc (quotient, const1_rtx);
4524               emit_label (label2);
4525             }
4526           }
4527         else /* signed */
4528           {
4529             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4530                 && INTVAL (op1) >= 0)
4531               {
4532                 /* This is extremely similar to the code for the unsigned case
4533                    above.  For 2.7 we should merge these variants, but for
4534                    2.6.1 I don't want to touch the code for unsigned since that
4535                    get used in C.  The signed case will only be used by other
4536                    languages (Ada).  */
4537
4538                 rtx t1, t2, t3;
4539                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4540                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4541                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4542                                    tquotient, 0);
4543                 t2 = expand_binop (compute_mode, and_optab, op0,
4544                                    GEN_INT (d - 1),
4545                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4546                 t3 = gen_reg_rtx (compute_mode);
4547                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4548                                       compute_mode, 1, 1);
4549                 if (t3 == 0)
4550                   {
4551                     rtx lab;
4552                     lab = gen_label_rtx ();
4553                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4554                     expand_inc (t1, const1_rtx);
4555                     emit_label (lab);
4556                     quotient = t1;
4557                   }
4558                 else
4559                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4560                                                           t1, t3),
4561                                             tquotient);
4562                 break;
4563               }
4564
4565             /* Try using an instruction that produces both the quotient and
4566                remainder, using truncation.  We can easily compensate the
4567                quotient or remainder to get ceiling rounding, once we have the
4568                remainder.  Notice that we compute also the final remainder
4569                value here, and return the result right away.  */
4570             if (target == 0 || GET_MODE (target) != compute_mode)
4571               target = gen_reg_rtx (compute_mode);
4572             if (rem_flag)
4573               {
4574                 remainder= (REG_P (target)
4575                             ? target : gen_reg_rtx (compute_mode));
4576                 quotient = gen_reg_rtx (compute_mode);
4577               }
4578             else
4579               {
4580                 quotient = (REG_P (target)
4581                             ? target : gen_reg_rtx (compute_mode));
4582                 remainder = gen_reg_rtx (compute_mode);
4583               }
4584
4585             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4586                                      remainder, 0))
4587               {
4588                 /* This could be computed with a branch-less sequence.
4589                    Save that for later.  */
4590                 rtx tem;
4591                 rtx label = gen_label_rtx ();
4592                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4593                                  compute_mode, label);
4594                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4595                                     NULL_RTX, 0, OPTAB_WIDEN);
4596                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4597                 expand_inc (quotient, const1_rtx);
4598                 expand_dec (remainder, op1);
4599                 emit_label (label);
4600                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4601               }
4602
4603             /* No luck with division elimination or divmod.  Have to do it
4604                by conditionally adjusting op0 *and* the result.  */
4605             {
4606               rtx label1, label2, label3, label4, label5;
4607               rtx adjusted_op0;
4608               rtx tem;
4609
4610               quotient = gen_reg_rtx (compute_mode);
4611               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4612               label1 = gen_label_rtx ();
4613               label2 = gen_label_rtx ();
4614               label3 = gen_label_rtx ();
4615               label4 = gen_label_rtx ();
4616               label5 = gen_label_rtx ();
4617               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4618               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4619                                compute_mode, label1);
4620               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4621                                   quotient, 0, OPTAB_LIB_WIDEN);
4622               if (tem != quotient)
4623                 emit_move_insn (quotient, tem);
4624               emit_jump_insn (gen_jump (label5));
4625               emit_barrier ();
4626               emit_label (label1);
4627               expand_dec (adjusted_op0, const1_rtx);
4628               emit_jump_insn (gen_jump (label4));
4629               emit_barrier ();
4630               emit_label (label2);
4631               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4632                                compute_mode, label3);
4633               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4634                                   quotient, 0, OPTAB_LIB_WIDEN);
4635               if (tem != quotient)
4636                 emit_move_insn (quotient, tem);
4637               emit_jump_insn (gen_jump (label5));
4638               emit_barrier ();
4639               emit_label (label3);
4640               expand_inc (adjusted_op0, const1_rtx);
4641               emit_label (label4);
4642               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4643                                   quotient, 0, OPTAB_LIB_WIDEN);
4644               if (tem != quotient)
4645                 emit_move_insn (quotient, tem);
4646               expand_inc (quotient, const1_rtx);
4647               emit_label (label5);
4648             }
4649           }
4650         break;
4651
4652       case EXACT_DIV_EXPR:
4653         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4654           {
4655             HOST_WIDE_INT d = INTVAL (op1);
4656             unsigned HOST_WIDE_INT ml;
4657             int pre_shift;
4658             rtx t1;
4659
4660             pre_shift = floor_log2 (d & -d);
4661             ml = invert_mod2n (d >> pre_shift, size);
4662             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4663                                build_int_cst (NULL_TREE, pre_shift),
4664                                NULL_RTX, unsignedp);
4665             quotient = expand_mult (compute_mode, t1,
4666                                     gen_int_mode (ml, compute_mode),
4667                                     NULL_RTX, 1);
4668
4669             insn = get_last_insn ();
4670             set_unique_reg_note (insn,
4671                                  REG_EQUAL,
4672                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4673                                                  compute_mode,
4674                                                  op0, op1));
4675           }
4676         break;
4677
4678       case ROUND_DIV_EXPR:
4679       case ROUND_MOD_EXPR:
4680         if (unsignedp)
4681           {
4682             rtx tem;
4683             rtx label;
4684             label = gen_label_rtx ();
4685             quotient = gen_reg_rtx (compute_mode);
4686             remainder = gen_reg_rtx (compute_mode);
4687             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4688               {
4689                 rtx tem;
4690                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4691                                          quotient, 1, OPTAB_LIB_WIDEN);
4692                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4693                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4694                                           remainder, 1, OPTAB_LIB_WIDEN);
4695               }
4696             tem = plus_constant (op1, -1);
4697             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4698                                 build_int_cst (NULL_TREE, 1),
4699                                 NULL_RTX, 1);
4700             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4701             expand_inc (quotient, const1_rtx);
4702             expand_dec (remainder, op1);
4703             emit_label (label);
4704           }
4705         else
4706           {
4707             rtx abs_rem, abs_op1, tem, mask;
4708             rtx label;
4709             label = gen_label_rtx ();
4710             quotient = gen_reg_rtx (compute_mode);
4711             remainder = gen_reg_rtx (compute_mode);
4712             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4713               {
4714                 rtx tem;
4715                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4716                                          quotient, 0, OPTAB_LIB_WIDEN);
4717                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4718                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4719                                           remainder, 0, OPTAB_LIB_WIDEN);
4720               }
4721             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4722             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4723             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4724                                 build_int_cst (NULL_TREE, 1),
4725                                 NULL_RTX, 1);
4726             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4727             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4728                                 NULL_RTX, 0, OPTAB_WIDEN);
4729             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4730                                  build_int_cst (NULL_TREE, size - 1),
4731                                  NULL_RTX, 0);
4732             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4733                                 NULL_RTX, 0, OPTAB_WIDEN);
4734             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4735                                 NULL_RTX, 0, OPTAB_WIDEN);
4736             expand_inc (quotient, tem);
4737             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4738                                 NULL_RTX, 0, OPTAB_WIDEN);
4739             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4740                                 NULL_RTX, 0, OPTAB_WIDEN);
4741             expand_dec (remainder, tem);
4742             emit_label (label);
4743           }
4744         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4745
4746       default:
4747         gcc_unreachable ();
4748       }
4749
4750   if (quotient == 0)
4751     {
4752       if (target && GET_MODE (target) != compute_mode)
4753         target = 0;
4754
4755       if (rem_flag)
4756         {
4757           /* Try to produce the remainder without producing the quotient.
4758              If we seem to have a divmod pattern that does not require widening,
4759              don't try widening here.  We should really have a WIDEN argument
4760              to expand_twoval_binop, since what we'd really like to do here is
4761              1) try a mod insn in compute_mode
4762              2) try a divmod insn in compute_mode
4763              3) try a div insn in compute_mode and multiply-subtract to get
4764                 remainder
4765              4) try the same things with widening allowed.  */
4766           remainder
4767             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4768                                  op0, op1, target,
4769                                  unsignedp,
4770                                  ((optab_handler (optab2, compute_mode)->insn_code
4771                                    != CODE_FOR_nothing)
4772                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4773           if (remainder == 0)
4774             {
4775               /* No luck there.  Can we do remainder and divide at once
4776                  without a library call?  */
4777               remainder = gen_reg_rtx (compute_mode);
4778               if (! expand_twoval_binop ((unsignedp
4779                                           ? udivmod_optab
4780                                           : sdivmod_optab),
4781                                          op0, op1,
4782                                          NULL_RTX, remainder, unsignedp))
4783                 remainder = 0;
4784             }
4785
4786           if (remainder)
4787             return gen_lowpart (mode, remainder);
4788         }
4789
4790       /* Produce the quotient.  Try a quotient insn, but not a library call.
4791          If we have a divmod in this mode, use it in preference to widening
4792          the div (for this test we assume it will not fail). Note that optab2
4793          is set to the one of the two optabs that the call below will use.  */
4794       quotient
4795         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4796                              op0, op1, rem_flag ? NULL_RTX : target,
4797                              unsignedp,
4798                              ((optab_handler (optab2, compute_mode)->insn_code
4799                                != CODE_FOR_nothing)
4800                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4801
4802       if (quotient == 0)
4803         {
4804           /* No luck there.  Try a quotient-and-remainder insn,
4805              keeping the quotient alone.  */
4806           quotient = gen_reg_rtx (compute_mode);
4807           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4808                                      op0, op1,
4809                                      quotient, NULL_RTX, unsignedp))
4810             {
4811               quotient = 0;
4812               if (! rem_flag)
4813                 /* Still no luck.  If we are not computing the remainder,
4814                    use a library call for the quotient.  */
4815                 quotient = sign_expand_binop (compute_mode,
4816                                               udiv_optab, sdiv_optab,
4817                                               op0, op1, target,
4818                                               unsignedp, OPTAB_LIB_WIDEN);
4819             }
4820         }
4821     }
4822
4823   if (rem_flag)
4824     {
4825       if (target && GET_MODE (target) != compute_mode)
4826         target = 0;
4827
4828       if (quotient == 0)
4829         {
4830           /* No divide instruction either.  Use library for remainder.  */
4831           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4832                                          op0, op1, target,
4833                                          unsignedp, OPTAB_LIB_WIDEN);
4834           /* No remainder function.  Try a quotient-and-remainder
4835              function, keeping the remainder.  */
4836           if (!remainder)
4837             {
4838               remainder = gen_reg_rtx (compute_mode);
4839               if (!expand_twoval_binop_libfunc
4840                   (unsignedp ? udivmod_optab : sdivmod_optab,
4841                    op0, op1,
4842                    NULL_RTX, remainder,
4843                    unsignedp ? UMOD : MOD))
4844                 remainder = NULL_RTX;
4845             }
4846         }
4847       else
4848         {
4849           /* We divided.  Now finish doing X - Y * (X / Y).  */
4850           remainder = expand_mult (compute_mode, quotient, op1,
4851                                    NULL_RTX, unsignedp);
4852           remainder = expand_binop (compute_mode, sub_optab, op0,
4853                                     remainder, target, unsignedp,
4854                                     OPTAB_LIB_WIDEN);
4855         }
4856     }
4857
4858   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4859 }
4860 \f
4861 /* Return a tree node with data type TYPE, describing the value of X.
4862    Usually this is an VAR_DECL, if there is no obvious better choice.
4863    X may be an expression, however we only support those expressions
4864    generated by loop.c.  */
4865
4866 tree
4867 make_tree (tree type, rtx x)
4868 {
4869   tree t;
4870
4871   switch (GET_CODE (x))
4872     {
4873     case CONST_INT:
4874       {
4875         HOST_WIDE_INT hi = 0;
4876
4877         if (INTVAL (x) < 0
4878             && !(TYPE_UNSIGNED (type)
4879                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4880                      < HOST_BITS_PER_WIDE_INT)))
4881           hi = -1;
4882
4883         t = build_int_cst_wide (type, INTVAL (x), hi);
4884
4885         return t;
4886       }
4887
4888     case CONST_DOUBLE:
4889       if (GET_MODE (x) == VOIDmode)
4890         t = build_int_cst_wide (type,
4891                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4892       else
4893         {
4894           REAL_VALUE_TYPE d;
4895
4896           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4897           t = build_real (type, d);
4898         }
4899
4900       return t;
4901
4902     case CONST_VECTOR:
4903       {
4904         int units = CONST_VECTOR_NUNITS (x);
4905         tree itype = TREE_TYPE (type);
4906         tree t = NULL_TREE;
4907         int i;
4908
4909
4910         /* Build a tree with vector elements.  */
4911         for (i = units - 1; i >= 0; --i)
4912           {
4913             rtx elt = CONST_VECTOR_ELT (x, i);
4914             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4915           }
4916
4917         return build_vector (type, t);
4918       }
4919
4920     case PLUS:
4921       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4922                           make_tree (type, XEXP (x, 1)));
4923
4924     case MINUS:
4925       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4926                           make_tree (type, XEXP (x, 1)));
4927
4928     case NEG:
4929       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4930
4931     case MULT:
4932       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4933                           make_tree (type, XEXP (x, 1)));
4934
4935     case ASHIFT:
4936       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4937                           make_tree (type, XEXP (x, 1)));
4938
4939     case LSHIFTRT:
4940       t = unsigned_type_for (type);
4941       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4942                                          make_tree (t, XEXP (x, 0)),
4943                                          make_tree (type, XEXP (x, 1))));
4944
4945     case ASHIFTRT:
4946       t = signed_type_for (type);
4947       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4948                                          make_tree (t, XEXP (x, 0)),
4949                                          make_tree (type, XEXP (x, 1))));
4950
4951     case DIV:
4952       if (TREE_CODE (type) != REAL_TYPE)
4953         t = signed_type_for (type);
4954       else
4955         t = type;
4956
4957       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4958                                          make_tree (t, XEXP (x, 0)),
4959                                          make_tree (t, XEXP (x, 1))));
4960     case UDIV:
4961       t = unsigned_type_for (type);
4962       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4963                                          make_tree (t, XEXP (x, 0)),
4964                                          make_tree (t, XEXP (x, 1))));
4965
4966     case SIGN_EXTEND:
4967     case ZERO_EXTEND:
4968       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4969                                           GET_CODE (x) == ZERO_EXTEND);
4970       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4971
4972     case CONST:
4973       return make_tree (type, XEXP (x, 0));
4974
4975     case SYMBOL_REF:
4976       t = SYMBOL_REF_DECL (x);
4977       if (t)
4978         return fold_convert (type, build_fold_addr_expr (t));
4979       /* else fall through.  */
4980
4981     default:
4982       t = build_decl (VAR_DECL, NULL_TREE, type);
4983
4984       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4985          ptr_mode.  So convert.  */
4986       if (POINTER_TYPE_P (type))
4987         x = convert_memory_address (TYPE_MODE (type), x);
4988
4989       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4990          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4991       t->decl_with_rtl.rtl = x;
4992
4993       return t;
4994     }
4995 }
4996 \f
4997 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4998    and returning TARGET.
4999
5000    If TARGET is 0, a pseudo-register or constant is returned.  */
5001
5002 rtx
5003 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5004 {
5005   rtx tem = 0;
5006
5007   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5008     tem = simplify_binary_operation (AND, mode, op0, op1);
5009   if (tem == 0)
5010     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5011
5012   if (target == 0)
5013     target = tem;
5014   else if (tem != target)
5015     emit_move_insn (target, tem);
5016   return target;
5017 }
5018 \f
5019 /* Helper function for emit_store_flag.  */
5020 static rtx
5021 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5022                    int normalizep)
5023 {
5024   rtx op0;
5025   enum machine_mode target_mode = GET_MODE (target);
5026
5027   /* If we are converting to a wider mode, first convert to
5028      TARGET_MODE, then normalize.  This produces better combining
5029      opportunities on machines that have a SIGN_EXTRACT when we are
5030      testing a single bit.  This mostly benefits the 68k.
5031
5032      If STORE_FLAG_VALUE does not have the sign bit set when
5033      interpreted in MODE, we can do this conversion as unsigned, which
5034      is usually more efficient.  */
5035   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5036     {
5037       convert_move (target, subtarget,
5038                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5039                     && 0 == (STORE_FLAG_VALUE
5040                              & ((HOST_WIDE_INT) 1
5041                                 << (GET_MODE_BITSIZE (mode) -1))));
5042       op0 = target;
5043       mode = target_mode;
5044     }
5045   else
5046     op0 = subtarget;
5047
5048   /* If we want to keep subexpressions around, don't reuse our last
5049      target.  */
5050   if (optimize)
5051     subtarget = 0;
5052
5053   /* Now normalize to the proper value in MODE.  Sometimes we don't
5054      have to do anything.  */
5055   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5056     ;
5057   /* STORE_FLAG_VALUE might be the most negative number, so write
5058      the comparison this way to avoid a compiler-time warning.  */
5059   else if (- normalizep == STORE_FLAG_VALUE)
5060     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5061
5062   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5063      it hard to use a value of just the sign bit due to ANSI integer
5064      constant typing rules.  */
5065   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5066            && (STORE_FLAG_VALUE
5067                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5068     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5069                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5070                         normalizep == 1);
5071   else
5072     {
5073       gcc_assert (STORE_FLAG_VALUE & 1);
5074
5075       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5076       if (normalizep == -1)
5077         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5078     }
5079
5080   /* If we were converting to a smaller mode, do the conversion now.  */
5081   if (target_mode != mode)
5082     {
5083       convert_move (target, op0, 0);
5084       return target;
5085     }
5086   else
5087     return op0;
5088 }
5089
5090 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5091    and storing in TARGET.  Normally return TARGET.
5092    Return 0 if that cannot be done.
5093
5094    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5095    it is VOIDmode, they cannot both be CONST_INT.
5096
5097    UNSIGNEDP is for the case where we have to widen the operands
5098    to perform the operation.  It says to use zero-extension.
5099
5100    NORMALIZEP is 1 if we should convert the result to be either zero
5101    or one.  Normalize is -1 if we should convert the result to be
5102    either zero or -1.  If NORMALIZEP is zero, the result will be left
5103    "raw" out of the scc insn.  */
5104
5105 rtx
5106 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5107                  enum machine_mode mode, int unsignedp, int normalizep)
5108 {
5109   rtx subtarget;
5110   enum insn_code icode;
5111   enum machine_mode compare_mode;
5112   enum machine_mode target_mode = GET_MODE (target);
5113   rtx tem;
5114   rtx last = get_last_insn ();
5115   rtx pattern, comparison;
5116
5117   if (unsignedp)
5118     code = unsigned_condition (code);
5119
5120   /* If one operand is constant, make it the second one.  Only do this
5121      if the other operand is not constant as well.  */
5122
5123   if (swap_commutative_operands_p (op0, op1))
5124     {
5125       tem = op0;
5126       op0 = op1;
5127       op1 = tem;
5128       code = swap_condition (code);
5129     }
5130
5131   if (mode == VOIDmode)
5132     mode = GET_MODE (op0);
5133
5134   /* For some comparisons with 1 and -1, we can convert this to
5135      comparisons with zero.  This will often produce more opportunities for
5136      store-flag insns.  */
5137
5138   switch (code)
5139     {
5140     case LT:
5141       if (op1 == const1_rtx)
5142         op1 = const0_rtx, code = LE;
5143       break;
5144     case LE:
5145       if (op1 == constm1_rtx)
5146         op1 = const0_rtx, code = LT;
5147       break;
5148     case GE:
5149       if (op1 == const1_rtx)
5150         op1 = const0_rtx, code = GT;
5151       break;
5152     case GT:
5153       if (op1 == constm1_rtx)
5154         op1 = const0_rtx, code = GE;
5155       break;
5156     case GEU:
5157       if (op1 == const1_rtx)
5158         op1 = const0_rtx, code = NE;
5159       break;
5160     case LTU:
5161       if (op1 == const1_rtx)
5162         op1 = const0_rtx, code = EQ;
5163       break;
5164     default:
5165       break;
5166     }
5167
5168   /* If we are comparing a double-word integer with zero or -1, we can
5169      convert the comparison into one involving a single word.  */
5170   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5171       && GET_MODE_CLASS (mode) == MODE_INT
5172       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5173     {
5174       if ((code == EQ || code == NE)
5175           && (op1 == const0_rtx || op1 == constm1_rtx))
5176         {
5177           rtx op00, op01, op0both;
5178
5179           /* Do a logical OR or AND of the two words and compare the
5180              result.  */
5181           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5182           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5183           op0both = expand_binop (word_mode,
5184                                   op1 == const0_rtx ? ior_optab : and_optab,
5185                                   op00, op01, NULL_RTX, unsignedp,
5186                                   OPTAB_DIRECT);
5187
5188           if (op0both != 0)
5189             return emit_store_flag (target, code, op0both, op1, word_mode,
5190                                     unsignedp, normalizep);
5191         }
5192       else if ((code == LT || code == GE) && op1 == const0_rtx)
5193         {
5194           rtx op0h;
5195
5196           /* If testing the sign bit, can just test on high word.  */
5197           op0h = simplify_gen_subreg (word_mode, op0, mode,
5198                                       subreg_highpart_offset (word_mode,
5199                                                               mode));
5200           return emit_store_flag (target, code, op0h, op1, word_mode,
5201                                   unsignedp, normalizep);
5202         }
5203     }
5204
5205   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5206      complement of A (for GE) and shifting the sign bit to the low bit.  */
5207   if (op1 == const0_rtx && (code == LT || code == GE)
5208       && GET_MODE_CLASS (mode) == MODE_INT
5209       && (normalizep || STORE_FLAG_VALUE == 1
5210           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5211               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5212                   == ((unsigned HOST_WIDE_INT) 1
5213                       << (GET_MODE_BITSIZE (mode) - 1))))))
5214     {
5215       subtarget = target;
5216
5217       /* If the result is to be wider than OP0, it is best to convert it
5218          first.  If it is to be narrower, it is *incorrect* to convert it
5219          first.  */
5220       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5221         {
5222           op0 = convert_modes (target_mode, mode, op0, 0);
5223           mode = target_mode;
5224         }
5225
5226       if (target_mode != mode)
5227         subtarget = 0;
5228
5229       if (code == GE)
5230         op0 = expand_unop (mode, one_cmpl_optab, op0,
5231                            ((STORE_FLAG_VALUE == 1 || normalizep)
5232                             ? 0 : subtarget), 0);
5233
5234       if (STORE_FLAG_VALUE == 1 || normalizep)
5235         /* If we are supposed to produce a 0/1 value, we want to do
5236            a logical shift from the sign bit to the low-order bit; for
5237            a -1/0 value, we do an arithmetic shift.  */
5238         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5239                             size_int (GET_MODE_BITSIZE (mode) - 1),
5240                             subtarget, normalizep != -1);
5241
5242       if (mode != target_mode)
5243         op0 = convert_modes (target_mode, mode, op0, 0);
5244
5245       return op0;
5246     }
5247
5248   icode = setcc_gen_code[(int) code];
5249
5250   if (icode != CODE_FOR_nothing)
5251     {
5252       insn_operand_predicate_fn pred;
5253
5254       /* We think we may be able to do this with a scc insn.  Emit the
5255          comparison and then the scc insn.  */
5256
5257       do_pending_stack_adjust ();
5258       last = get_last_insn ();
5259
5260       comparison
5261         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5262       if (CONSTANT_P (comparison))
5263         {
5264           switch (GET_CODE (comparison))
5265             {
5266             case CONST_INT:
5267               if (comparison == const0_rtx)
5268                 return const0_rtx;
5269               break;
5270
5271 #ifdef FLOAT_STORE_FLAG_VALUE
5272             case CONST_DOUBLE:
5273               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5274                 return const0_rtx;
5275               break;
5276 #endif
5277             default:
5278               gcc_unreachable ();
5279             }
5280
5281           if (normalizep == 1)
5282             return const1_rtx;
5283           if (normalizep == -1)
5284             return constm1_rtx;
5285           return const_true_rtx;
5286         }
5287
5288       /* The code of COMPARISON may not match CODE if compare_from_rtx
5289          decided to swap its operands and reverse the original code.
5290
5291          We know that compare_from_rtx returns either a CONST_INT or
5292          a new comparison code, so it is safe to just extract the
5293          code from COMPARISON.  */
5294       code = GET_CODE (comparison);
5295
5296       /* Get a reference to the target in the proper mode for this insn.  */
5297       compare_mode = insn_data[(int) icode].operand[0].mode;
5298       subtarget = target;
5299       pred = insn_data[(int) icode].operand[0].predicate;
5300       if (optimize || ! (*pred) (subtarget, compare_mode))
5301         subtarget = gen_reg_rtx (compare_mode);
5302
5303       pattern = GEN_FCN (icode) (subtarget);
5304       if (pattern)
5305         {
5306           emit_insn (pattern);
5307           return emit_store_flag_1 (target, subtarget, compare_mode,
5308                                     normalizep);
5309         }
5310     }
5311   else
5312     {
5313       /* We don't have an scc insn, so try a cstore insn.  */
5314
5315       for (compare_mode = mode; compare_mode != VOIDmode;
5316            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5317         {
5318           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5319           if (icode != CODE_FOR_nothing)
5320             break;
5321         }
5322
5323       if (icode != CODE_FOR_nothing)
5324         {
5325           enum machine_mode result_mode
5326             = insn_data[(int) icode].operand[0].mode;
5327           rtx cstore_op0 = op0;
5328           rtx cstore_op1 = op1;
5329
5330           do_pending_stack_adjust ();
5331           last = get_last_insn ();
5332
5333           if (compare_mode != mode)
5334             {
5335               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5336                                           unsignedp);
5337               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5338                                           unsignedp);
5339             }
5340
5341           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5342                                                             compare_mode))
5343             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5344
5345           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5346                                                             compare_mode))
5347             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5348
5349           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5350                                        cstore_op1);
5351           subtarget = target;
5352
5353           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5354                             (subtarget, result_mode)))
5355             subtarget = gen_reg_rtx (result_mode);
5356
5357           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5358                                      cstore_op1);
5359
5360           if (pattern)
5361             {
5362               emit_insn (pattern);
5363               return emit_store_flag_1 (target, subtarget, result_mode,
5364                                         normalizep);
5365             }
5366         }
5367     }
5368
5369   delete_insns_since (last);
5370
5371   /* If optimizing, use different pseudo registers for each insn, instead
5372      of reusing the same pseudo.  This leads to better CSE, but slows
5373      down the compiler, since there are more pseudos */
5374   subtarget = (!optimize
5375                && (target_mode == mode)) ? target : NULL_RTX;
5376
5377   /* If we reached here, we can't do this with a scc insn.  However, there
5378      are some comparisons that can be done directly.  For example, if
5379      this is an equality comparison of integers, we can try to exclusive-or
5380      (or subtract) the two operands and use a recursive call to try the
5381      comparison with zero.  Don't do any of these cases if branches are
5382      very cheap.  */
5383
5384   if (BRANCH_COST (optimize_insn_for_speed_p (),
5385                    false) > 0
5386       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5387       && op1 != const0_rtx)
5388     {
5389       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5390                           OPTAB_WIDEN);
5391
5392       if (tem == 0)
5393         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5394                             OPTAB_WIDEN);
5395       if (tem != 0)
5396         tem = emit_store_flag (target, code, tem, const0_rtx,
5397                                mode, unsignedp, normalizep);
5398       if (tem == 0)
5399         delete_insns_since (last);
5400       return tem;
5401     }
5402
5403   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5404      the constant zero.  Reject all other comparisons at this point.  Only
5405      do LE and GT if branches are expensive since they are expensive on
5406      2-operand machines.  */
5407
5408   if (BRANCH_COST (optimize_insn_for_speed_p (),
5409                    false) == 0
5410       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5411       || (code != EQ && code != NE
5412           && (BRANCH_COST (optimize_insn_for_speed_p (),
5413                            false) <= 1 || (code != LE && code != GT))))
5414     return 0;
5415
5416   /* See what we need to return.  We can only return a 1, -1, or the
5417      sign bit.  */
5418
5419   if (normalizep == 0)
5420     {
5421       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5422         normalizep = STORE_FLAG_VALUE;
5423
5424       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5425                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5426                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5427         ;
5428       else
5429         return 0;
5430     }
5431
5432   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5433      do the necessary operation below.  */
5434
5435   tem = 0;
5436
5437   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5438      the sign bit set.  */
5439
5440   if (code == LE)
5441     {
5442       /* This is destructive, so SUBTARGET can't be OP0.  */
5443       if (rtx_equal_p (subtarget, op0))
5444         subtarget = 0;
5445
5446       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5447                           OPTAB_WIDEN);
5448       if (tem)
5449         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5450                             OPTAB_WIDEN);
5451     }
5452
5453   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5454      number of bits in the mode of OP0, minus one.  */
5455
5456   if (code == GT)
5457     {
5458       if (rtx_equal_p (subtarget, op0))
5459         subtarget = 0;
5460
5461       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5462                           size_int (GET_MODE_BITSIZE (mode) - 1),
5463                           subtarget, 0);
5464       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5465                           OPTAB_WIDEN);
5466     }
5467
5468   if (code == EQ || code == NE)
5469     {
5470       /* For EQ or NE, one way to do the comparison is to apply an operation
5471          that converts the operand into a positive number if it is nonzero
5472          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5473          for NE we negate.  This puts the result in the sign bit.  Then we
5474          normalize with a shift, if needed.
5475
5476          Two operations that can do the above actions are ABS and FFS, so try
5477          them.  If that doesn't work, and MODE is smaller than a full word,
5478          we can use zero-extension to the wider mode (an unsigned conversion)
5479          as the operation.  */
5480
5481       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5482          that is compensated by the subsequent overflow when subtracting
5483          one / negating.  */
5484
5485       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5486         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5487       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5488         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5489       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5490         {
5491           tem = convert_modes (word_mode, mode, op0, 1);
5492           mode = word_mode;
5493         }
5494
5495       if (tem != 0)
5496         {
5497           if (code == EQ)
5498             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5499                                 0, OPTAB_WIDEN);
5500           else
5501             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5502         }
5503
5504       /* If we couldn't do it that way, for NE we can "or" the two's complement
5505          of the value with itself.  For EQ, we take the one's complement of
5506          that "or", which is an extra insn, so we only handle EQ if branches
5507          are expensive.  */
5508
5509       if (tem == 0
5510           && (code == NE
5511               || BRANCH_COST (optimize_insn_for_speed_p (),
5512                               false) > 1))
5513         {
5514           if (rtx_equal_p (subtarget, op0))
5515             subtarget = 0;
5516
5517           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5518           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5519                               OPTAB_WIDEN);
5520
5521           if (tem && code == EQ)
5522             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5523         }
5524     }
5525
5526   if (tem && normalizep)
5527     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5528                         size_int (GET_MODE_BITSIZE (mode) - 1),
5529                         subtarget, normalizep == 1);
5530
5531   if (tem)
5532     {
5533       if (GET_MODE (tem) != target_mode)
5534         {
5535           convert_move (target, tem, 0);
5536           tem = target;
5537         }
5538       else if (!subtarget)
5539         {
5540           emit_move_insn (target, tem);
5541           tem = target;
5542         }
5543     }
5544   else
5545     delete_insns_since (last);
5546
5547   return tem;
5548 }
5549
5550 /* Like emit_store_flag, but always succeeds.  */
5551
5552 rtx
5553 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5554                        enum machine_mode mode, int unsignedp, int normalizep)
5555 {
5556   rtx tem, label;
5557
5558   /* First see if emit_store_flag can do the job.  */
5559   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5560   if (tem != 0)
5561     return tem;
5562
5563   if (normalizep == 0)
5564     normalizep = 1;
5565
5566   /* If this failed, we have to do this with set/compare/jump/set code.  */
5567
5568   if (!REG_P (target)
5569       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5570     target = gen_reg_rtx (GET_MODE (target));
5571
5572   emit_move_insn (target, const1_rtx);
5573   label = gen_label_rtx ();
5574   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5575                            NULL_RTX, label);
5576
5577   emit_move_insn (target, const0_rtx);
5578   emit_label (label);
5579
5580   return target;
5581 }
5582 \f
5583 /* Perform possibly multi-word comparison and conditional jump to LABEL
5584    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5585    now a thin wrapper around do_compare_rtx_and_jump.  */
5586
5587 static void
5588 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5589                  rtx label)
5590 {
5591   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5592   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5593                            NULL_RTX, NULL_RTX, label);
5594 }