gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "recog.h"
  37 #include "langhooks.h"
  38 #include "df.h"
  39 #include "target.h"
  40
  41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT, rtx);
  44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  45                                    unsigned HOST_WIDE_INT, rtx);
  46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT, rtx, int);
  50 static rtx mask_rtx (enum machine_mode, int, int, int);
  51 static rtx lshift_value (enum machine_mode, rtx, int, int);
  52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  53                                     unsigned HOST_WIDE_INT, int);
  54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57
  58 /* Test whether a value is zero or a power of two.  */
  59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  60
  61 /* Nonzero means divides or modulus operations are relatively cheap for
  62    powers of two, so don't use branches; emit the operation instead.
  63    Usually, this will mean that the MD file will emit non-branch
  64    sequences.  */
  65
  66 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  67 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  68
  69 #ifndef SLOW_UNALIGNED_ACCESS
  70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  71 #endif
  72
  73 /* For compilers that support multiple targets with different word sizes,
  74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  75    is the H8/300(H) compiler.  */
  76
  77 #ifndef MAX_BITS_PER_WORD
  78 #define MAX_BITS_PER_WORD BITS_PER_WORD
  79 #endif
  80
  81 /* Reduce conditional compilation elsewhere.  */
  82 #ifndef HAVE_insv
  83 #define HAVE_insv       0
  84 #define CODE_FOR_insv   CODE_FOR_nothing
  85 #define gen_insv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extv
  88 #define HAVE_extv       0
  89 #define CODE_FOR_extv   CODE_FOR_nothing
  90 #define gen_extv(a,b,c,d) NULL_RTX
  91 #endif
  92 #ifndef HAVE_extzv
  93 #define HAVE_extzv      0
  94 #define CODE_FOR_extzv  CODE_FOR_nothing
  95 #define gen_extzv(a,b,c,d) NULL_RTX
  96 #endif
  97
  98 /* Cost of various pieces of RTL, as filled in by init_expmed.  The first
  99    index is the SPEED value passed to rtx_cost; then mode; then shift count.  */
 100 static int zero_cost[2];  /* const0_rtx */
 101 static int add_cost[2][NUM_MACHINE_MODES];  /* (plus reg reg) */
 102 static int neg_cost[2][NUM_MACHINE_MODES];  /* (neg reg) */
 103 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];  /* (ashift reg m) */
 104 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];  /* (plus (mult reg 2**m) reg) */
 105 static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];  /* (minus (mult reg 2**m) reg) */
 106 static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];  /* (minus reg (mult reg 2**m)) */
 107 static int mul_cost[2][NUM_MACHINE_MODES];  /* (mult reg reg) */
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];  /* (div reg reg) */
 109 static int udiv_cost[2][NUM_MACHINE_MODES];  /* (udiv reg reg) */
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];  /* (mult (zero_extend reg) (zero_extend reg)); indexed by the wider mode */
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];  /* truncated high part of that product; indexed by the narrower mode */
 112 /* Fill in the file-scope cost and *_pow2_cheap tables for every MODE_INT mode, once per SPEED value.  */
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 134     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 135   } all;
 136   /* NOTE(review): each rtunion presumably supplies storage for an extra operand of the rtx_def before it -- confirm.  */
 137   rtx pow2[MAX_BITS_PER_WORD];
 138   rtx cint[MAX_BITS_PER_WORD];
 139   int m, n;
 140   enum machine_mode mode, wider_mode;
 141   int speed;
 142   /* pow2[m] holds 2**m and cint[m] holds m; element 0 of either array is never set.  */
 143
 144   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 145     {
 146       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 147       cint[m] = GEN_INT (m);
 148     }
 149   memset (&all, 0, sizeof all);
 150   /* One REG, numbered just past LAST_VIRTUAL_REGISTER, is the leaf of every expression below.  */
 151   PUT_CODE (&all.reg, REG);
 152   /* Avoid using hard regs in ways which may be unsupported.  */
 153   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 154
 155   PUT_CODE (&all.plus, PLUS);
 156   XEXP (&all.plus, 0) = &all.reg;
 157   XEXP (&all.plus, 1) = &all.reg;
 158
 159   PUT_CODE (&all.neg, NEG);
 160   XEXP (&all.neg, 0) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.sdiv, DIV);
 167   XEXP (&all.sdiv, 0) = &all.reg;
 168   XEXP (&all.sdiv, 1) = &all.reg;
 169
 170   PUT_CODE (&all.udiv, UDIV);
 171   XEXP (&all.udiv, 0) = &all.reg;
 172   XEXP (&all.udiv, 1) = &all.reg;
 173   /* reg / 32 and reg % 32: the sample power-of-two operations behind the *_pow2_cheap flags.  */
 174   PUT_CODE (&all.sdiv_32, DIV);
 175   XEXP (&all.sdiv_32, 0) = &all.reg;
 176   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 177
 178   PUT_CODE (&all.smod_32, MOD);
 179   XEXP (&all.smod_32, 0) = &all.reg;
 180   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 181   /* Below: (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg)) N)), costed as mul_highpart_cost.  */
 182   PUT_CODE (&all.zext, ZERO_EXTEND);
 183   XEXP (&all.zext, 0) = &all.reg;
 184
 185   PUT_CODE (&all.wide_mult, MULT);
 186   XEXP (&all.wide_mult, 0) = &all.zext;
 187   XEXP (&all.wide_mult, 1) = &all.zext;
 188
 189   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 190   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 191
 192   PUT_CODE (&all.wide_trunc, TRUNCATE);
 193   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 194   /* Operand 1 of shift and shift_mult is assigned per shift count in the innermost loop.  */
 195   PUT_CODE (&all.shift, ASHIFT);
 196   XEXP (&all.shift, 0) = &all.reg;
 197
 198   PUT_CODE (&all.shift_mult, MULT);
 199   XEXP (&all.shift_mult, 0) = &all.reg;
 200
 201   PUT_CODE (&all.shift_add, PLUS);
 202   XEXP (&all.shift_add, 0) = &all.shift_mult;
 203   XEXP (&all.shift_add, 1) = &all.reg;
 204
 205   PUT_CODE (&all.shift_sub0, MINUS);
 206   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 207   XEXP (&all.shift_sub0, 1) = &all.reg;
 208
 209   PUT_CODE (&all.shift_sub1, MINUS);
 210   XEXP (&all.shift_sub1, 0) = &all.reg;
 211   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 212   /* Query every cost twice, with SPEED 0 and then 1; SPEED is also stored in crtl->maybe_hot_insn_p.  */
 213   for (speed = 0; speed < 2; speed++)
 214     {
 215       crtl->maybe_hot_insn_p = speed;
 216       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 217       /* Visit the MODE_INT modes from the narrowest one upwards.  */
 218       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 219            mode != VOIDmode;
 220            mode = GET_MODE_WIDER_MODE (mode))
 221         {
 222           PUT_MODE (&all.reg, mode);
 223           PUT_MODE (&all.plus, mode);
 224           PUT_MODE (&all.neg, mode);
 225           PUT_MODE (&all.mult, mode);
 226           PUT_MODE (&all.sdiv, mode);
 227           PUT_MODE (&all.udiv, mode);
 228           PUT_MODE (&all.sdiv_32, mode);
 229           PUT_MODE (&all.smod_32, mode);
 230           PUT_MODE (&all.wide_trunc, mode);
 231           PUT_MODE (&all.shift, mode);
 232           PUT_MODE (&all.shift_mult, mode);
 233           PUT_MODE (&all.shift_add, mode);
 234           PUT_MODE (&all.shift_sub0, mode);
 235           PUT_MODE (&all.shift_sub1, mode);
 236
 237           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 238           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 239           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 240           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 241           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 242           /* Flag pow2 division/modulus as cheap if reg / 32 costs at most 2 adds, reg % 32 at most 4.  */
 243           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 244                                           <= 2 * add_cost[speed][mode]);
 245           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 246                                           <= 4 * add_cost[speed][mode]);
 247           /* The widening forms are costed only when a wider mode exists.  */
 248           wider_mode = GET_MODE_WIDER_MODE (mode);
 249           if (wider_mode != VOIDmode)
 250             {
 251               PUT_MODE (&all.zext, wider_mode);
 252               PUT_MODE (&all.wide_mult, wider_mode);
 253               PUT_MODE (&all.wide_lshr, wider_mode);
 254               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 255               /* mul_widen_cost is indexed by WIDER_MODE, mul_highpart_cost by MODE.  */
 256               mul_widen_cost[speed][wider_mode]
 257                 = rtx_cost (&all.wide_mult, SET, speed);
 258               mul_highpart_cost[speed][mode]
 259                 = rtx_cost (&all.wide_trunc, SET, speed);
 260             }
 261           /* Shift count 0: the shift is free and the combined forms cost one add.  */
 262           shift_cost[speed][mode][0] = 0;
 263           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 264             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 265           /* Cost counts 1 .. N-1, N being the smaller of the mode width and MAX_BITS_PER_WORD.  */
 266           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 267           for (m = 1; m < n; m++)
 268             {
 269               XEXP (&all.shift, 1) = cint[m];
 270               XEXP (&all.shift_mult, 1) = pow2[m];
 271               /* shift_add, shift_sub0 and shift_sub1 all contain shift_mult, hence pow2[m].  */
 272               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 273               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 274               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 275               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 276             }
 277         }
 278     }
 279   default_rtl_profile ();  /* NOTE(review): presumably resets the profile state changed through maybe_hot_insn_p -- confirm.  */
 280 }
 281
 282 /* Negate X, returning the rtx for the result.  MODE is the mode that
 283    result should have; it is passed explicitly because X may be a
 284    CONST_INT.  */
 285
 286 rtx
 287 negate_rtx (enum machine_mode mode, rtx x)
 288 {
 289   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 290
 291   /* Use the simplified form if there is one, else call expand_unop.  */
 292   if (folded != NULL_RTX)
 293     return folded;
 294   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 295 }
 296
 297 /* Query the insv/extv/extzv pattern selected by PATTERN.  The result is
 298    MAX_MACHINE_MODE when the corresponding HAVE_foo is false.  Otherwise
 299    it is the mode of operand OPNO, with word_mode standing in for VOIDmode;
 300    OPNO == -1 is a pure availability query and yields VOIDmode.  */
 301 enum machine_mode
 302 mode_for_extraction (enum extraction_pattern pattern, int opno)
 303 {
 304   const struct insn_data *pat_data;
 305   enum machine_mode op_mode;
 306
 307   /* Locate the insn_data entry, bailing out early if the target does
 308      not provide the requested pattern.  */
 309   switch (pattern)
 310     {
 311     case EP_insv:
 312       if (!HAVE_insv)
 313         return MAX_MACHINE_MODE;
 314       pat_data = &insn_data[CODE_FOR_insv];
 315       break;
 316
 317     case EP_extv:
 318       if (!HAVE_extv)
 319         return MAX_MACHINE_MODE;
 320       pat_data = &insn_data[CODE_FOR_extv];
 321       break;
 322
 323     case EP_extzv:
 324       if (!HAVE_extzv)
 325         return MAX_MACHINE_MODE;
 326       pat_data = &insn_data[CODE_FOR_extzv];
 327       break;
 328
 329     default:
 330       gcc_unreachable ();
 331     }
 332
 333   /* The caller only asked whether the pattern exists.  */
 334   if (opno == -1)
 335     return VOIDmode;
 336
 337   /* Callers historically replaced a VOIDmode answer with word_mode,
 338      so that substitution is done here on their behalf.  */
 339   op_mode = pat_data->operand[opno].mode;
 340   if (op_mode != VOIDmode)
 341     return op_mode;
 342
 343   return word_mode;
 344 }
 345
 346 /* Return true if X, of mode MODE, matches the predicate for operand
 347    OPNO of instruction ICODE.  Volatile memories are allowed whatever
 348    the ambient volatile_ok setting, which is restored before returning.  */
 349
 350 static bool
 351 check_predicate_volatile_ok (enum insn_code icode, int opno,
 352                              rtx x, enum machine_mode mode)
 353 {
 354   bool save_volatile_ok = volatile_ok, result;
 355
 356   volatile_ok = 1;  /* Accept volatile MEMs for this one check.  */
 357   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 358   volatile_ok = save_volatile_ok;
 359   return result;
 360 }
 361 \f
 362 /* A subroutine of store_bit_field, with the same arguments.  Return true
 363    if the operation could be implemented.
 364
 365    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 366    no other way of implementing the operation.  If FALLBACK_P is false,
 367    return false instead.  */
 368
 369 static bool
 370 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 371                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 372                    rtx value, bool fallback_p)
 373 {
 374   unsigned int unit
 375     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 376   unsigned HOST_WIDE_INT offset, bitpos;
 377   rtx op0 = str_rtx;
 378   int byte_offset;
 379   rtx orig_value;
 380
 381   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 382
 383   while (GET_CODE (op0) == SUBREG)
 384     {
 385       /* The following line once was done only if WORDS_BIG_ENDIAN,
 386          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 387          meaningful at a much higher level; when structures are copied
 388          between memory and regs, the higher-numbered regs
 389          always get higher addresses.  */
 390       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 391       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 392
 393       byte_offset = 0;
 394
 395       /* Paradoxical subregs need special handling on big endian machines.  */
 396       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 397         {
 398           int difference = inner_mode_size - outer_mode_size;
 399
 400           if (WORDS_BIG_ENDIAN)
 401             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 402           if (BYTES_BIG_ENDIAN)
 403             byte_offset += difference % UNITS_PER_WORD;
 404         }
 405       else
 406         byte_offset = SUBREG_BYTE (op0);
 407
 408       bitnum += byte_offset * BITS_PER_UNIT;
 409       op0 = SUBREG_REG (op0);
 410     }
 411
 412   /* No action is needed if the target is a register and if the field
 413      lies completely outside that register.  This can occur if the source
 414      code contains an out-of-bounds access to a small array.  */
 415   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 416     return true;
 417
 418   /* Use vec_set patterns for inserting parts of vectors whenever
 419      available.  */
 420   if (VECTOR_MODE_P (GET_MODE (op0))
 421       && !MEM_P (op0)
 422       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 423           != CODE_FOR_nothing)
 424       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 425       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 426       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 427     {
 428       enum machine_mode outermode = GET_MODE (op0);
 429       enum machine_mode innermode = GET_MODE_INNER (outermode);
 430       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 431       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 432       rtx rtxpos = GEN_INT (pos);
 433       rtx src = value;
 434       rtx dest = op0;
 435       rtx pat, seq;
 436       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 437       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 438       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 439
 440       start_sequence ();
 441
 442       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 443         src = copy_to_mode_reg (mode1, src);
 444
 445       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 446         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 447
 448       /* We could handle this, but we should always be called with a pseudo
 449          for our targets and all insns should take them as outputs.  */
 450       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 451                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 452                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 453       pat = GEN_FCN (icode) (dest, src, rtxpos);
 454       seq = get_insns ();
 455       end_sequence ();
 456       if (pat)
 457         {
 458           emit_insn (seq);
 459           emit_insn (pat);
 460           return true;
 461         }
 462     }
 463
 464   /* If the target is a register, overwriting the entire object, or storing
 465      a full-word or multi-word field can be done with just a SUBREG.
 466
 467      If the target is memory, storing any naturally aligned field can be
 468      done with a simple store.  For targets that support fast unaligned
 469      memory, any naturally sized, unit aligned field can be done directly.  */
 470
 471   offset = bitnum / unit;
 472   bitpos = bitnum % unit;
 473   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 474                 + (offset * UNITS_PER_WORD);
 475
 476   if (bitpos == 0
 477       && bitsize == GET_MODE_BITSIZE (fieldmode)
 478       && (!MEM_P (op0)
 479           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 480              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 481              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 482           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 483              || (offset * BITS_PER_UNIT % bitsize == 0
 484                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 485     {
 486       if (MEM_P (op0))
 487         op0 = adjust_address (op0, fieldmode, offset);
 488       else if (GET_MODE (op0) != fieldmode)
 489         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 490                                    byte_offset);
 491       emit_move_insn (op0, value);
 492       return true;
 493     }
 494
 495   /* Make sure we are playing with integral modes.  Pun with subregs
 496      if we aren't.  This must come after the entire register case above,
 497      since that case is valid for any mode.  The following cases are only
 498      valid for integral modes.  */
 499   {
 500     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 501     if (imode != GET_MODE (op0))
 502       {
 503         if (MEM_P (op0))
 504           op0 = adjust_address (op0, imode, 0);
 505         else
 506           {
 507             gcc_assert (imode != BLKmode);
 508             op0 = gen_lowpart (imode, op0);
 509           }
 510       }
 511   }
 512
 513   /* We may be accessing data outside the field, which means
 514      we can alias adjacent data.  */
 515   if (MEM_P (op0))
 516     {
 517       op0 = shallow_copy_rtx (op0);
 518       set_mem_alias_set (op0, 0);
 519       set_mem_expr (op0, 0);
 520     }
 521
 522   /* If OP0 is a register, BITPOS must count within a word.
 523      But as we have it, it counts within whatever size OP0 now has.
 524      On a bigendian machine, these are not the same, so convert.  */
 525   if (BYTES_BIG_ENDIAN
 526       && !MEM_P (op0)
 527       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 528     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 529
 530   /* Storing an lsb-aligned field in a register
 531      can be done with a movestrict instruction.  */
 532
 533   if (!MEM_P (op0)
 534       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 535       && bitsize == GET_MODE_BITSIZE (fieldmode)
 536       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 537           != CODE_FOR_nothing))
 538     {
 539       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 540       rtx insn;
 541       rtx start = get_last_insn ();
 542       rtx arg0 = op0;
 543
 544       /* Get appropriate low part of the value being stored.  */
 545       if (CONST_INT_P (value) || REG_P (value))
 546         value = gen_lowpart (fieldmode, value);
 547       else if (!(GET_CODE (value) == SYMBOL_REF
 548                  || GET_CODE (value) == LABEL_REF
 549                  || GET_CODE (value) == CONST))
 550         value = convert_to_mode (fieldmode, value, 0);
 551
 552       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 553         value = copy_to_mode_reg (fieldmode, value);
 554
 555       if (GET_CODE (op0) == SUBREG)
 556         {
 557           /* Else we've got some float mode source being extracted into
 558              a different float mode destination -- this combination of
 559              subregs results in Severe Tire Damage.  */
 560           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 561                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 562                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 563           arg0 = SUBREG_REG (op0);
 564         }
 565
 566       insn = (GEN_FCN (icode)
 567                  (gen_rtx_SUBREG (fieldmode, arg0,
 568                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 569                                   + (offset * UNITS_PER_WORD)),
 570                                   value));
 571       if (insn)
 572         {
 573           emit_insn (insn);
 574           return true;
 575         }
 576       delete_insns_since (start);
 577     }
 578
 579   /* Handle fields bigger than a word.  */
 580
 581   if (bitsize > BITS_PER_WORD)
 582     {
 583       /* Here we transfer the words of the field
 584          in the order least significant first.
 585          This is because the most significant word is the one which may
 586          be less than full.
 587          However, only do that if the value is not BLKmode.  */
 588
 589       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 590       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 591       unsigned int i;
 592       rtx last;
 593
 594       /* This is the mode we must force value to, so that there will be enough
 595          subwords to extract.  Note that fieldmode will often (always?) be
 596          VOIDmode, because that is what store_field uses to indicate that this
 597          is a bit field, but passing VOIDmode to operand_subword_force
 598          is not allowed.  */
 599       fieldmode = GET_MODE (value);
 600       if (fieldmode == VOIDmode)
 601         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 602
 603       last = get_last_insn ();
 604       for (i = 0; i < nwords; i++)
 605         {
 606           /* If I is 0, use the low-order word in both field and target;
 607              if I is 1, use the next to lowest word; and so on.  */
 608           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 609           unsigned int bit_offset = (backwards
 610                                      ? MAX ((int) bitsize - ((int) i + 1)
 611                                             * BITS_PER_WORD,
 612                                             0)
 613                                      : (int) i * BITS_PER_WORD);
 614           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 615
 616           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 617                                             bitsize - i * BITS_PER_WORD),
 618                                   bitnum + bit_offset, word_mode,
 619                                   value_word, fallback_p))
 620             {
 621               delete_insns_since (last);
 622               return false;
 623             }
 624         }
 625       return true;
 626     }
 627
 628   /* From here on we can assume that the field to be stored in is
 629      a full-word (whatever type that is), since it is shorter than a word.  */
 630
 631   /* OFFSET is the number of words or bytes (UNIT says which)
 632      from STR_RTX to the first word or byte containing part of the field.  */
 633
 634   if (!MEM_P (op0))
 635     {
 636       if (offset != 0
 637           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 638         {
 639           if (!REG_P (op0))
 640             {
 641               /* Since this is a destination (lvalue), we can't copy
 642                  it to a pseudo.  We can remove a SUBREG that does not
 643                  change the size of the operand.  Such a SUBREG may
 644                  have been added above.  */
 645               gcc_assert (GET_CODE (op0) == SUBREG
 646                           && (GET_MODE_SIZE (GET_MODE (op0))
 647                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 648               op0 = SUBREG_REG (op0);
 649             }
 650           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 651                                 op0, (offset * UNITS_PER_WORD));
 652         }
 653       offset = 0;
 654     }
 655
 656   /* If VALUE has a floating-point or complex mode, access it as an
 657      integer of the corresponding size.  This can occur on a machine
 658      with 64 bit registers that uses SFmode for float.  It can also
 659      occur for unaligned float or complex fields.  */
 660   orig_value = value;
 661   if (GET_MODE (value) != VOIDmode
 662       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 663       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 664     {
 665       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 666       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 667     }
 668
 669   /* Now OFFSET is nonzero only if OP0 is memory
 670      and is therefore always measured in bytes.  */
 671
 672   if (HAVE_insv
 673       && GET_MODE (value) != BLKmode
 674       && bitsize > 0
 675       && GET_MODE_BITSIZE (op_mode) >= bitsize
 676       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 677             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 678       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 679                                                         VOIDmode)
 680       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 681     {
 682       int xbitpos = bitpos;
 683       rtx value1;
 684       rtx xop0 = op0;
 685       rtx last = get_last_insn ();
 686       rtx pat;
 687       bool copy_back = false;
 688
 689       /* Add OFFSET into OP0's address.  */
 690       if (MEM_P (xop0))
 691         xop0 = adjust_address (xop0, byte_mode, offset);
 692
 693       /* If xop0 is a register, we need it in OP_MODE
 694          to make it acceptable to the format of insv.  */
 695       if (GET_CODE (xop0) == SUBREG)
 696         /* We can't just change the mode, because this might clobber op0,
 697            and we will need the original value of op0 if insv fails.  */
 698         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 699       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 700         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 701
 702       /* If the destination is a paradoxical subreg such that we need a
 703          truncate to the inner mode, perform the insertion on a temporary and
 704          truncate the result to the original destination.  Note that we can't
 705          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 706          X) 0)) is (reg:N X).  */
 707       if (GET_CODE (xop0) == SUBREG
 708           && REG_P (SUBREG_REG (xop0))
 709           && (!TRULY_NOOP_TRUNCATION
 710               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 711                GET_MODE_BITSIZE (op_mode))))
 712         {
 713           rtx tem = gen_reg_rtx (op_mode);
 714           emit_move_insn (tem, xop0);
 715           xop0 = tem;
 716           copy_back = true;
 717         }
 718
 719       /* On big-endian machines, we count bits from the most significant.
 720          If the bit field insn does not, we must invert.  */
 721
 722       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 723         xbitpos = unit - bitsize - xbitpos;
 724
 725       /* We have been counting XBITPOS within UNIT.
 726          Count instead within the size of the register.  */
 727       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 728         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 729
 730       unit = GET_MODE_BITSIZE (op_mode);
 731
 732       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 733       value1 = value;
 734       if (GET_MODE (value) != op_mode)
 735         {
 736           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 737             {
 738               /* Optimization: Don't bother really extending VALUE
 739                  if it has all the bits we will actually use.  However,
 740                  if we must narrow it, be sure we do it correctly.  */
 741
 742               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 743                 {
 744                   rtx tmp;
 745
 746                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 747                   if (! tmp)
 748                     tmp = simplify_gen_subreg (op_mode,
 749                                                force_reg (GET_MODE (value),
 750                                                           value1),
 751                                                GET_MODE (value), 0);
 752                   value1 = tmp;
 753                 }
 754               else
 755                 value1 = gen_lowpart (op_mode, value1);
 756             }
 757           else if (CONST_INT_P (value))
 758             value1 = gen_int_mode (INTVAL (value), op_mode);
 759           else
 760             /* Parse phase is supposed to make VALUE's data type
 761                match that of the component reference, which is a type
 762                at least as wide as the field; so VALUE should have
 763                a mode that corresponds to that type.  */
 764             gcc_assert (CONSTANT_P (value));
 765         }
 766
 767       /* If this machine's insv insists on a register,
 768          get VALUE1 into a register.  */
 769       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 770              (value1, op_mode)))
 771         value1 = force_reg (op_mode, value1);
 772
 773       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 774       if (pat)
 775         {
 776           emit_insn (pat);
 777
 778           if (copy_back)
 779             convert_move (op0, xop0, true);
 780           return true;
 781         }
 782       delete_insns_since (last);
 783     }
 784
 785   /* If OP0 is a memory, try copying it to a register and seeing if a
 786      cheap register alternative is available.  */
 787   if (HAVE_insv && MEM_P (op0))
 788     {
 789       enum machine_mode bestmode;
 790
 791       /* Get the mode to use for inserting into this field.  If OP0 is
 792          BLKmode, get the smallest mode consistent with the alignment. If
 793          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 794          mode. Otherwise, use the smallest mode containing the field.  */
 795
 796       if (GET_MODE (op0) == BLKmode
 797           || (op_mode != MAX_MACHINE_MODE
 798               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 799         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 800                                   (op_mode == MAX_MACHINE_MODE
 801                                    ? VOIDmode : op_mode),
 802                                   MEM_VOLATILE_P (op0));
 803       else
 804         bestmode = GET_MODE (op0);
 805
 806       if (bestmode != VOIDmode
 807           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 808           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 809                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 810         {
 811           rtx last, tempreg, xop0;
 812           unsigned HOST_WIDE_INT xoffset, xbitpos;
 813
 814           last = get_last_insn ();
 815
 816           /* Adjust address to point to the containing unit of
 817              that mode.  Compute the offset as a multiple of this unit,
 818              counting in bytes.  */
 819           unit = GET_MODE_BITSIZE (bestmode);
 820           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 821           xbitpos = bitnum % unit;
 822           xop0 = adjust_address (op0, bestmode, xoffset);
 823
 824           /* Fetch that unit, store the bitfield in it, then store
 825              the unit.  */
 826           tempreg = copy_to_reg (xop0);
 827           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 828                                  fieldmode, orig_value, false))
 829             {
 830               emit_move_insn (xop0, tempreg);
 831               return true;
 832             }
 833           delete_insns_since (last);
 834         }
 835     }
 836
 837   if (!fallback_p)
 838     return false;
 839
 840   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 841   return true;
 842 }
 843
 844 /* Generate code to store value from rtx VALUE
 845    into a bit-field within structure STR_RTX
 846    containing BITSIZE bits starting at bit BITNUM.
 847    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 848
 849 void
 850 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 851                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 852                  rtx value)
 853 {
 854   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 855     gcc_unreachable ();
 856 }
 857 \f
 858 /* Use shifts and boolean operations to store VALUE
 859    into a bit field of width BITSIZE
 860    in a memory location specified by OP0 except offset by OFFSET bytes.
 861      (OFFSET must be 0 if OP0 is a register.)
 862    The field starts at position BITPOS within the byte.
 863     (If OP0 is a register, it may be a full word or a narrower mode,
 864      but BITPOS still counts within a full word,
 865      which is significant on bigendian machines.)  */
 866
 867 static void
 868 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 869                        unsigned HOST_WIDE_INT bitsize,
 870                        unsigned HOST_WIDE_INT bitpos, rtx value)
 871 {
 872   enum machine_mode mode;
 873   unsigned int total_bits = BITS_PER_WORD;
 874   rtx temp;
 875   int all_zero = 0;
 876   int all_one = 0;
 877
 878   /* There is a case not handled here:
 879      a structure with a known alignment of just a halfword
 880      and a field split across two aligned halfwords within the structure.
 881      Or likewise a structure with a known alignment of just a byte
 882      and a field split across two bytes.
 883      Such cases are not supposed to be able to occur.  */
 884
 885   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 886     {
 887       gcc_assert (!offset);
 888       /* Special treatment for a bit field split across two registers.  */
 889       if (bitsize + bitpos > BITS_PER_WORD)
 890         {
 891           store_split_bit_field (op0, bitsize, bitpos, value);
 892           return;
 893         }
 894     }
 895   else
 896     {
 897       /* Get the proper mode to use for this field.  We want a mode that
 898          includes the entire field.  If such a mode would be larger than
 899          a word, we won't be doing the extraction the normal way.
 900          We don't want a mode bigger than the destination.  */
 901
 902       mode = GET_MODE (op0);
 903       if (GET_MODE_BITSIZE (mode) == 0
 904           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 905         mode = word_mode;
 906
 907       if (MEM_VOLATILE_P (op0)
 908           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 909           && flag_strict_volatile_bitfields > 0)
 910         mode = GET_MODE (op0);
 911       else
 912         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 913                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 914
 915       if (mode == VOIDmode)
 916         {
 917           /* The only way this should occur is if the field spans word
 918              boundaries.  */
 919           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 920                                  value);
 921           return;
 922         }
 923
 924       total_bits = GET_MODE_BITSIZE (mode);
 925
 926       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 927          be in the range 0 to total_bits-1, and put any excess bytes in
 928          OFFSET.  */
 929       if (bitpos >= total_bits)
 930         {
 931           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 932           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 933                      * BITS_PER_UNIT);
 934         }
 935
 936       /* Get ref to an aligned byte, halfword, or word containing the field.
 937          Adjust BITPOS to be position within a word,
 938          and OFFSET to be the offset of that word.
 939          Then alter OP0 to refer to that word.  */
 940       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 941       offset -= (offset % (total_bits / BITS_PER_UNIT));
 942       op0 = adjust_address (op0, mode, offset);
 943     }
 944
 945   mode = GET_MODE (op0);
 946
 947   /* Now MODE is either some integral mode for a MEM as OP0,
 948      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 949      The bit field is contained entirely within OP0.
 950      BITPOS is the starting bit number within OP0.
 951      (OP0's mode may actually be narrower than MODE.)  */
 952
 953   if (BYTES_BIG_ENDIAN)
 954       /* BITPOS is the distance between our msb
 955          and that of the containing datum.
 956          Convert it to the distance from the lsb.  */
 957       bitpos = total_bits - bitsize - bitpos;
 958
 959   /* Now BITPOS is always the distance between our lsb
 960      and that of OP0.  */
 961
 962   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 963      we must first convert its mode to MODE.  */
 964
 965   if (CONST_INT_P (value))
 966     {
 967       HOST_WIDE_INT v = INTVAL (value);
 968
 969       if (bitsize < HOST_BITS_PER_WIDE_INT)
 970         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 971
 972       if (v == 0)
 973         all_zero = 1;
 974       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 975                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 976                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 977         all_one = 1;
 978
 979       value = lshift_value (mode, value, bitpos, bitsize);
 980     }
 981   else
 982     {
 983       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 984                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 985
 986       if (GET_MODE (value) != mode)
 987         value = convert_to_mode (mode, value, 1);
 988
 989       if (must_and)
 990         value = expand_binop (mode, and_optab, value,
 991                               mask_rtx (mode, 0, bitsize, 0),
 992                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 993       if (bitpos > 0)
 994         value = expand_shift (LSHIFT_EXPR, mode, value,
 995                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 996     }
 997
 998   /* Now clear the chosen bits in OP0,
 999      except that if VALUE is -1 we need not bother.  */
1000   /* We keep the intermediates in registers to allow CSE to combine
1001      consecutive bitfield assignments.  */
1002
1003   temp = force_reg (mode, op0);
1004
1005   if (! all_one)
1006     {
1007       temp = expand_binop (mode, and_optab, temp,
1008                            mask_rtx (mode, bitpos, bitsize, 1),
1009                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1010       temp = force_reg (mode, temp);
1011     }
1012
1013   /* Now logical-or VALUE into OP0, unless it is zero.  */
1014
1015   if (! all_zero)
1016     {
1017       temp = expand_binop (mode, ior_optab, temp, value,
1018                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1019       temp = force_reg (mode, temp);
1020     }
1021
1022   if (op0 != temp)
1023     {
1024       op0 = copy_rtx (op0);
1025       emit_move_insn (op0, temp);
1026     }
1027 }
1028 \f
1029 /* Store a bit field that is split across multiple accessible memory objects.
1030
1031    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1032    BITSIZE is the field width; BITPOS the position of its first bit
1033    (within the word).
1034    VALUE is the value to store.
1035
1036    This does not yet handle fields wider than BITS_PER_WORD.  */
1037
1038 static void
1039 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1040                        unsigned HOST_WIDE_INT bitpos, rtx value)
1041 {
1042   unsigned int unit;
1043   unsigned int bitsdone = 0;
1044
1045   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1046      much at a time.  */
1047   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1048     unit = BITS_PER_WORD;
1049   else
1050     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1051
1052   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1053      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1054      that VALUE might be a floating-point constant.  */
1055   if (CONSTANT_P (value) && !CONST_INT_P (value))
1056     {
1057       rtx word = gen_lowpart_common (word_mode, value);
1058
1059       if (word && (value != word))
1060         value = word;
1061       else
1062         value = gen_lowpart_common (word_mode,
1063                                     force_reg (GET_MODE (value) != VOIDmode
1064                                                ? GET_MODE (value)
1065                                                : word_mode, value));
1066     }
1067
1068   while (bitsdone < bitsize)
1069     {
1070       unsigned HOST_WIDE_INT thissize;
1071       rtx part, word;
1072       unsigned HOST_WIDE_INT thispos;
1073       unsigned HOST_WIDE_INT offset;
1074
1075       offset = (bitpos + bitsdone) / unit;
1076       thispos = (bitpos + bitsdone) % unit;
1077
1078       /* THISSIZE must not overrun a word boundary.  Otherwise,
1079          store_fixed_bit_field will call us again, and we will mutually
1080          recurse forever.  */
1081       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1082       thissize = MIN (thissize, unit - thispos);
1083
1084       if (BYTES_BIG_ENDIAN)
1085         {
1086           int total_bits;
1087
1088           /* We must do an endian conversion exactly the same way as it is
1089              done in extract_bit_field, so that the two calls to
1090              extract_fixed_bit_field will have comparable arguments.  */
1091           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1092             total_bits = BITS_PER_WORD;
1093           else
1094             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1095
1096           /* Fetch successively less significant portions.  */
1097           if (CONST_INT_P (value))
1098             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1099                              >> (bitsize - bitsdone - thissize))
1100                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1101           else
1102             /* The args are chosen so that the last part includes the
1103                lsb.  Give extract_bit_field the value it needs (with
1104                endianness compensation) to fetch the piece we want.  */
1105             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1106                                             total_bits - bitsize + bitsdone,
1107                                             NULL_RTX, 1);
1108         }
1109       else
1110         {
1111           /* Fetch successively more significant portions.  */
1112           if (CONST_INT_P (value))
1113             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1114                              >> bitsdone)
1115                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1116           else
1117             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1118                                             bitsdone, NULL_RTX, 1);
1119         }
1120
1121       /* If OP0 is a register, then handle OFFSET here.
1122
1123          When handling multiword bitfields, extract_bit_field may pass
1124          down a word_mode SUBREG of a larger REG for a bitfield that actually
1125          crosses a word boundary.  Thus, for a SUBREG, we must find
1126          the current word starting from the base register.  */
1127       if (GET_CODE (op0) == SUBREG)
1128         {
1129           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1130           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1131                                         GET_MODE (SUBREG_REG (op0)));
1132           offset = 0;
1133         }
1134       else if (REG_P (op0))
1135         {
1136           word = operand_subword_force (op0, offset, GET_MODE (op0));
1137           offset = 0;
1138         }
1139       else
1140         word = op0;
1141
1142       /* OFFSET is in UNITs, and UNIT is in bits.
1143          store_fixed_bit_field wants offset in bytes.  */
1144       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1145                              thispos, part);
1146       bitsdone += thissize;
1147     }
1148 }
1149 \f
1150 /* A subroutine of extract_bit_field_1 that converts return value X
1151    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1152    to extract_bit_field.  */
1153
1154 static rtx
1155 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1156                              enum machine_mode tmode, bool unsignedp)
1157 {
1158   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1159     return x;
1160
1161   /* If the x mode is not a scalar integral, first convert to the
1162      integer mode of that size and then access it as a floating-point
1163      value via a SUBREG.  */
1164   if (!SCALAR_INT_MODE_P (tmode))
1165     {
1166       enum machine_mode smode;
1167
1168       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1169       x = convert_to_mode (smode, x, unsignedp);
1170       x = force_reg (smode, x);
1171       return gen_lowpart (tmode, x);
1172     }
1173
1174   return convert_to_mode (tmode, x, unsignedp);
1175 }
1176
1177 /* A subroutine of extract_bit_field, with the same arguments.
1178    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1179    if we can find no other means of implementing the operation.
1180    if FALLBACK_P is false, return NULL instead.  */
1181
1182 static rtx
1183 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1184                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1185                      enum machine_mode mode, enum machine_mode tmode,
1186                      bool fallback_p)
1187 {
1188   unsigned int unit
1189     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1190   unsigned HOST_WIDE_INT offset, bitpos;
1191   rtx op0 = str_rtx;
1192   enum machine_mode int_mode;
1193   enum machine_mode ext_mode;
1194   enum machine_mode mode1;
1195   enum insn_code icode;
1196   int byte_offset;
1197
1198   if (tmode == VOIDmode)
1199     tmode = mode;
1200
1201   while (GET_CODE (op0) == SUBREG)
1202     {
1203       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1204       op0 = SUBREG_REG (op0);
1205     }
1206
1207   /* If we have an out-of-bounds access to a register, just return an
1208      uninitialized register of the required mode.  This can occur if the
1209      source code contains an out-of-bounds access to a small array.  */
1210   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1211     return gen_reg_rtx (tmode);
1212
1213   if (REG_P (op0)
1214       && mode == GET_MODE (op0)
1215       && bitnum == 0
1216       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1217     {
1218       /* We're trying to extract a full register from itself.  */
1219       return op0;
1220     }
1221
1222   /* See if we can get a better vector mode before extracting.  */
1223   if (VECTOR_MODE_P (GET_MODE (op0))
1224       && !MEM_P (op0)
1225       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1226     {
1227       enum machine_mode new_mode;
1228       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1229
1230       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1231         new_mode = MIN_MODE_VECTOR_FLOAT;
1232       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1233         new_mode = MIN_MODE_VECTOR_FRACT;
1234       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1235         new_mode = MIN_MODE_VECTOR_UFRACT;
1236       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1237         new_mode = MIN_MODE_VECTOR_ACCUM;
1238       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1239         new_mode = MIN_MODE_VECTOR_UACCUM;
1240       else
1241         new_mode = MIN_MODE_VECTOR_INT;
1242
1243       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1244         if (GET_MODE_NUNITS (new_mode) == nunits
1245             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1246             && targetm.vector_mode_supported_p (new_mode))
1247           break;
1248       if (new_mode != VOIDmode)
1249         op0 = gen_lowpart (new_mode, op0);
1250     }
1251
1252   /* Use vec_extract patterns for extracting parts of vectors whenever
1253      available.  */
1254   if (VECTOR_MODE_P (GET_MODE (op0))
1255       && !MEM_P (op0)
1256       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1257           != CODE_FOR_nothing)
1258       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1259           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1260     {
1261       enum machine_mode outermode = GET_MODE (op0);
1262       enum machine_mode innermode = GET_MODE_INNER (outermode);
1263       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1264       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1265       rtx rtxpos = GEN_INT (pos);
1266       rtx src = op0;
1267       rtx dest = NULL, pat, seq;
1268       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1269       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1270       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1271
1272       if (innermode == tmode || innermode == mode)
1273         dest = target;
1274
1275       if (!dest)
1276         dest = gen_reg_rtx (innermode);
1277
1278       start_sequence ();
1279
1280       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1281         dest = copy_to_mode_reg (mode0, dest);
1282
1283       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1284         src = copy_to_mode_reg (mode1, src);
1285
1286       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1287         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1288
1289       /* We could handle this, but we should always be called with a pseudo
1290          for our targets and all insns should take them as outputs.  */
1291       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1292                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1293                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1294
1295       pat = GEN_FCN (icode) (dest, src, rtxpos);
1296       seq = get_insns ();
1297       end_sequence ();
1298       if (pat)
1299         {
1300           emit_insn (seq);
1301           emit_insn (pat);
1302           if (mode0 != mode)
1303             return gen_lowpart (tmode, dest);
1304           return dest;
1305         }
1306     }
1307
1308   /* Make sure we are playing with integral modes.  Pun with subregs
1309      if we aren't.  */
1310   {
1311     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1312     if (imode != GET_MODE (op0))
1313       {
1314         if (MEM_P (op0))
1315           op0 = adjust_address (op0, imode, 0);
1316         else if (imode != BLKmode)
1317           {
1318             op0 = gen_lowpart (imode, op0);
1319
1320             /* If we got a SUBREG, force it into a register since we
1321                aren't going to be able to do another SUBREG on it.  */
1322             if (GET_CODE (op0) == SUBREG)
1323               op0 = force_reg (imode, op0);
1324           }
1325         else if (REG_P (op0))
1326           {
1327             rtx reg, subreg;
1328             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1329                                             MODE_INT);
1330             reg = gen_reg_rtx (imode);
1331             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1332             emit_move_insn (subreg, op0);
1333             op0 = reg;
1334             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1335           }
1336         else
1337           {
1338             rtx mem = assign_stack_temp (GET_MODE (op0),
1339                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1340             emit_move_insn (mem, op0);
1341             op0 = adjust_address (mem, BLKmode, 0);
1342           }
1343       }
1344   }
1345
1346   /* We may be accessing data outside the field, which means
1347      we can alias adjacent data.  */
1348   if (MEM_P (op0))
1349     {
1350       op0 = shallow_copy_rtx (op0);
1351       set_mem_alias_set (op0, 0);
1352       set_mem_expr (op0, 0);
1353     }
1354
1355   /* Extraction of a full-word or multi-word value from a structure
1356      in a register or aligned memory can be done with just a SUBREG.
1357      A subword value in the least significant part of a register
1358      can also be extracted with a SUBREG.  For this, we need the
1359      byte offset of the value in op0.  */
1360
1361   bitpos = bitnum % unit;
1362   offset = bitnum / unit;
1363   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1364
1365   /* If OP0 is a register, BITPOS must count within a word.
1366      But as we have it, it counts within whatever size OP0 now has.
1367      On a bigendian machine, these are not the same, so convert.  */
1368   if (BYTES_BIG_ENDIAN
1369       && !MEM_P (op0)
1370       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1371     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1372
1373   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1374      If that's wrong, the solution is to test for it and set TARGET to 0
1375      if needed.  */
1376
1377   /* Only scalar integer modes can be converted via subregs.  There is an
1378      additional problem for FP modes here in that they can have a precision
1379      which is different from the size.  mode_for_size uses precision, but
1380      we want a mode based on the size, so we must avoid calling it for FP
1381      modes.  */
1382   mode1  = (SCALAR_INT_MODE_P (tmode)
1383             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1384             : mode);
1385
1386   /* If the bitfield is volatile, we need to make sure the access
1387      remains on a type-aligned boundary.  */
1388   if (GET_CODE (op0) == MEM
1389       && MEM_VOLATILE_P (op0)
1390       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1391       && flag_strict_volatile_bitfields > 0)
1392     goto no_subreg_mode_swap;
1393
1394   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1395         && bitpos % BITS_PER_WORD == 0)
1396        || (mode1 != BLKmode
1397            /* ??? The big endian test here is wrong.  This is correct
1398               if the value is in a register, and if mode_for_size is not
1399               the same mode as op0.  This causes us to get unnecessarily
1400               inefficient code from the Thumb port when -mbig-endian.  */
1401            && (BYTES_BIG_ENDIAN
1402                ? bitpos + bitsize == BITS_PER_WORD
1403                : bitpos == 0)))
1404       && ((!MEM_P (op0)
1405            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1406                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1407            && GET_MODE_SIZE (mode1) != 0
1408            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1409           || (MEM_P (op0)
1410               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1411                   || (offset * BITS_PER_UNIT % bitsize == 0
1412                       && MEM_ALIGN (op0) % bitsize == 0)))))
1413     {
1414       if (MEM_P (op0))
1415         op0 = adjust_address (op0, mode1, offset);
1416       else if (mode1 != GET_MODE (op0))
1417         {
1418           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1419                                          byte_offset);
1420           if (sub == NULL)
1421             goto no_subreg_mode_swap;
1422           op0 = sub;
1423         }
1424       if (mode1 != mode)
1425         return convert_to_mode (tmode, op0, unsignedp);
1426       return op0;
1427     }
1428  no_subreg_mode_swap:
1429
1430   /* Handle fields bigger than a word.  */
1431
1432   if (bitsize > BITS_PER_WORD)
1433     {
1434       /* Here we transfer the words of the field
1435          in the order least significant first.
1436          This is because the most significant word is the one which may
1437          be less than full.  */
1438
1439       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1440       unsigned int i;
1441
1442       if (target == 0 || !REG_P (target))
1443         target = gen_reg_rtx (mode);
1444
1445       /* Indicate for flow that the entire target reg is being set.  */
1446       emit_clobber (target);
1447
1448       for (i = 0; i < nwords; i++)
1449         {
1450           /* If I is 0, use the low-order word in both field and target;
1451              if I is 1, use the next to lowest word; and so on.  */
1452           /* Word number in TARGET to use.  */
1453           unsigned int wordnum
1454             = (WORDS_BIG_ENDIAN
1455                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1456                : i);
1457           /* Offset from start of field in OP0.  */
1458           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1459                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1460                                                 * (int) BITS_PER_WORD))
1461                                      : (int) i * BITS_PER_WORD);
1462           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1463           rtx result_part
1464             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1465                                            bitsize - i * BITS_PER_WORD),
1466                                  bitnum + bit_offset, 1, target_part, mode,
1467                                  word_mode);
1468
1469           gcc_assert (target_part);
1470
1471           if (result_part != target_part)
1472             emit_move_insn (target_part, result_part);
1473         }
1474
1475       if (unsignedp)
1476         {
1477           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1478              need to be zero'd out.  */
1479           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1480             {
1481               unsigned int i, total_words;
1482
1483               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1484               for (i = nwords; i < total_words; i++)
1485                 emit_move_insn
1486                   (operand_subword (target,
1487                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1488                                     1, VOIDmode),
1489                    const0_rtx);
1490             }
1491           return target;
1492         }
1493
1494       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1495       target = expand_shift (LSHIFT_EXPR, mode, target,
1496                              build_int_cst (NULL_TREE,
1497                                             GET_MODE_BITSIZE (mode) - bitsize),
1498                              NULL_RTX, 0);
1499       return expand_shift (RSHIFT_EXPR, mode, target,
1500                            build_int_cst (NULL_TREE,
1501                                           GET_MODE_BITSIZE (mode) - bitsize),
1502                            NULL_RTX, 0);
1503     }
1504
1505   /* From here on we know the desired field is smaller than a word.  */
1506
1507   /* Check if there is a correspondingly-sized integer field, so we can
1508      safely extract it as one size of integer, if necessary; then
1509      truncate or extend to the size that is wanted; then use SUBREGs or
1510      convert_to_mode to get one of the modes we really wanted.  */
1511
1512   int_mode = int_mode_for_mode (tmode);
1513   if (int_mode == BLKmode)
1514     int_mode = int_mode_for_mode (mode);
1515   /* Should probably push op0 out to memory and then do a load.  */
1516   gcc_assert (int_mode != BLKmode);
1517
1518   /* OFFSET is the number of words or bytes (UNIT says which)
1519      from STR_RTX to the first word or byte containing part of the field.  */
1520   if (!MEM_P (op0))
1521     {
1522       if (offset != 0
1523           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1524         {
1525           if (!REG_P (op0))
1526             op0 = copy_to_reg (op0);
1527           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1528                                 op0, (offset * UNITS_PER_WORD));
1529         }
1530       offset = 0;
1531     }
1532
1533   /* Now OFFSET is nonzero only for memory operands.  */
1534   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1535   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1536   if (ext_mode != MAX_MACHINE_MODE
1537       && bitsize > 0
1538       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1539       /* If op0 is a register, we need it in EXT_MODE to make it
1540          acceptable to the format of ext(z)v.  */
1541       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1542       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1543            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1544       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1545     {
1546       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1547       rtx bitsize_rtx, bitpos_rtx;
1548       rtx last = get_last_insn ();
1549       rtx xop0 = op0;
1550       rtx xtarget = target;
1551       rtx xspec_target = target;
1552       rtx xspec_target_subreg = 0;
1553       rtx pat;
1554
1555       /* If op0 is a register, we need it in EXT_MODE to make it
1556          acceptable to the format of ext(z)v.  */
1557       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1558         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1559       if (MEM_P (xop0))
1560         /* Get ref to first byte containing part of the field.  */
1561         xop0 = adjust_address (xop0, byte_mode, xoffset);
1562
1563       /* On big-endian machines, we count bits from the most significant.
1564          If the bit field insn does not, we must invert.  */
1565       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1566         xbitpos = unit - bitsize - xbitpos;
1567
1568       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1569       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1570         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1571
1572       unit = GET_MODE_BITSIZE (ext_mode);
1573
1574       if (xtarget == 0)
1575         xtarget = xspec_target = gen_reg_rtx (tmode);
1576
1577       if (GET_MODE (xtarget) != ext_mode)
1578         {
1579           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1580              between the mode of the extraction (word_mode) and the target
1581              mode.  Instead, create a temporary and use convert_move to set
1582              the target.  */
1583           if (REG_P (xtarget)
1584               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1585                                         GET_MODE_BITSIZE (ext_mode)))
1586             {
1587               xtarget = gen_lowpart (ext_mode, xtarget);
1588               if (GET_MODE_SIZE (ext_mode)
1589                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1590                 xspec_target_subreg = xtarget;
1591             }
1592           else
1593             xtarget = gen_reg_rtx (ext_mode);
1594         }
1595
1596       /* If this machine's ext(z)v insists on a register target,
1597          make sure we have one.  */
1598       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1599         xtarget = gen_reg_rtx (ext_mode);
1600
1601       bitsize_rtx = GEN_INT (bitsize);
1602       bitpos_rtx = GEN_INT (xbitpos);
1603
1604       pat = (unsignedp
1605              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1606              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1607       if (pat)
1608         {
1609           emit_insn (pat);
1610           if (xtarget == xspec_target)
1611             return xtarget;
1612           if (xtarget == xspec_target_subreg)
1613             return xspec_target;
1614           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1615         }
1616       delete_insns_since (last);
1617     }
1618
1619   /* If OP0 is a memory, try copying it to a register and seeing if a
1620      cheap register alternative is available.  */
1621   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1622     {
1623       enum machine_mode bestmode;
1624
1625       /* Get the mode to use for inserting into this field.  If
1626          OP0 is BLKmode, get the smallest mode consistent with the
1627          alignment. If OP0 is a non-BLKmode object that is no
1628          wider than EXT_MODE, use its mode. Otherwise, use the
1629          smallest mode containing the field.  */
1630
1631       if (GET_MODE (op0) == BLKmode
1632           || (ext_mode != MAX_MACHINE_MODE
1633               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1634         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1635                                   (ext_mode == MAX_MACHINE_MODE
1636                                    ? VOIDmode : ext_mode),
1637                                   MEM_VOLATILE_P (op0));
1638       else
1639         bestmode = GET_MODE (op0);
1640
1641       if (bestmode != VOIDmode
1642           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1643                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1644         {
1645           unsigned HOST_WIDE_INT xoffset, xbitpos;
1646
1647           /* Compute the offset as a multiple of this unit,
1648              counting in bytes.  */
1649           unit = GET_MODE_BITSIZE (bestmode);
1650           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1651           xbitpos = bitnum % unit;
1652
1653           /* Make sure the register is big enough for the whole field.  */
1654           if (xoffset * BITS_PER_UNIT + unit
1655               >= offset * BITS_PER_UNIT + bitsize)
1656             {
1657               rtx last, result, xop0;
1658
1659               last = get_last_insn ();
1660
1661               /* Fetch it to a register in that size.  */
1662               xop0 = adjust_address (op0, bestmode, xoffset);
1663               xop0 = force_reg (bestmode, xop0);
1664               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1665                                             unsignedp, target,
1666                                             mode, tmode, false);
1667               if (result)
1668                 return result;
1669
1670               delete_insns_since (last);
1671             }
1672         }
1673     }
1674
1675   if (!fallback_p)
1676     return NULL;
1677
1678   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1679                                     bitpos, target, unsignedp);
1680   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1681 }
1682
1683 /* Generate code to extract a byte-field from STR_RTX
1684    containing BITSIZE bits, starting at BITNUM,
1685    and put it in TARGET if possible (if TARGET is nonzero).
1686    Regardless of TARGET, we return the rtx for where the value is placed.
1687
1688    STR_RTX is the structure containing the byte (a REG or MEM).
1689    UNSIGNEDP is nonzero if this is an unsigned bit field.
1690    MODE is the natural mode of the field value once extracted.
1691    TMODE is the mode the caller would like the value to have;
1692    but the value may be returned with type MODE instead.
1693
1694    If a TARGET is specified and we can store in it at no extra cost,
1695    we do so, and return TARGET.
1696    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1697    if they are equally easy.  */
1698
1699 rtx
1700 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1701                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1702                    enum machine_mode mode, enum machine_mode tmode)
1703 {
1704   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1705                               target, mode, tmode, true);
1706 }
1707 \f
1708 /* Extract a bit field using shifts and boolean operations.
1709    Returns an rtx to represent the value.
1710    OP0 addresses a register (word) or memory (byte).
1711    BITPOS says which bit within the word or byte the bit field starts in.
1712    OFFSET says how many bytes farther the bit field starts;
1713     it is 0 if OP0 is a register.
1714    BITSIZE says how many bits long the bit field is.
1715     (If OP0 is a register, it may be narrower than a full word,
1716      but BITPOS still counts within a full word,
1717      which is significant on bigendian machines.)
1718
1719    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1720    If TARGET is nonzero, attempts to store the value there
1721    and return TARGET, but this is not guaranteed.
1722    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        A register field with BITSIZE + BITPOS greater than BITS_PER_WORD,
        or a memory field for which no access mode is found, is passed on
        to extract_split_bit_field.

        An unsigned value is converted to TMODE before it is masked.  A
        signed value is computed in the narrowest integer mode that holds
        BITSIZE + BITPOS bits (BITPOS counted from the lsb), so the caller
        may still have to convert the result.  */
1723
1724 static rtx
1725 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1726                          unsigned HOST_WIDE_INT offset,
1727                          unsigned HOST_WIDE_INT bitsize,
1728                          unsigned HOST_WIDE_INT bitpos, rtx target,
1729                          int unsignedp)
1730 {
       /* Width in bits of the unit BITPOS is counted in: a full word for a
          register operand, replaced below by the width of the access mode
          chosen for a memory operand.  */
1731   unsigned int total_bits = BITS_PER_WORD;
1732   enum machine_mode mode;
1733
1734   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1735     {
1736       /* Special treatment for a bit field split across two registers.  */
1737       if (bitsize + bitpos > BITS_PER_WORD)
1738         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1739     }
1740   else
1741     {
1742       /* Get the proper mode to use for this field.  We want a mode that
1743          includes the entire field.  If such a mode would be larger than
1744          a word, we won't be doing the extraction the normal way.  */
1745
           /* For a volatile MEM while flag_strict_volatile_bitfields is
              positive, do not ask get_best_mode: use the mode of OP0 if it
              has a nonzero bit size, else the mode of TARGET under the same
              test, else TMODE.  */
1746       if (MEM_VOLATILE_P (op0)
1747           && flag_strict_volatile_bitfields > 0)
1748         {
1749           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1750             mode = GET_MODE (op0);
1751           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1752             mode = GET_MODE (target);
1753           else
1754             mode = tmode;
1755         }
1756       else
1757         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1758                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1759
1760       if (mode == VOIDmode)
1761         /* The only way this should occur is if the field spans word
1762            boundaries.  */
1763         return extract_split_bit_field (op0, bitsize,
1764                                         bitpos + offset * BITS_PER_UNIT,
1765                                         unsignedp);
1766
1767       total_bits = GET_MODE_BITSIZE (mode);
1768
1769       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1770          be in the range 0 to total_bits-1, and put any excess bytes in
1771          OFFSET.  */
1772       if (bitpos >= total_bits)
1773         {
1774           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1775           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1776                      * BITS_PER_UNIT);
1777         }
1778
1779       /* If we're accessing a volatile MEM, we can't do the next
1780          alignment step if it results in a multi-word access where we
1781          otherwise wouldn't have one.  So, check for that case
1782          here.  */
1783       if (MEM_P (op0)
1784           && MEM_VOLATILE_P (op0)
1785           && flag_strict_volatile_bitfields > 0
1786           && bitpos + bitsize <= total_bits
1787           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1788         {
               /* On this path OFFSET and BITPOS are left as they are, so no
                  realignment takes place.  On STRICT_ALIGNMENT targets the
                  user is warned about the resulting access.  */
1789           if (STRICT_ALIGNMENT)
1790             {
                   /* The explanatory note below is issued at most once, the
                      first time one of the warnings is actually emitted.  */
1791               static bool informed_about_misalignment = false;
1792               bool warned;
1793
1794               if (bitsize == total_bits)
1795                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1796                                      "mis-aligned access used for structure member");
1797               else
1798                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1799                                      "mis-aligned access used for structure bitfield");
1800
1801               if (! informed_about_misalignment && warned)
1802                 {
1803                   informed_about_misalignment = true;
1804                   inform (input_location,
1805                           "When a volatile object spans multiple type-sized locations,"
1806                           " the compiler must choose between using a single mis-aligned access to"
1807                           " preserve the volatility, or using multiple aligned accesses to avoid"
1808                           " runtime faults.  This code may fail at runtime if the hardware does"
1809                           " not allow this access.");
1810                 }
1811             }
1812         }
1813       else
1814         {
1815
1816           /* Get ref to an aligned byte, halfword, or word containing the field.
1817              Adjust BITPOS to be position within a word,
1818              and OFFSET to be the offset of that word.
1819              Then alter OP0 to refer to that word.  */
1820           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1821           offset -= (offset % (total_bits / BITS_PER_UNIT));
1822         }
1823
1824       op0 = adjust_address (op0, mode, offset);
1825     }
1826
       /* From here on MODE is the mode OP0 actually has.  */
1827   mode = GET_MODE (op0);
1828
1829   if (BYTES_BIG_ENDIAN)
1830     /* BITPOS is the distance between our msb and that of OP0.
1831        Convert it to the distance from the lsb.  */
1832     bitpos = total_bits - bitsize - bitpos;
1833
1834   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1835      We have reduced the big-endian case to the little-endian case.  */
1836
1837   if (unsignedp)
1838     {
1839       if (bitpos)
1840         {
1841           /* If the field does not already start at the lsb,
1842              shift it so it does.  */
1843           tree amount = build_int_cst (NULL_TREE, bitpos);
1844           /* Maybe propagate the target for the shift.  */
1845           /* But not if we will return it--could confuse integrate.c.  */
1846           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1847           if (tmode != mode) subtarget = 0;
1848           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1849         }
1850       /* Convert the value to the desired mode.  */
1851       if (mode != tmode)
1852         op0 = convert_to_mode (tmode, op0, 1);
1853
1854       /* Unless the msb of the field used to be the msb when we shifted,
1855          mask out the upper bits.  */
1856
1857       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1858         return expand_binop (GET_MODE (op0), and_optab, op0,
1859                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1860                              target, 1, OPTAB_LIB_WIDEN);
1861       return op0;
1862     }
1863
1864   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1865      then arithmetic-shift its lsb to the lsb of the word.  */
1866   op0 = force_reg (mode, op0);
1867   if (mode != tmode)
1868     target = 0;
1869
1870   /* Find the narrowest integer mode that contains the field.  */
1871
1872   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1873        mode = GET_MODE_WIDER_MODE (mode))
1874     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1875       {
1876         op0 = convert_to_mode (mode, op0, 0);
1877         break;
1878       }
1879
       /* The left shift is only needed when the field's msb is not already
          the msb of MODE.  */
1880   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1881     {
1882       tree amount
1883         = build_int_cst (NULL_TREE,
1884                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1885       /* Maybe propagate the target for the shift.  */
1886       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1887       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1888     }
1889
       /* The final right shift is requested as arithmetic (last argument 0),
          which brings the field's lsb down to bit 0 of MODE.  */
1890   return expand_shift (RSHIFT_EXPR, mode, op0,
1891                        build_int_cst (NULL_TREE,
1892                                       GET_MODE_BITSIZE (mode) - bitsize),
1893                        target, 0);
1894 }
1895 \f
1896 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1897    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1898    complement of that if COMPLEMENT.  The mask is truncated if
1899    necessary to the width of mode MODE.  The mask is zero-extended if
1900    BITSIZE+BITPOS is too small for MODE.  */
1901
1902 static rtx
1903 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1904 {
1905   double_int mask;
1906
1907   mask = double_int_mask (bitsize);
1908   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1909
1910   if (complement)
1911     mask = double_int_not (mask);
1912
1913   return immed_double_int_const (mask, mode);
1914 }
1915
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1917    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1918
1919 static rtx
1920 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1921 {
1922   double_int val;
1923
1924   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1925   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1926
1927   return immed_double_int_const (val, mode);
1928 }
1929 \f
1930 /* Extract a bit field that is split across two words
1931    and return an RTX for the result.
1932
1933    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1934    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1935    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1936
1937 static rtx
1938 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1939                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1940 {
1941   unsigned int unit;
1942   unsigned int bitsdone = 0;
1943   rtx result = NULL_RTX;
1944   int first = 1;
1945
1946   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1947      much at a time.  */
1948   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1949     unit = BITS_PER_WORD;
1950   else
1951     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1952
1953   while (bitsdone < bitsize)
1954     {
1955       unsigned HOST_WIDE_INT thissize;
1956       rtx part, word;
1957       unsigned HOST_WIDE_INT thispos;
1958       unsigned HOST_WIDE_INT offset;
1959
1960       offset = (bitpos + bitsdone) / unit;
1961       thispos = (bitpos + bitsdone) % unit;
1962
1963       /* THISSIZE must not overrun a word boundary.  Otherwise,
1964          extract_fixed_bit_field will call us again, and we will mutually
1965          recurse forever.  */
1966       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1967       thissize = MIN (thissize, unit - thispos);
1968
1969       /* If OP0 is a register, then handle OFFSET here.
1970
1971          When handling multiword bitfields, extract_bit_field may pass
1972          down a word_mode SUBREG of a larger REG for a bitfield that actually
1973          crosses a word boundary.  Thus, for a SUBREG, we must find
1974          the current word starting from the base register.  */
1975       if (GET_CODE (op0) == SUBREG)
1976         {
1977           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1978           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1979                                         GET_MODE (SUBREG_REG (op0)));
1980           offset = 0;
1981         }
1982       else if (REG_P (op0))
1983         {
1984           word = operand_subword_force (op0, offset, GET_MODE (op0));
1985           offset = 0;
1986         }
1987       else
1988         word = op0;
1989
1990       /* Extract the parts in bit-counting order,
1991          whose meaning is determined by BYTES_PER_UNIT.
1992          OFFSET is in UNITs, and UNIT is in bits.
1993          extract_fixed_bit_field wants offset in bytes.  */
1994       part = extract_fixed_bit_field (word_mode, word,
1995                                       offset * unit / BITS_PER_UNIT,
1996                                       thissize, thispos, 0, 1);
1997       bitsdone += thissize;
1998
1999       /* Shift this part into place for the result.  */
2000       if (BYTES_BIG_ENDIAN)
2001         {
2002           if (bitsize != bitsdone)
2003             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2004                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2005                                  0, 1);
2006         }
2007       else
2008         {
2009           if (bitsdone != thissize)
2010             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2011                                  build_int_cst (NULL_TREE,
2012                                                 bitsdone - thissize), 0, 1);
2013         }
2014
2015       if (first)
2016         result = part;
2017       else
2018         /* Combine the parts with bitwise or.  This works
2019            because we extracted each part as an unsigned bit field.  */
2020         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2021                                OPTAB_LIB_WIDEN);
2022
2023       first = 0;
2024     }
2025
2026   /* Unsigned bit field: we are done.  */
2027   if (unsignedp)
2028     return result;
2029   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2030   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2031                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2032                          NULL_RTX, 0);
2033   return expand_shift (RSHIFT_EXPR, word_mode, result,
2034                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2035                        NULL_RTX, 0);
2036 }
2037 \f
2038 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2039    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2040    MODE, fill the upper bits with zeros.  Fail if the layout of either
2041    mode is unknown (as for CC modes) or if the extraction would involve
2042    unprofitable mode punning.  Return the value on success, otherwise
2043    return null.
2044
2045    This is different from gen_lowpart* in these respects:
2046
2047      - the returned value must always be considered an rvalue
2048
2049      - when MODE is wider than SRC_MODE, the extraction involves
2050        a zero extension
2051
2052      - when MODE is smaller than SRC_MODE, the extraction involves
2053        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2054
2055    In other words, this routine performs a computation, whereas the
2056    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2057    operations.  */
2058
2059 rtx
2060 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2061 {
2062   enum machine_mode int_mode, src_int_mode;
2063
2064   if (mode == src_mode)
2065     return src;
2066
2067   if (CONSTANT_P (src))
2068     {
2069       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2070          fails, it will happily create (subreg (symbol_ref)) or similar
2071          invalid SUBREGs.  */
2072       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2073       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2074       if (ret)
2075         return ret;
2076
2077       if (GET_MODE (src) == VOIDmode
2078           || !validate_subreg (mode, src_mode, src, byte))
2079         return NULL_RTX;
2080
2081       src = force_reg (GET_MODE (src), src);
2082       return gen_rtx_SUBREG (mode, src, byte);
2083     }
2084
2085   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2086     return NULL_RTX;
2087
2088   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2089       && MODES_TIEABLE_P (mode, src_mode))
2090     {
2091       rtx x = gen_lowpart_common (mode, src);
2092       if (x)
2093         return x;
2094     }
2095
2096   src_int_mode = int_mode_for_mode (src_mode);
2097   int_mode = int_mode_for_mode (mode);
2098   if (src_int_mode == BLKmode || int_mode == BLKmode)
2099     return NULL_RTX;
2100
2101   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2102     return NULL_RTX;
2103   if (!MODES_TIEABLE_P (int_mode, mode))
2104     return NULL_RTX;
2105
2106   src = gen_lowpart (src_int_mode, src);
2107   src = convert_modes (int_mode, src_int_mode, src, true);
2108   src = gen_lowpart (mode, src);
2109   return src;
2110 }
2111 \f
2112 /* Add INC into TARGET.  */
2113
2114 void
2115 expand_inc (rtx target, rtx inc)
2116 {
2117   rtx value = expand_binop (GET_MODE (target), add_optab,
2118                             target, inc,
2119                             target, 0, OPTAB_LIB_WIDEN);
2120   if (value != target)
2121     emit_move_insn (target, value);
2122 }
2123
2124 /* Subtract DEC from TARGET.  */
2125
2126 void
2127 expand_dec (rtx target, rtx dec)
2128 {
2129   rtx value = expand_binop (GET_MODE (target), sub_optab,
2130                             target, dec,
2131                             target, 0, OPTAB_LIB_WIDEN);
2132   if (value != target)
2133     emit_move_insn (target, value);
2134 }
2135 \f
2136 /* Output a shift instruction for expression code CODE,
2137    with SHIFTED being the rtx for the value to shift,
2138    and AMOUNT the tree for the amount to shift by.
2139    Store the result in the rtx TARGET, if that is convenient.
2140    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2141    Return the rtx for where the value is.

        MODE is the mode in which the operation is carried out.
        CODE is treated as a left operation for LSHIFT_EXPR and
        LROTATE_EXPR and as a rotate for LROTATE_EXPR and RROTATE_EXPR;
        any other code is handled as a right shift.

        If the expanded shift count is const0_rtx, SHIFTED itself is
        returned.  If none of the methods tried yields a result, the
        assertion at the end fails.  */
2142
2143 rtx
2144 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2145               tree amount, rtx target, int unsignedp)
2146 {
2147   rtx op1, temp = 0;
2148   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2149   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
       /* Optabs used below; replaced by their vector/vector counterparts
          when both MODE and the count are vectors.  */
2150   optab lshift_optab = ashl_optab;
2151   optab rshift_arith_optab = ashr_optab;
2152   optab rshift_uns_optab = lshr_optab;
2153   optab lrotate_optab = rotl_optab;
2154   optab rrotate_optab = rotr_optab;
2155   enum machine_mode op1_mode;
2156   int attempt;
2157   bool speed = optimize_insn_for_speed_p ();
2158
2159   op1 = expand_normal (amount);
2160   op1_mode = GET_MODE (op1);
2161
2162   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2163      shift amount is a vector, use the vector/vector shift patterns.  */
2164   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2165     {
2166       lshift_optab = vashl_optab;
2167       rshift_arith_optab = vashr_optab;
2168       rshift_uns_optab = vlshr_optab;
2169       lrotate_optab = vrotl_optab;
2170       rrotate_optab = vrotr_optab;
2171     }
2172
2173   /* Previously detected shift-counts computed by NEGATE_EXPR
2174      and shifted in the other direction; but that does not work
2175      on all machines.  */
2176
       /* When SHIFT_COUNT_TRUNCATED holds, reduce an out-of-range constant
          count modulo the bit size of MODE, or else strip a lowpart SUBREG
          wrapped around an integral-mode count.  */
2177   if (SHIFT_COUNT_TRUNCATED)
2178     {
2179       if (CONST_INT_P (op1)
2180           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2181               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2182         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2183                        % GET_MODE_BITSIZE (mode));
2184       else if (GET_CODE (op1) == SUBREG
2185                && subreg_lowpart_p (op1)
2186                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2187         op1 = SUBREG_REG (op1);
2188     }
2189
       /* A zero count needs no shift at all.  */
2190   if (op1 == const0_rtx)
2191     return shifted;
2192
2193   /* Check whether it is cheaper to implement a left shift by a constant
2194      bit count by a sequence of additions.  */
2195   if (code == LSHIFT_EXPR
2196       && CONST_INT_P (op1)
2197       && INTVAL (op1) > 0
2198       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2199       && INTVAL (op1) < MAX_BITS_PER_WORD
2200       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2201       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2202     {
2203       int i;
           /* Each addition of the value to itself shifts it left by one
              bit, so INTVAL (op1) additions are emitted.  */
2204       for (i = 0; i < INTVAL (op1); i++)
2205         {
2206           temp = force_reg (mode, shifted);
2207           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208                                   unsignedp, OPTAB_LIB_WIDEN);
2209         }
2210       return shifted;
2211     }
2212
       /* Try the methods OPTAB_DIRECT, OPTAB_WIDEN and OPTAB_LIB_WIDEN in
          that order, stopping as soon as one attempt produces a result.  */
2213   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2214     {
2215       enum optab_methods methods;
2216
2217       if (attempt == 0)
2218         methods = OPTAB_DIRECT;
2219       else if (attempt == 1)
2220         methods = OPTAB_WIDEN;
2221       else
2222         methods = OPTAB_LIB_WIDEN;
2223
2224       if (rotate)
2225         {
2226           /* Widening does not work for rotation.  */
2227           if (methods == OPTAB_WIDEN)
2228             continue;
2229           else if (methods == OPTAB_LIB_WIDEN)
2230             {
2231               /* If we have been unable to open-code this by a rotation,
2232                  do it as the IOR of two shifts.  I.e., to rotate A
2233                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2234                  where C is the bitsize of A.
2235
2236                  It is theoretically possible that the target machine might
2237                  not be able to perform either shift and hence we would
2238                  be making two libcalls rather than just the one for the
2239                  shift (similarly if IOR could not be done).  We will allow
2240                  this extremely unlikely lossage to avoid complicating the
2241                  code below.  */
2242
2243               rtx subtarget = target == shifted ? 0 : target;
2244               tree new_amount, other_amount;
2245               rtx temp1;
2246               tree type = TREE_TYPE (amount);
2247               if (GET_MODE (op1) != TYPE_MODE (type)
2248                   && GET_MODE (op1) != VOIDmode)
2249                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2250               new_amount = make_tree (type, op1);
2251               other_amount
2252                 = fold_build2 (MINUS_EXPR, type,
2253                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2254                                new_amount);
2255
2256               shifted = force_reg (mode, shifted);
2257
                   /* Both halves are requested as logical shifts (last
                      argument 1) and are produced by recursive calls.  */
2258               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2259                                    mode, shifted, new_amount, 0, 1);
2260               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2261                                     mode, shifted, other_amount, subtarget, 1);
2262               return expand_binop (mode, ior_optab, temp, temp1, target,
2263                                    unsignedp, methods);
2264             }
2265
2266           temp = expand_binop (mode,
2267                                left ? lrotate_optab : rrotate_optab,
2268                                shifted, op1, target, unsignedp, methods);
2269         }
2270       else if (unsignedp)
2271         temp = expand_binop (mode,
2272                              left ? lshift_optab : rshift_uns_optab,
2273                              shifted, op1, target, unsignedp, methods);
2274
2275       /* Do arithmetic shifts.
2276          Also, if we are going to widen the operand, we can just as well
2277          use an arithmetic right-shift instead of a logical one.  */
2278       if (temp == 0 && ! rotate
2279           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2280         {
2281           enum optab_methods methods1 = methods;
2282
2283           /* If trying to widen a log shift to an arithmetic shift,
2284              don't accept an arithmetic shift of the same size.  */
2285           if (unsignedp)
2286             methods1 = OPTAB_MUST_WIDEN;
2287
2288           /* Arithmetic shift */
2289
2290           temp = expand_binop (mode,
2291                                left ? lshift_optab : rshift_arith_optab,
2292                                shifted, op1, target, unsignedp, methods1);
2293         }
2294
2295       /* We used to try extzv here for logical right shifts, but that was
2296          only useful for one machine, the VAX, and caused poor code
2297          generation there for lshrdi3, so the code was deleted and a
2298          define_expand for lshrsi3 was added to vax.md.  */
2299     }
2300
       /* All attempts failing is treated as an internal error.  */
2301   gcc_assert (temp);
2302   return temp;
2303 }
2304 \f
2305 enum alg_code {
       /* One step of a synthesized multiplication by a constant.  The
          arithmetic performed by alg_zero through alg_sub_t2_m is tabulated
          in the comment that precedes `struct algorithm'.  */

       /* First enumerator, hence value 0; synth_mult treats an alg_hash
          entry holding this code as empty.  */
2306   alg_unknown,
2307   alg_zero,
2308   alg_m, alg_shift,
2309   alg_add_t_m2,
2310   alg_sub_t_m2,
2311   alg_add_factor,
2312   alg_sub_factor,
2313   alg_add_t2_m,
2314   alg_sub_t2_m,
       /* Stored in alg_hash by synth_mult when no algorithm was found
          within the cost limit recorded alongside it.  */
2315   alg_impossible
2316 };
2317
2318 /* This structure holds the "cost" of a multiply sequence.  The
2319    "cost" field holds the total rtx_cost of every operator in the
2320    synthetic multiplication sequence, hence cost(a op b) is defined
2321    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2322    The "latency" field holds the minimum possible latency of the
2323    synthetic multiply, on a hypothetical infinitely parallel CPU.
2324    This is the critical path (the maximum height) of the expression
2325    tree, i.e. the sum of rtx_costs on the most expensive path from
2326    any leaf to the root.  Hence latency(a op b) is defined as zero for
2327    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.

        Two mult_costs are ordered by CHEAPER_MULT_COST: the lower "cost"
        wins and "latency" only breaks ties.  Both fields are shorts.  */
2328
2329 struct mult_cost {
2330   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2331   short latency;  /* The latency of the multiplication sequence.  */
2332 };
2333
2334 /* This macro is used to compare a pointer to a mult_cost against a
2335    single integer "rtx_cost" value.  This is equivalent to the macro
2336    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  X and Y are each evaluated
        more than once, so neither argument may have side effects.  */
2337 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2338                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2339
2340 /* This macro is used to compare two pointers to mult_costs against
2341    each other.  The macro returns true if X is cheaper than Y.
2342    Currently, the cheaper of two mult_costs is the one with the
2343    lower "cost".  If "cost"s are tied, the lower latency is cheaper.
        Equal mult_costs are therefore not "cheaper" than each other.
        X and Y are each evaluated more than once, so neither argument
        may have side effects.  */
2344 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2345                                  || ((X)->cost == (Y)->cost     \
2346                                      && (X)->latency < (Y)->latency))
2347
2348 /* This structure records a sequence of operations.
2349    `ops' is the number of operations recorded.
2350    `cost' is their total cost and latency (see struct mult_cost).
2351    The operations are stored in `op' and the corresponding
2352    logarithms of the integer coefficients in `log', so that the
        coefficient used by operation I is coeff = 2**log[I].
2353
2354    These are the operations:
2355    alg_zero             total := 0;
2356    alg_m                total := multiplicand;
2357    alg_shift            total := total * coeff;
2358    alg_add_t_m2         total := total + multiplicand * coeff;
2359    alg_sub_t_m2         total := total - multiplicand * coeff;
2360    alg_add_factor       total := total * coeff + total;
2361    alg_sub_factor       total := total * coeff - total;
2362    alg_add_t2_m         total := total * coeff + multiplicand;
2363    alg_sub_t2_m         total := total * coeff - multiplicand;
2364
2365    The first operation (op[0]) must be either alg_zero or alg_m;
        synth_mult does not set log[0] for it.  */
2366
2367 struct algorithm
2368 {
2369   struct mult_cost cost;
2370   short ops;
2371   /* The sizes of the OP and LOG fields are not directly related to the
2372      word size, but the worst-case algorithms arise when we have few
2373      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2374      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2375      in total wordsize operations.  */
2376   enum alg_code op[MAX_BITS_PER_WORD];
2377   char log[MAX_BITS_PER_WORD];
2378 };
2379
2380 /* The entry for our multiplication cache/hash table.  An entry is
        matched on the triple <T, MODE, SPEED>.  */
2381 struct alg_hash_entry {
2382   /* The number we are multiplying by.  */
2383   unsigned HOST_WIDE_INT t;
2384
2385   /* The mode in which we are multiplying something by T.  */
2386   enum machine_mode mode;
2387
2388   /* The best multiplication algorithm for T: the code of the last
          operation of that algorithm, alg_impossible if none was found
          within COST, or alg_unknown if the entry is unused.  */
2389   enum alg_code alg;
2390
2391   /* The cost of multiplication if ALG is not alg_impossible.
2392      Otherwise, the cost within which multiplication by T is
2393      impossible.  */
2394   struct mult_cost cost;
2395
2396   /* The value of optimize_insn_for_speed_p () when this entry was
          recorded.  */
2397   bool speed;
2398 };
2399
2400 /* The number of cache/hash entries; synth_mult reduces its hash value
        modulo this count.  Both choices are prime, and the larger table is
        used when HOST_WIDE_INT is 64 bits wide.  */
2401 #if HOST_BITS_PER_WIDE_INT == 64
2402 #define NUM_ALG_HASH_ENTRIES 1031
2403 #else
2404 #define NUM_ALG_HASH_ENTRIES 307
2405 #endif
2406
2407 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2408    actually a hash table with a single entry per slot.  If we have a
2409    collision, the older entry is simply overwritten.  Being static, the
        table starts out zeroed, i.e. with every ALG field alg_unknown.  */
2410 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2411
2412 /* Indicates the type of fixup needed after a constant multiplication.
2413    basic_variant means no fixup is needed, negate_variant means that
2414    the result should be negated, and add_variant means that the
2415    multiplicand should be added to the result.  */
2416 enum mult_variant {basic_variant, negate_variant, add_variant};
2417
     /* Forward declarations of file-local functions.  */
2418 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2419                         const struct mult_cost *, enum machine_mode mode);
2420 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2421                                  struct algorithm *, enum mult_variant *, int);
2422 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2423                               const struct algorithm *, enum mult_variant);
2424 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2425                                                  int, rtx *, int *, int *);
2426 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2427 static rtx extract_high_half (enum machine_mode, rtx);
2428 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2429 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2430                                        int, int);
2431 /* Compute the best algorithm for multiplying by T and store it in
2432    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2433    If ALG_OUT->cost is not below COST_LIMIT on return, no algorithm was
2434    found and all other fields of *ALG_OUT are undefined.
2435    MODE is the machine mode of the multiplication.

        The search recurses on smaller multipliers derived from T.  For
        each <T, MODE, speed> it remembers in alg_hash either the last
        operation of the best algorithm found, or the cost limit within
        which no algorithm could be found.  */
2436
2437 static void
2438 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2439             const struct mult_cost *cost_limit, enum machine_mode mode)
2440 {
2441   int m;
2442   struct algorithm *alg_in, *best_alg;
2443   struct mult_cost best_cost;
2444   struct mult_cost new_limit;
2445   int op_cost, op_latency;
2446   unsigned HOST_WIDE_INT orig_t = t;
2447   unsigned HOST_WIDE_INT q;
2448   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2449   int hash_index;
2450   bool cache_hit = false;
2451   enum alg_code cache_alg = alg_zero;
2452   bool speed = optimize_insn_for_speed_p ();
2453
2454   /* Indicate that no algorithm is yet found.  If no algorithm
2455      is found, this value will be returned and indicate failure.  */
2456   alg_out->cost.cost = cost_limit->cost + 1;
2457   alg_out->cost.latency = cost_limit->latency + 1;
2458
2459   if (cost_limit->cost < 0
2460       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2461     return;
2462
2463   /* Restrict the bits of "t" to the multiplication's mode.  */
2464   t &= GET_MODE_MASK (mode);
2465
2466   /* t == 1 can be done in zero cost.  */
2467   if (t == 1)
2468     {
2469       alg_out->ops = 1;
2470       alg_out->cost.cost = 0;
2471       alg_out->cost.latency = 0;
2472       alg_out->op[0] = alg_m;
2473       return;
2474     }
2475
2476   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2477      fail now.  */
2478   if (t == 0)
2479     {
2480       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2481         return;
2482       else
2483         {
2484           alg_out->ops = 1;
2485           alg_out->cost.cost = zero_cost[speed];
2486           alg_out->cost.latency = zero_cost[speed];
2487           alg_out->op[0] = alg_zero;
2488           return;
2489         }
2490     }
2491
2492   /* We'll be needing a couple extra algorithm structures now.  */
2493
2494   alg_in = XALLOCA (struct algorithm);
2495   best_alg = XALLOCA (struct algorithm);
2496   best_cost = *cost_limit;
2497
2498   /* Compute the hash index.  */
2499   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2500
2501   /* See if we already know what to do for T.  */
2502   if (alg_hash[hash_index].t == t
2503       && alg_hash[hash_index].mode == mode
2505       && alg_hash[hash_index].speed == speed
2506       && alg_hash[hash_index].alg != alg_unknown)
2507     {
2508       cache_alg = alg_hash[hash_index].alg;
2509
2510       if (cache_alg == alg_impossible)
2511         {
2512           /* The cache tells us that it's impossible to synthesize
2513              multiplication by T within alg_hash[hash_index].cost.  */
2514           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2515             /* COST_LIMIT is at least as restrictive as the one
2516                recorded in the hash table, in which case we have no
2517                hope of synthesizing a multiplication.  Just
2518                return.  */
2519             return;
2520
2521           /* If we get here, COST_LIMIT is less restrictive than the
2522              one recorded in the hash table, so we may be able to
2523              synthesize a multiplication.  Proceed as if we didn't
2524              have the cache entry.  */
2525         }
2526       else
2527         {
2528           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2529             /* The cached algorithm shows that this multiplication
2530                requires more cost than COST_LIMIT.  Just return.  This
2531                way, we don't clobber this cache entry with
2532                alg_impossible but retain useful information.  */
2533             return;
2534
2535           cache_hit = true;
2536
               /* Jump straight to the one strategy that produced the cached
                  result; with CACHE_HIT set, each strategy below ends with
                  a jump to `done' instead of falling through to the next.  */
2537           switch (cache_alg)
2538             {
2539             case alg_shift:
2540               goto do_alg_shift;
2541
2542             case alg_add_t_m2:
2543             case alg_sub_t_m2:
2544               goto do_alg_addsub_t_m2;
2545
2546             case alg_add_factor:
2547             case alg_sub_factor:
2548               goto do_alg_addsub_factor;
2549
2550             case alg_add_t2_m:
2551               goto do_alg_add_t2_m;
2552
2553             case alg_sub_t2_m:
2554               goto do_alg_sub_t2_m;
2555
2556             default:
2557               gcc_unreachable ();
2558             }
2559         }
2560     }
2561
2562   /* If we have a group of zero bits at the low-order part of T, try
2563      multiplying by the remaining bits and then doing a shift.  */
2564
2565   if ((t & 1) == 0)
2566     {
2567     do_alg_shift:
2568       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2569       if (m < maxm)
2570         {
2571           q = t >> m;
2572           /* The function expand_shift will choose between a shift and
2573              a sequence of additions, so the observed cost is given as
2574              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2575           op_cost = m * add_cost[speed][mode];
2576           if (shift_cost[speed][mode][m] < op_cost)
2577             op_cost = shift_cost[speed][mode][m];
2578           new_limit.cost = best_cost.cost - op_cost;
2579           new_limit.latency = best_cost.latency - op_cost;
2580           synth_mult (alg_in, q, &new_limit, mode);
2581
2582           alg_in->cost.cost += op_cost;
2583           alg_in->cost.latency += op_cost;
2584           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2585             {
2586               struct algorithm *x;
2587               best_cost = alg_in->cost;
2588               x = alg_in, alg_in = best_alg, best_alg = x;
2589               best_alg->log[best_alg->ops] = m;
2590               best_alg->op[best_alg->ops] = alg_shift;
2591             }
2592
2593           /* See if treating ORIG_T as a signed number yields a better
2594              sequence.  Try this sequence only for a negative ORIG_T
2595              as it would be useless for a non-negative ORIG_T.  */
2596           if ((HOST_WIDE_INT) orig_t < 0)
2597             {
2598               /* Shift ORIG_T as follows because a right shift of a
2599                  negative-valued signed type is implementation
2600                  defined.  */
2601               q = ~(~orig_t >> m);
2602               /* The function expand_shift will choose between a shift
2603                  and a sequence of additions, so the observed cost is
2604                  given as MIN (m * add_cost[speed][mode],
2605                  shift_cost[speed][mode][m]).  */
2606               op_cost = m * add_cost[speed][mode];
2607               if (shift_cost[speed][mode][m] < op_cost)
2608                 op_cost = shift_cost[speed][mode][m];
2609               new_limit.cost = best_cost.cost - op_cost;
2610               new_limit.latency = best_cost.latency - op_cost;
2611               synth_mult (alg_in, q, &new_limit, mode);
2612
2613               alg_in->cost.cost += op_cost;
2614               alg_in->cost.latency += op_cost;
2615               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616                 {
2617                   struct algorithm *x;
2618                   best_cost = alg_in->cost;
2619                   x = alg_in, alg_in = best_alg, best_alg = x;
2620                   best_alg->log[best_alg->ops] = m;
2621                   best_alg->op[best_alg->ops] = alg_shift;
2622                 }
2623             }
2624         }
2625       if (cache_hit)
2626         goto done;
2627     }
2628
2629   /* If we have an odd number, add or subtract one.  */
2630   if ((t & 1) != 0)
2631     {
2632       unsigned HOST_WIDE_INT w;
2633
2634     do_alg_addsub_t_m2:
2635       for (w = 1; (w & t) != 0; w <<= 1)
2636         ;
2637       /* If T was -1, then W will be zero after the loop.  This is another
2638          case where T ends with ...111.  Handling this with (T + 1) and
2639          subtract 1 produces slightly better code and results in algorithm
2640          selection much faster than treating it like the ...0111 case
2641          below.  */
2642       if (w == 0
2643           || (w > 2
2644               /* Reject the case where t is 3.
2645                  Thus we prefer addition in that case.  */
2646               && t != 3))
2647         {
2648           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2649
2650           op_cost = add_cost[speed][mode];
2651           new_limit.cost = best_cost.cost - op_cost;
2652           new_limit.latency = best_cost.latency - op_cost;
2653           synth_mult (alg_in, t + 1, &new_limit, mode);
2654
2655           alg_in->cost.cost += op_cost;
2656           alg_in->cost.latency += op_cost;
2657           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658             {
2659               struct algorithm *x;
2660               best_cost = alg_in->cost;
2661               x = alg_in, alg_in = best_alg, best_alg = x;
2662               best_alg->log[best_alg->ops] = 0;
2663               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2664             }
2665         }
2666       else
2667         {
2668           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2669
2670           op_cost = add_cost[speed][mode];
2671           new_limit.cost = best_cost.cost - op_cost;
2672           new_limit.latency = best_cost.latency - op_cost;
2673           synth_mult (alg_in, t - 1, &new_limit, mode);
2674
2675           alg_in->cost.cost += op_cost;
2676           alg_in->cost.latency += op_cost;
2677           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2678             {
2679               struct algorithm *x;
2680               best_cost = alg_in->cost;
2681               x = alg_in, alg_in = best_alg, best_alg = x;
2682               best_alg->log[best_alg->ops] = 0;
2683               best_alg->op[best_alg->ops] = alg_add_t_m2;
2684             }
2685         }
2686
2687       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2688          quickly with a - a * n for some appropriate constant n.  */
2689       m = exact_log2 (-orig_t + 1);
2690       if (m >= 0 && m < maxm)
2691         {
2692           op_cost = shiftsub1_cost[speed][mode][m];
2693           new_limit.cost = best_cost.cost - op_cost;
2694           new_limit.latency = best_cost.latency - op_cost;
2695           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2696
2697           alg_in->cost.cost += op_cost;
2698           alg_in->cost.latency += op_cost;
2699           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2700             {
2701               struct algorithm *x;
2702               best_cost = alg_in->cost;
2703               x = alg_in, alg_in = best_alg, best_alg = x;
2704               best_alg->log[best_alg->ops] = m;
2705               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2706             }
2707         }
2708
2709       if (cache_hit)
2710         goto done;
2711     }
2712
2713   /* Look for factors of t of the form
2714      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2715      If we find such a factor, we can multiply by t using an algorithm that
2716      multiplies by q, shift the result by m and add/subtract it to itself.
2717
2718      We search for large factors first and loop down, even if large factors
2719      are less probable than small; if we find a large factor we will find a
2720      good sequence quickly, and therefore be able to prune (by decreasing
2721      COST_LIMIT) the search.  */
2722
2723  do_alg_addsub_factor:
2724   for (m = floor_log2 (t - 1); m >= 2; m--)
2725     {
2726       unsigned HOST_WIDE_INT d;
2727
2728       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2729       if (t % d == 0 && t > d && m < maxm
2730           && (!cache_hit || cache_alg == alg_add_factor))
2731         {
2732           /* If the target has a cheap shift-and-add instruction use
2733              that in preference to a shift insn followed by an add insn.
2734              Assume that the shift-and-add is "atomic" with a latency
2735              equal to its cost, otherwise assume that on superscalar
2736              hardware the shift may be executed concurrently with the
2737              earlier steps in the algorithm.  */
2738           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2739           if (shiftadd_cost[speed][mode][m] < op_cost)
2740             {
2741               op_cost = shiftadd_cost[speed][mode][m];
2742               op_latency = op_cost;
2743             }
2744           else
2745             op_latency = add_cost[speed][mode];
2746
2747           new_limit.cost = best_cost.cost - op_cost;
2748           new_limit.latency = best_cost.latency - op_latency;
2749           synth_mult (alg_in, t / d, &new_limit, mode);
2750
2751           alg_in->cost.cost += op_cost;
2752           alg_in->cost.latency += op_latency;
2753           if (alg_in->cost.latency < op_cost)
2754             alg_in->cost.latency = op_cost;
2755           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2756             {
2757               struct algorithm *x;
2758               best_cost = alg_in->cost;
2759               x = alg_in, alg_in = best_alg, best_alg = x;
2760               best_alg->log[best_alg->ops] = m;
2761               best_alg->op[best_alg->ops] = alg_add_factor;
2762             }
2763           /* Other factors will have been taken care of in the recursion.  */
2764           break;
2765         }
2766
2767       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2768       if (t % d == 0 && t > d && m < maxm
2769           && (!cache_hit || cache_alg == alg_sub_factor))
2770         {
2771           /* If the target has a cheap shift-and-subtract insn use
2772              that in preference to a shift insn followed by a sub insn.
2773              Assume that the shift-and-sub is "atomic" with a latency
2774              equal to its cost, otherwise assume that on superscalar
2775              hardware the shift may be executed concurrently with the
2776              earlier steps in the algorithm.  */
2777           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2778           if (shiftsub0_cost[speed][mode][m] < op_cost)
2779             {
2780               op_cost = shiftsub0_cost[speed][mode][m];
2781               op_latency = op_cost;
2782             }
2783           else
2784             op_latency = add_cost[speed][mode];
2785
2786           new_limit.cost = best_cost.cost - op_cost;
2787           new_limit.latency = best_cost.latency - op_latency;
2788           synth_mult (alg_in, t / d, &new_limit, mode);
2789
2790           alg_in->cost.cost += op_cost;
2791           alg_in->cost.latency += op_latency;
2792           if (alg_in->cost.latency < op_cost)
2793             alg_in->cost.latency = op_cost;
2794           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2795             {
2796               struct algorithm *x;
2797               best_cost = alg_in->cost;
2798               x = alg_in, alg_in = best_alg, best_alg = x;
2799               best_alg->log[best_alg->ops] = m;
2800               best_alg->op[best_alg->ops] = alg_sub_factor;
2801             }
2802           break;
2803         }
2804     }
2805   if (cache_hit)
2806     goto done;
2807
2808   /* Try shift-and-add (load effective address) instructions,
2809      i.e. do a*3, a*5, a*9.  */
2810   if ((t & 1) != 0)
2811     {
2812     do_alg_add_t2_m:
2813       q = t - 1;
2814       q = q & -q;
2815       m = exact_log2 (q);
2816       if (m >= 0 && m < maxm)
2817         {
2818           op_cost = shiftadd_cost[speed][mode][m];
2819           new_limit.cost = best_cost.cost - op_cost;
2820           new_limit.latency = best_cost.latency - op_cost;
2821           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2822
2823           alg_in->cost.cost += op_cost;
2824           alg_in->cost.latency += op_cost;
2825           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2826             {
2827               struct algorithm *x;
2828               best_cost = alg_in->cost;
2829               x = alg_in, alg_in = best_alg, best_alg = x;
2830               best_alg->log[best_alg->ops] = m;
2831               best_alg->op[best_alg->ops] = alg_add_t2_m;
2832             }
2833         }
2834       if (cache_hit)
2835         goto done;
2836
2837     do_alg_sub_t2_m:
2838       q = t + 1;
2839       q = q & -q;
2840       m = exact_log2 (q);
2841       if (m >= 0 && m < maxm)
2842         {
2843           op_cost = shiftsub0_cost[speed][mode][m];
2844           new_limit.cost = best_cost.cost - op_cost;
2845           new_limit.latency = best_cost.latency - op_cost;
2846           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2847
2848           alg_in->cost.cost += op_cost;
2849           alg_in->cost.latency += op_cost;
2850           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2851             {
2852               struct algorithm *x;
2853               best_cost = alg_in->cost;
2854               x = alg_in, alg_in = best_alg, best_alg = x;
2855               best_alg->log[best_alg->ops] = m;
2856               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2857             }
2858         }
2859       if (cache_hit)
2860         goto done;
2861     }
2862
2863  done:
2864   /* If best_cost has not decreased, we have not found any algorithm.  */
2865   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2866     {
2867       /* We failed to find an algorithm.  Record alg_impossible for
2868          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2869          we are asked to find an algorithm for T within the same or
2870          lower COST_LIMIT, we can immediately return to the
2871          caller.  */
2872       alg_hash[hash_index].t = t;
2873       alg_hash[hash_index].mode = mode;
2874       alg_hash[hash_index].speed = speed;
2875       alg_hash[hash_index].alg = alg_impossible;
2876       alg_hash[hash_index].cost = *cost_limit;
2877       return;
2878     }
2879
2880   /* Cache the result.  Only the final operation of the best algorithm
          is stored; the earlier ones are rediscovered by the recursion.  */
2881   if (!cache_hit)
2882     {
2883       alg_hash[hash_index].t = t;
2884       alg_hash[hash_index].mode = mode;
2885       alg_hash[hash_index].speed = speed;
2886       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2887       alg_hash[hash_index].cost.cost = best_cost.cost;
2888       alg_hash[hash_index].cost.latency = best_cost.latency;
2889     }
2890
2891   /* If we are getting a too long sequence for `struct algorithm'
2892      to record, make this search fail.  */
2893   if (best_alg->ops == MAX_BITS_PER_WORD)
2894     return;
2895
2896   /* Copy the algorithm from temporary space to the space at alg_out.
2897      We avoid using structure assignment because the majority of
2898      best_alg is normally undefined, and this is a critical function.  */
2899   alg_out->ops = best_alg->ops + 1;
2900   alg_out->cost = best_cost;
2901   memcpy (alg_out->op, best_alg->op,
2902           alg_out->ops * sizeof *alg_out->op);
2903   memcpy (alg_out->log, best_alg->log,
2904           alg_out->ops * sizeof *alg_out->log);
2905 }
2906 \f
2907 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2908    Try three variations:
2909
2910        - a shift/add sequence based on VAL itself
2911        - a shift/add sequence based on -VAL, followed by a negation
2912        - a shift/add sequence based on VAL - 1, followed by an addition.
2913
2914    Return true if the cheapest of these cost less than MULT_COST,
2915    describing the algorithm in *ALG and final fixup in *VARIANT.
        The -VAL variation is tried only when MODE is no wider than `int'.
        If MULT_COST is negative, return false without touching *ALG or
        *VARIANT.  */
2916
2917 static bool
2918 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2919                      struct algorithm *alg, enum mult_variant *variant,
2920                      int mult_cost)
2921 {
2922   struct algorithm alg2;
2923   struct mult_cost limit;
2924   int op_cost;
2925   bool speed = optimize_insn_for_speed_p ();
2926
2927   /* Fail quickly for impossible bounds.  */
2928   if (mult_cost < 0)
2929     return false;
2930
2931   /* Ensure that mult_cost provides a reasonable upper bound.
2932      Any constant multiplication can be performed with less
2933      than 2 * bits additions.  */
2934   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2935   if (mult_cost > op_cost)
2936     mult_cost = op_cost;
2937
       /* First variation: VAL itself, bounded by MULT_COST in both cost
          and latency.  */
2938   *variant = basic_variant;
2939   limit.cost = mult_cost;
2940   limit.latency = mult_cost;
2941   synth_mult (alg, val, &limit, mode);
2942
2943   /* This works only if the inverted value actually fits in an
2944      `unsigned int' */
2945   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2946     {
2947       op_cost = neg_cost[speed][mode];
           /* If an algorithm has already been found, the negated form must
              beat it once the negation is paid for; otherwise it only has
              to fit within MULT_COST.  */
2948       if (MULT_COST_LESS (&alg->cost, mult_cost))
2949         {
2950           limit.cost = alg->cost.cost - op_cost;
2951           limit.latency = alg->cost.latency - op_cost;
2952         }
2953       else
2954         {
2955           limit.cost = mult_cost - op_cost;
2956           limit.latency = mult_cost - op_cost;
2957         }
2958
2959       synth_mult (&alg2, -val, &limit, mode);
2960       alg2.cost.cost += op_cost;
2961       alg2.cost.latency += op_cost;
2962       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2963         *alg = alg2, *variant = negate_variant;
2964     }
2965
2966   /* Third variation: multiply by VAL - 1 and add the multiplicand.
          This proves very useful for division-by-constant.  The limit is
          chosen as for the negated form, charging one addition.  */
2967   op_cost = add_cost[speed][mode];
2968   if (MULT_COST_LESS (&alg->cost, mult_cost))
2969     {
2970       limit.cost = alg->cost.cost - op_cost;
2971       limit.latency = alg->cost.latency - op_cost;
2972     }
2973   else
2974     {
2975       limit.cost = mult_cost - op_cost;
2976       limit.latency = mult_cost - op_cost;
2977     }
2978
2979   synth_mult (&alg2, val - 1, &limit, mode);
2980   alg2.cost.cost += op_cost;
2981   alg2.cost.latency += op_cost;
2982   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2983     *alg = alg2, *variant = add_variant;
2984
2985   return MULT_COST_LESS (&alg->cost, mult_cost);
2986 }
2987
2988 /* A subroutine of expand_mult, used for constant multiplications.
2989    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2990    convenient.  Use the shift/add sequence described by ALG and apply
2991    the final fixup specified by VARIANT.  */
2992
2993 static rtx
2994 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2995                    rtx target, const struct algorithm *alg,
2996                    enum mult_variant variant)
2997 {
2998   HOST_WIDE_INT val_so_far;
2999   rtx insn, accum, tem;
3000   int opno;
3001   enum machine_mode nmode;
3002
3003   /* Avoid referencing memory over and over and invalid sharing
3004      on SUBREGs.  */
3005   op0 = force_reg (mode, op0);
3006
3007   /* ACCUM starts out either as OP0 or as a zero, depending on
3008      the first operation.  */
3009
3010   if (alg->op[0] == alg_zero)
3011     {
3012       accum = copy_to_mode_reg (mode, const0_rtx);
3013       val_so_far = 0;
3014     }
3015   else if (alg->op[0] == alg_m)
3016     {
3017       accum = copy_to_mode_reg (mode, op0);
3018       val_so_far = 1;
3019     }
3020   else
3021     gcc_unreachable ();
3022
3023   for (opno = 1; opno < alg->ops; opno++)
3024     {
3025       int log = alg->log[opno];
3026       rtx shift_subtarget = optimize ? 0 : accum;
3027       rtx add_target
3028         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3029            && !optimize)
3030           ? target : 0;
3031       rtx accum_target = optimize ? 0 : accum;
3032
3033       switch (alg->op[opno])
3034         {
3035         case alg_shift:
3036           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3037                                 build_int_cst (NULL_TREE, log),
3038                                 NULL_RTX, 0);
3039           val_so_far <<= log;
3040           break;
3041
3042         case alg_add_t_m2:
3043           tem = expand_shift (LSHIFT_EXPR, mode, op0,
3044                               build_int_cst (NULL_TREE, log),
3045                               NULL_RTX, 0);
3046           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3047                                  add_target ? add_target : accum_target);
3048           val_so_far += (HOST_WIDE_INT) 1 << log;
3049           break;
3050
3051         case alg_sub_t_m2:
3052           tem = expand_shift (LSHIFT_EXPR, mode, op0,
3053                               build_int_cst (NULL_TREE, log),
3054                               NULL_RTX, 0);
3055           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3056                                  add_target ? add_target : accum_target);
3057           val_so_far -= (HOST_WIDE_INT) 1 << log;
3058           break;
3059
3060         case alg_add_t2_m:
3061           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3062                                 build_int_cst (NULL_TREE, log),
3063                                 shift_subtarget,
3064                                 0);
3065           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3066                                  add_target ? add_target : accum_target);
3067           val_so_far = (val_so_far << log) + 1;
3068           break;
3069
3070         case alg_sub_t2_m:
3071           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3072                                 build_int_cst (NULL_TREE, log),
3073                                 shift_subtarget, 0);
3074           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3075                                  add_target ? add_target : accum_target);
3076           val_so_far = (val_so_far << log) - 1;
3077           break;
3078
3079         case alg_add_factor:
3080           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3081                               build_int_cst (NULL_TREE, log),
3082                               NULL_RTX, 0);
3083           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3084                                  add_target ? add_target : accum_target);
3085           val_so_far += val_so_far << log;
3086           break;
3087
3088         case alg_sub_factor:
3089           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3090                               build_int_cst (NULL_TREE, log),
3091                               NULL_RTX, 0);
3092           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3093                                  (add_target
3094                                   ? add_target : (optimize ? 0 : tem)));
3095           val_so_far = (val_so_far << log) - val_so_far;
3096           break;
3097
3098         default:
3099           gcc_unreachable ();
3100         }
3101
3102       /* Write a REG_EQUAL note on the last insn so that we can cse
3103          multiplication sequences.  Note that if ACCUM is a SUBREG,
3104          we've set the inner register and must properly indicate
3105          that.  */
3106
3107       tem = op0, nmode = mode;
3108       if (GET_CODE (accum) == SUBREG)
3109         {
3110           nmode = GET_MODE (SUBREG_REG (accum));
3111           tem = gen_lowpart (nmode, op0);
3112         }
3113
3114       insn = get_last_insn ();
3115       set_unique_reg_note (insn, REG_EQUAL,
3116                            gen_rtx_MULT (nmode, tem,
3117                                          GEN_INT (val_so_far)));
3118     }
3119
3120   if (variant == negate_variant)
3121     {
3122       val_so_far = -val_so_far;
3123       accum = expand_unop (mode, neg_optab, accum, target, 0);
3124     }
3125   else if (variant == add_variant)
3126     {
3127       val_so_far = val_so_far + 1;
3128       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3129     }
3130
3131   /* Compare only the bits of val and val_so_far that are significant
3132      in the result mode, to avoid sign-/zero-extension confusion.  */
3133   val &= GET_MODE_MASK (mode);
3134   val_so_far &= GET_MODE_MASK (mode);
3135   gcc_assert (val == val_so_far);
3136
3137   return accum;
3138 }
3139
3140 /* Perform a multiplication and return an rtx for the result.
3141    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3142    TARGET is a suggestion for where to store the result (an rtx).
        UNSIGNEDP is forwarded to expand_shift and expand_binop; the
        trapping optabs (negv_optab, smulv_optab) are selected only when
        it is zero, flag_trapv is set and MODE has class MODE_INT.
3143
3144    We check specially for a constant integer as OP1.
3145    If you want this check for OP0 as well, then before calling
3146    you should swap the two operands if OP0 would be constant.

        Strategies, in the order they are tried:
        - OP1 equal to const0_rtx, const1_rtx or constm1_rtx: return
          const0_rtx, OP0, or a negation of OP0 respectively.
        - Scalar integer MODE with UNSIGNEDP set or flag_trapv clear:
          a single left shift for a power-of-two coefficient, otherwise
          the sequence picked by choose_mult_variant and emitted by
          expand_mult_const, provided it fits the cost budget.
        - Scalar float MODE with OP1 equal to 2.0: expand as OP0 + OP0.
        - Otherwise a plain multiply through expand_binop with
          OPTAB_LIB_WIDEN; the result is asserted to be nonnull.  */
3147
3148 rtx
3149 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3150              int unsignedp)
3151 {
3152   enum mult_variant variant;
3153   struct algorithm algorithm;
3154   int max_cost;
3155   bool speed = optimize_insn_for_speed_p ();
3156
3157   /* Handling const0_rtx here allows us to use zero as a rogue value for
3158      coeff below.  */
3159   if (op1 == const0_rtx)
3160     return const0_rtx;
3161   if (op1 == const1_rtx)
3162     return op0;
3163   if (op1 == constm1_rtx)
3164     return expand_unop (mode,
3165                         GET_MODE_CLASS (mode) == MODE_INT
3166                         && !unsignedp && flag_trapv
3167                         ? negv_optab : neg_optab,
3168                         op0, target, 0);
3169
3170   /* These are the operations that are potentially turned into a sequence
3171      of shifts and additions.  */
3172   if (SCALAR_INT_MODE_P (mode)
3173       && (unsignedp || !flag_trapv))
3174     {
3175       HOST_WIDE_INT coeff = 0;
           /* FAKE_REG stands in for OP0 inside the MULT rtx whose rtx_cost
              is used as the cost budget below.  */
3176       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3177
3178       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3179          less than or equal in size to `unsigned int' this doesn't matter.
3180          If the mode is larger than `unsigned int', then synth_mult works
3181          only if the constant value exactly fits in an `unsigned int' without
3182          any truncation.  This means that multiplying by negative values does
3183          not work; results are off by 2^32 on a 32 bit machine.  */
3184
3185       if (CONST_INT_P (op1))
3186         {
3187           /* Attempt to handle multiplication of DImode values by negative
3188              coefficients, by performing the multiplication by a positive
3189              multiplier and then inverting the result.  */
3190           if (INTVAL (op1) < 0
3191               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3192             {
3193               /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3194                  result is interpreted as an unsigned coefficient.
3195                  Exclude cost of op0 from max_cost to match the cost
3196                  calculation of the synth_mult.  */
3197               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3198                          - neg_cost[speed][mode];
3199               if (max_cost > 0
3200                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3201                                           &variant, max_cost))
3202                 {
3203                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3204                                                 NULL_RTX, &algorithm,
3205                                                 variant);
3206                   return expand_unop (mode, neg_optab, temp, target, 0);
3207                 }
                   /* No variant was cheap enough: COEFF is still 0, so the
                      shift/add path below is skipped and the generic
                      multiply at the end of the function is used.  */
3208             }
3209           else coeff = INTVAL (op1);
3210         }
3211       else if (GET_CODE (op1) == CONST_DOUBLE)
3212         {
3213           /* If we are multiplying in DImode, it may still be a win
3214              to try to work with shifts and adds.  A value with a zero
                  high word and a positive low word becomes COEFF; a value
                  with a zero low word and a power-of-two high word is
                  expanded at once as a shift by floor_log2 of the high
                  word plus HOST_BITS_PER_WIDE_INT.  */
3215           if (CONST_DOUBLE_HIGH (op1) == 0
3216               && CONST_DOUBLE_LOW (op1) > 0)
3217             coeff = CONST_DOUBLE_LOW (op1);
3218           else if (CONST_DOUBLE_LOW (op1) == 0
3219                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3220             {
3221               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3222                           + HOST_BITS_PER_WIDE_INT;
3223               return expand_shift (LSHIFT_EXPR, mode, op0,
3224                                    build_int_cst (NULL_TREE, shift),
3225                                    target, unsignedp);
3226             }
3227         }
3228
3229       /* We used to test optimize here, on the grounds that it's better to
3230          produce a smaller program when -O is not used.  But this causes
3231          such a terrible slowdown sometimes that it seems better to always
3232          use synth_mult.  */
3233       if (coeff != 0)
3234         {
3235           /* Special case powers of two.  */
3236           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3237             return expand_shift (LSHIFT_EXPR, mode, op0,
3238                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3239                                  target, unsignedp);
3240
3241           /* Exclude cost of op0 from max_cost to match the cost
3242              calculation of the synth_mult.  */
3243           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3244           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3245                                    max_cost))
3246             return expand_mult_const (mode, op0, coeff, target,
3247                                       &algorithm, variant);
3248         }
3249     }
3250
       /* Move a CONST_DOUBLE first operand into OP1, because the x*2.0
          check below only inspects OP1.  */
3251   if (GET_CODE (op0) == CONST_DOUBLE)
3252     {
3253       rtx temp = op0;
3254       op0 = op1;
3255       op1 = temp;
3256     }
3257
3258   /* Expand x*2.0 as x+x.  */
3259   if (GET_CODE (op1) == CONST_DOUBLE
3260       && SCALAR_FLOAT_MODE_P (mode))
3261     {
3262       REAL_VALUE_TYPE d;
3263       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3264
3265       if (REAL_VALUES_EQUAL (d, dconst2))
3266         {
3267           op0 = force_reg (GET_MODE (op0), op0);
3268           return expand_binop (mode, add_optab, op0, op0,
3269                                target, unsignedp, OPTAB_LIB_WIDEN);
3270         }
3271     }
3272
3273   /* This used to use umul_optab if unsigned, but for non-widening multiply
3274      there is no difference between signed and unsigned.  */
3275   op0 = expand_binop (mode,
3276                       ! unsignedp
3277                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3278                       ? smulv_optab : smul_optab,
3279                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3280   gcc_assert (op0);
3281   return op0;
3282 }
3283
3284 /* Perform a widening multiplication and return an rtx for the result.
3285    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3286    TARGET is a suggestion for where to store the result (an rtx).
3287    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3288    or smul_widen_optab.
        UNSIGNEDP is forwarded to expand_shift and expand_binop.
3289
3290    We check specially for a constant integer as OP1, comparing the
3291    cost of a widening multiply against the cost of a sequence of shifts
3292    and adds.  The constant path is taken only when OP1 is nonnegative or
        MODE is no wider than HOST_BITS_PER_WIDE_INT; on that path OP0 is
        first converted to MODE, treated as unsigned exactly when
        THIS_OPTAB is umul_widen_optab.  In every other case the multiply
        is handed to expand_binop with THIS_OPTAB and OPTAB_LIB_WIDEN.  */
3293
3294 rtx
3295 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3296                       int unsignedp, optab this_optab)
3297 {
3298   bool speed = optimize_insn_for_speed_p ();
3299
3300   if (CONST_INT_P (op1)
3301       && (INTVAL (op1) >= 0
3302           || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3303     {
3304       HOST_WIDE_INT coeff = INTVAL (op1);
3305       int max_cost;
3306       enum mult_variant variant;
3307       struct algorithm algorithm;
3308
3309       /* Special case powers of two.  */
3310       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3311         {
3312           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3313           return expand_shift (LSHIFT_EXPR, mode, op0,
3314                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3315                                target, unsignedp);
3316         }
3317
3318       /* Exclude cost of op0 from max_cost to match the cost
3319          calculation of the synth_mult.  Here the budget is simply the
              mul_widen_cost entry for MODE.  */
3320       max_cost = mul_widen_cost[speed][mode];
3321       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3322                                max_cost))
3323         {
3324           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3325           return expand_mult_const (mode, op0, coeff, target,
3326                                     &algorithm, variant);
3327         }
3328     }
3329   return expand_binop (mode, this_optab, op0, op1, target,
3330                        unsignedp, OPTAB_LIB_WIDEN);
3331 }
3332 \f
3333 /* Return the smallest n such that 2**n >= X.
        Computed as floor_log2 (X - 1) + 1.  X is unsigned, so X == 0 makes
        X - 1 wrap around to the largest representable value.
        NOTE(review): callers presumably pass X >= 1, and the result for
        X == 1 depends on what floor_log2 (0) returns (defined elsewhere)
        -- confirm.  */
3334
3335 int
3336 ceil_log2 (unsigned HOST_WIDE_INT x)
3337 {
3338   return floor_log2 (x - 1) + 1;
3339 }
3340
3341 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3342    replace division by D, and put the least significant N bits of the result
3343    in *MULTIPLIER_PTR and return the most significant bit.
3344
3345    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3346    needed precision is in PRECISION (should be <= N).
3347
3348    PRECISION should be as small as possible so this function can choose
3349    multiplier more freely.
3350
3351    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3352    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3353
3354    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3355    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.
        NOTE(review): the multiplier computed below approximates
        2^(N + *POST_SHIFT_PTR) / D, so the shift above presumably applies
        to the high N-bit part of the product -- confirm against callers.  */
3356
3357 static
3358 unsigned HOST_WIDE_INT
3359 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3360                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3361 {
3362   HOST_WIDE_INT mhigh_hi, mlow_hi;
3363   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3364   int lgup, post_shift;
3365   int pow, pow2;
3366   unsigned HOST_WIDE_INT nl, dummy1;
3367   HOST_WIDE_INT nh, dummy2;
3368
3369   /* lgup = ceil(log2(divisor)); */
3370   lgup = ceil_log2 (d);
3371
3372   gcc_assert (lgup <= n);
3373
3374   pow = n + lgup;
3375   pow2 = n + lgup - precision;
3376
3377   /* We could handle this with some effort, but this case is much
3378      better handled directly with a scc insn, so rely on caller using
3379      that.  */
3380   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3381
3382   /* mlow = 2^(N + lgup)/d.  The numerator is built as the double-word
          value NH:NL, with the single set bit placed in whichever word
          holds bit position POW.  */
3383  if (pow >= HOST_BITS_PER_WIDE_INT)
3384     {
3385       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3386       nl = 0;
3387     }
3388   else
3389     {
3390       nh = 0;
3391       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3392     }
3393   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3394                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3395
3396   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d.  The second
          power of two is OR-ed into the numerator left over from mlow.  */
3397   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3398     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3399   else
3400     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3401   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3402                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3403
3404   gcc_assert (!mhigh_hi || nh - d < d);
3405   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3406   /* Assert that mlow < mhigh.  */
3407   gcc_assert (mlow_hi < mhigh_hi
3408               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3409
3410   /* If precision == N, then mlow, mhigh exceed 2^N
3411      (but they do not exceed 2^(N+1)).  */
3412
3413   /* Reduce to lowest terms.  Each iteration halves both mlow and mhigh
          (shifting the high word's bit into the top of the low word) and
          stops as soon as the halved mlow is no longer strictly below the
          halved mhigh; every accepted halving lowers POST_SHIFT by one and
          leaves both high words zero.  */
3414   for (post_shift = lgup; post_shift > 0; post_shift--)
3415     {
3416       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3417       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3418       if (ml_lo >= mh_lo)
3419         break;
3420
3421       mlow_hi = 0;
3422       mlow_lo = ml_lo;
3423       mhigh_hi = 0;
3424       mhigh_lo = mh_lo;
3425     }
3426
3427   *post_shift_ptr = post_shift;
3428   *lgup_ptr = lgup;
       /* When N is narrower than a host word, only the low N bits of
          MHIGH_LO are stored and the return value is the truth value of
          MHIGH_LO >= MASK, where MASK is 2^N - 1.  Otherwise MHIGH_LO is
          stored whole and MHIGH_HI is returned.
          NOTE(review): the test is >= MASK rather than > MASK; confirm
          that MHIGH_LO == MASK cannot occur or is intended.  */
3429   if (n < HOST_BITS_PER_WIDE_INT)
3430     {
3431       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3432       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3433       return mhigh_lo >= mask;
3434     }
3435   else
3436     {
3437       *multiplier_ptr = GEN_INT (mhigh_lo);
3438       return mhigh_hi;
3439     }
3440 }
3441
3442 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3443    congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3444
3445 static unsigned HOST_WIDE_INT
3446 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3447 {
3448   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3449
3450   /* The algorithm notes that the choice y = x satisfies
3451      x*y == 1 mod 2^3, since x is assumed odd.
3452      Each iteration doubles the number of bits of significance in y.  */
3453
3454   unsigned HOST_WIDE_INT mask;
3455   unsigned HOST_WIDE_INT y = x;
3456   int nbit = 3;
3457
       /* MASK keeps the low N bits.  The full-width case is spelled out
          separately so that 1 is never shifted by the whole width of the
          type.  */
3458   mask = (n == HOST_BITS_PER_WIDE_INT
3459           ? ~(unsigned HOST_WIDE_INT) 0
3460           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3461
       /* NBIT counts the bits of Y known to be correct; it starts at 3 and
          doubles per step.  `*' binds tighter than `&', so the whole
          product is masked.  When N <= 3 the loop body never runs and X is
          returned as is, without masking.  */
3462   while (nbit < n)
3463     {
3464       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3465       nbit *= 2;
3466     }
3467   return y;
3468 }
3469
3470 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3471    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3472    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3473    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3474    become signed.
3475
3476    The result is put in TARGET if that is convenient.
3477
3478    MODE is the mode of operation.

        Two corrections are applied in turn: (OP0 >> (bitsize - 1)) & OP1
        and then (OP1 >> (bitsize - 1)) & OP0, where bitsize is the bit
        size of MODE.  Each is added to the running value when UNSIGNEDP
        is nonzero and subtracted otherwise.  The rtx holding the final
        value is returned.  */
3479
3480 rtx
3481 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3482                              rtx op1, rtx target, int unsignedp)
3483 {
3484   rtx tem;
3485   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3486
       /* The right shift is requested with a zero unsignedp argument.
          NOTE(review): presumably an arithmetic shift, so TEM is all ones
          when OP0 is negative and zero otherwise -- confirm in
          expand_shift.  */
3487   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3488                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3489                       NULL_RTX, 0);
3490   tem = expand_and (mode, tem, op1, NULL_RTX);
       /* The first correction is folded back into ADJ_OPERAND itself.  */
3491   adj_operand
3492     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3493                      adj_operand);
3494
       /* Same again with the roles of OP0 and OP1 exchanged; this time the
          result is directed at TARGET.  */
3495   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3496                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3497                       NULL_RTX, 0);
3498   tem = expand_and (mode, tem, op0, NULL_RTX);
3499   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3500                           target);
3501
3502   return target;
3503 }
3504
3505 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.
        When MODE is word_mode the high part is taken with gen_highpart.
        Otherwise MODE must not be a scalar float mode: OP is shifted right
        by the bit size of MODE in the mode GET_MODE_WIDER_MODE gives for
        MODE (the shift is requested as unsigned), and the result is
        converted back to MODE with convert_modes.
        NOTE(review): OP is presumably a value in that wider mode --
        confirm against callers.  */
3506
3507 static rtx
3508 extract_high_half (enum machine_mode mode, rtx op)
3509 {
3510   enum machine_mode wider_mode;
3511
3512   if (mode == word_mode)
3513     return gen_highpart (mode, op);
3514
3515   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3516
3517   wider_mode = GET_MODE_WIDER_MODE (mode);
3518   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3519                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3520   return convert_modes (mode, wider_mode, op, 0);
3521 }
3522
3523 /* Like expand_mult_highpart, but only consider using a multiplication
3524    optab.  OP1 is an rtx for the constant operand.
        MODE is the mode of OP0 and of the result, which must not be a
        scalar float mode.  TARGET is a suggestion for where to put the
        result.  UNSIGNEDP nonzero means an unsigned multiply.
        A strategy is attempted only when its cost, read from the cost
        tables, is strictly below MAX_COST.  Return 0 if no strategy
        produced a result.  */
3525
3526 static rtx
3527 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3528                             rtx target, int unsignedp, int max_cost)
3529 {
       /* NARROW_OP1 is the value of OP1 re-created as a MODE constant by
          gen_int_mode; it is the operand used by every strategy except the
          non-widening multiply in the wider mode.  */
3530   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3531   enum machine_mode wider_mode;
3532   optab moptab;
3533   rtx tem;
3534   int size;
3535   bool speed = optimize_insn_for_speed_p ();
3536
3537   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3538
3539   wider_mode = GET_MODE_WIDER_MODE (mode);
3540   size = GET_MODE_BITSIZE (mode);
3541
3542   /* Firstly, try using a multiplication insn that only generates the needed
3543      high part of the product, and in the sign flavor of unsignedp.  */
3544   if (mul_highpart_cost[speed][mode] < max_cost)
3545     {
3546       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3547       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3548                           unsignedp, OPTAB_DIRECT);
3549       if (tem)
3550         return tem;
3551     }
3552
3553   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3554      Need to adjust the result after the multiplication.  The cost test
          adds two shifts by SIZE - 1 and four additions for that
          adjustment.  */
3555   if (size - 1 < BITS_PER_WORD
3556       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3557           + 4 * add_cost[speed][mode] < max_cost))
3558     {
3559       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3560       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3561                           unsignedp, OPTAB_DIRECT);
3562       if (tem)
3563         /* We used the wrong signedness.  Adjust the result.  */
3564         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3565                                             tem, unsignedp);
3566     }
3567
3568   /* Try widening multiplication.  */
3569   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3570   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3571       && mul_widen_cost[speed][wider_mode] < max_cost)
3572     {
3573       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3574                           unsignedp, OPTAB_WIDEN);
3575       if (tem)
3576         return extract_high_half (mode, tem);
3577     }
3578
3579   /* Try widening the mode and perform a non-widening multiplication.  */
3580   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3581       && size - 1 < BITS_PER_WORD
3582       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3583     {
3584       rtx insns, wop0, wop1;
3585
3586       /* We need to widen the operands, for example to ensure the
3587          constant multiplier is correctly sign or zero extended.
3588          Use a sequence to clean-up any instructions emitted by
3589          the conversions if things don't work out.  Note that it is the
              original OP1, not NARROW_OP1, that is converted here.  */
3590       start_sequence ();
3591       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3592       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3593       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3594                           unsignedp, OPTAB_WIDEN);
3595       insns = get_insns ();
3596       end_sequence ();
3597
           /* The collected insns are emitted only when the multiply
              succeeded; otherwise they are dropped.  */
3598       if (tem)
3599         {
3600           emit_insn (insns);
3601           return extract_high_half (mode, tem);
3602         }
3603     }
3604
3605   /* Try widening multiplication of opposite signedness, and adjust.  */
3606   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3607   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3608       && size - 1 < BITS_PER_WORD
3609       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3610           + 4 * add_cost[speed][mode] < max_cost))
3611     {
           /* ! UNSIGNEDP is passed to expand_binop here, in step with the
              opposite-signedness optab chosen just above.  */
3612       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3613                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3614       if (tem != 0)
3615         {
3616           tem = extract_high_half (mode, tem);
3617           /* We used the wrong signedness.  Adjust the result.  */
3618           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3619                                               target, unsignedp);
3620         }
3621     }
3622
3623   return 0;
3624 }
3625
3626 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3627    putting the high half of the result in TARGET if that is convenient,
3628    and return where the result is.  If the operation can not be performed,
3629    0 is returned.
3630
3631    MODE is the mode of operation and result.
3632
3633    UNSIGNEDP nonzero means unsigned multiply.
3634
3635    MAX_COST is the total allowed cost for the expanded RTL.

        A shift/add sequence in the next wider mode is considered only when
        that wider mode fits in BITS_PER_WORD; in all other cases the work
        is left entirely to expand_mult_highpart_optab.  */
3636
3637 static rtx
3638 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3639                       rtx target, int unsignedp, int max_cost)
3640 {
3641   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3642   unsigned HOST_WIDE_INT cnst1;
3643   int extra_cost;
3644   bool sign_adjust = false;
3645   enum mult_variant variant;
3646   struct algorithm alg;
3647   rtx tem;
3648   bool speed = optimize_insn_for_speed_p ();
3649
3650   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3651   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3652   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3653
       /* CNST1 is the constant reduced to the bits of MODE, as an unsigned
          value.  */
3654   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3655
3656   /* We can't optimize modes wider than BITS_PER_WORD.
3657      ??? We might be able to perform double-word arithmetic if
3658      mode == word_mode, however all the cost calculations in
3659      synth_mult etc. assume single-word operations.  */
3660   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3661     return expand_mult_highpart_optab (mode, op0, op1, target,
3662                                        unsignedp, max_cost);
3663
       /* On top of the multiply itself, the shift/add route is charged one
          shift by bitsize - 1 and, for a negative signed constant, one
          addition.  */
3664   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3665
3666   /* Check whether we try to multiply by a negative constant.  */
3667   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3668     {
3669       sign_adjust = true;
3670       extra_cost += add_cost[speed][mode];
3671     }
3672
3673   /* See whether shift/add multiplication is cheap enough.  */
3674   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3675                            max_cost - extra_cost))
3676     {
3677       /* See whether the specialized multiplication optabs are
3678          cheaper than the shift/add version.  The optab routine is given
              the cost of the shift/add sequence as its limit, so it only
              succeeds with something strictly cheaper.  */
3679       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3680                                         alg.cost.cost + extra_cost);
3681       if (tem)
3682         return tem;
3683
3684       tem = convert_to_mode (wider_mode, op0, unsignedp);
3685       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3686       tem = extract_high_half (mode, tem);
3687
3688       /* Adjust result for signedness.  The multiply above used the
              unsigned value CNST1 although the constant is negative, so
              OP0 is subtracted from the high half to compensate.  */
3689       if (sign_adjust)
3690         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3691
3692       return tem;
3693     }
3694   return expand_mult_highpart_optab (mode, op0, op1, target,
3695                                      unsignedp, max_cost);
3696 }
3697
3698
3699 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Return an rtx for the result: either the temporary produced by the
        branch-free sequence, or the fresh pseudo RESULT filled in by the
        compare-and-branch sequence.  */
3700
3701 static rtx
3702 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3703 {
3704   unsigned HOST_WIDE_INT masklow, maskhigh;
3705   rtx result, temp, shift, label;
3706   int logd;
3707
3708   logd = floor_log2 (d);
3709   result = gen_reg_rtx (mode);
3710
3711   /* Avoid conditional branches when they're expensive.  */
3712   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3713       && optimize_insn_for_speed_p ())
3714     {
           /* NOTE(review): the final -1 presumably asks emit_store_flag
              for a 0 / -1 valued flag, hence the name SIGNMASK -- confirm.
              If no store-flag sequence is available, fall through to the
              branching code below.  */
3715       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3716                                       mode, 0, -1);
3717       if (signmask)
3718         {
3719           signmask = force_reg (mode, signmask);
3720           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3721           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3722
3723           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3724              which instruction sequence to use.  If logical right shifts
3725              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3726              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3727
               /* This rtx is built only so that rtx_cost can price it; it
                  is never emitted, and TEMP is overwritten on both paths
                  below.  */
3728           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3729           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3730               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3731             {
3732               temp = expand_binop (mode, xor_optab, op0, signmask,
3733                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3734               temp = expand_binop (mode, sub_optab, temp, signmask,
3735                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3736               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3737                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3738               temp = expand_binop (mode, xor_optab, temp, signmask,
3739                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3740               temp = expand_binop (mode, sub_optab, temp, signmask,
3741                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3742             }
3743           else
3744             {
3745               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3746                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3747               signmask = force_reg (mode, signmask);
3748
3749               temp = expand_binop (mode, add_optab, op0, signmask,
3750                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3751               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3752                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3753               temp = expand_binop (mode, sub_optab, temp, signmask,
3754                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3755             }
3756           return temp;
3757         }
3758     }
3759
3760   /* Mask contains the mode's signbit and the significant bits of the
3761      modulus.  By including the signbit in the operation, many targets
3762      can avoid an explicit compare operation in the following comparison
3763      against zero.  */
3764
3765   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3766   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3767     {
3768       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3769       maskhigh = -1;
3770     }
3771   else
3772     maskhigh = (HOST_WIDE_INT) -1
3773                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3774
3775   temp = expand_binop (mode, and_optab, op0,
3776                        immed_double_const (masklow, maskhigh, mode),
3777                        result, 1, OPTAB_LIB_WIDEN);
3778   if (temp != result)
3779     emit_move_insn (result, temp);
3780
       /* A nonnegative masked value is already the answer, so jump past
          the fix-up.  */
3781   label = gen_label_rtx ();
3782   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3783
       /* Fix-up for a negative RESULT: compute
          ((RESULT - 1) | (-1 << LOGD)) + 1.  The OR sets every bit above
          the low LOGD bits; the surrounding -1 / +1 pair makes low bits of
          zero come out as 0 instead of -D.  */
3784   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3785                        0, OPTAB_LIB_WIDEN);
3786   masklow = (HOST_WIDE_INT) -1 << logd;
3787   maskhigh = -1;
3788   temp = expand_binop (mode, ior_optab, temp,
3789                        immed_double_const (masklow, maskhigh, mode),
3790                        result, 1, OPTAB_LIB_WIDEN);
3791   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3792                        0, OPTAB_LIB_WIDEN);
3793   if (temp != result)
3794     emit_move_insn (result, temp);
3795   emit_label (label);
3796   return result;
3797 }
3798
3799 /* Expand signed division of OP0 by a power of two D in mode MODE.
3800    This routine is only called for positive values of D.

        Every strategy below adds a bias to OP0 when OP0 is negative and
        then shifts right by floor_log2 (D), with the shift requested as
        signed.  The strategies differ only in how the bias is produced;
        which one is used depends on BRANCH_COST and on what the target
        can expand.  */
3801
3802 static rtx
3803 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3804 {
3805   rtx temp, label;
3806   tree shift;
3807   int logd;
3808
3809   logd = floor_log2 (d);
3810   shift = build_int_cst (NULL_TREE, logd);
3811
       /* For D == 2 the bias is just the flag value of OP0 < 0.
          NOTE(review): the final argument 1 presumably requests a 0 / 1
          flag, and the result of emit_store_flag is used without a null
          check -- confirm it cannot fail here.  */
3812   if (d == 2
3813       && BRANCH_COST (optimize_insn_for_speed_p (),
3814                       false) >= 1)
3815     {
3816       temp = gen_reg_rtx (mode);
3817       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3818       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3819                            0, OPTAB_LIB_WIDEN);
3820       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3821     }
3822
3823 #ifdef HAVE_conditional_move
3824   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3825       >= 2)
3826     {
3827       rtx temp2;
3828
3829       /* ??? emit_conditional_move forces a stack adjustment via
3830          compare_from_rtx so, if the sequence is discarded, it will
3831          be lost.  Do it now instead.  */
3832       do_pending_stack_adjust ();
3833
           /* Build the candidate insns in a sequence so that they can be
              dropped if the conditional move cannot be emitted.  TEMP is
              OP0 + (D - 1), the biased value.  */
3834       start_sequence ();
3835       temp2 = copy_to_mode_reg (mode, op0);
3836       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3837                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838       temp = force_reg (mode, temp);
3839
3840       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3841       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3842                                      mode, temp, temp2, mode, 0);
3843       if (temp2)
3844         {
3845           rtx seq = get_insns ();
3846           end_sequence ();
3847           emit_insn (seq);
3848           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3849         }
           /* Conditional move failed: close the sequence without emitting
              it and try the next strategy.  */
3850       end_sequence ();
3851     }
3852 #endif
3853
3854   if (BRANCH_COST (optimize_insn_for_speed_p (),
3855                    false) >= 2)
3856     {
3857       int ushift = GET_MODE_BITSIZE (mode) - logd;
3858
           /* Start from the flag for OP0 < 0 (requested with final
              argument -1, presumably a 0 / -1 mask -- confirm), then turn
              it into the bias either with an AND by D - 1 or, when a shift
              by USHIFT costs no more than one insn, with an unsigned right
              shift by USHIFT.  */
3859       temp = gen_reg_rtx (mode);
3860       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3861       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3862         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3863                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3864       else
3865         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3866                              build_int_cst (NULL_TREE, ushift),
3867                              NULL_RTX, 1);
3868       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3869                            0, OPTAB_LIB_WIDEN);
3870       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3871     }
3872
       /* Last resort: copy OP0, skip the increment by D - 1 when the copy
          is >= 0, then shift.  */
3873   label = gen_label_rtx ();
3874   temp = copy_to_mode_reg (mode, op0);
3875   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3876   expand_inc (temp, GEN_INT (d - 1));
3877   emit_label (label);
3878   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3879 }
3880 \f
3881 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3882    if that is convenient, and returning where the result is.
3883    You may request either the quotient or the remainder as the result;
3884    specify REM_FLAG nonzero to get the remainder.
3885
3886    CODE is the expression code for which kind of division this is;
3887    it controls how rounding is done.  MODE is the machine mode to use.
3888    UNSIGNEDP nonzero means do unsigned division.  */
3889
3890 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3891    and then correct it by or'ing in missing high bits
3892    if result of ANDI is nonzero.
3893    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3894    This could optimize to a bfexts instruction.
3895    But C doesn't use these operations, so their optimizations are
3896    left for later.  */
3897 /* ??? For modulo, we don't actually need the highpart of the first product,
3898    the low part will do nicely.  And for small divisors, the second multiply
3899    can also be a low-part only multiply or even be completely left out.
3900    E.g. to calculate the remainder of a division by 3 with a 32 bit
3901    multiply, multiply with 0x55555556 and extract the upper two bits;
3902    the result is exact for inputs up to 0x1fffffff.
3903    The input range can be reduced by using cross-sum rules.
3904    For odd divisors >= 3, the following table gives right shift counts
3905    so that if a number is shifted by an integer multiple of the given
3906    amount, the remainder stays the same:
3907    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3908    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3909    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3910    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3911    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3912
3913    Cross-sum rules for even numbers can be derived by leaving as many bits
3914    to the right alone as the divisor has zeros to the right.
3915    E.g. if x is an unsigned 32 bit number:
3916    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3917    */
3918
3919 rtx
3920 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3921                rtx op0, rtx op1, rtx target, int unsignedp)
3922 {
3923   enum machine_mode compute_mode;
3924   rtx tquotient;
3925   rtx quotient = 0, remainder = 0;
3926   rtx last;
3927   int size;
3928   rtx insn, set;
3929   optab optab1, optab2;
3930   int op1_is_constant, op1_is_pow2 = 0;
3931   int max_cost, extra_cost;
3932   static HOST_WIDE_INT last_div_const = 0;
3933   static HOST_WIDE_INT ext_op1;
3934   bool speed = optimize_insn_for_speed_p ();
3935
3936   op1_is_constant = CONST_INT_P (op1);
3937   if (op1_is_constant)
3938     {
3939       ext_op1 = INTVAL (op1);
3940       if (unsignedp)
3941         ext_op1 &= GET_MODE_MASK (mode);
3942       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3943                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3944     }
3945
3946   /*
3947      This is the structure of expand_divmod:
3948
3949      First comes code to fix up the operands so we can perform the operations
3950      correctly and efficiently.
3951
3952      Second comes a switch statement with code specific for each rounding mode.
3953      For some special operands this code emits all RTL for the desired
3954      operation, for other cases, it generates only a quotient and stores it in
3955      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3956      to indicate that it has not done anything.
3957
3958      Last comes code that finishes the operation.  If QUOTIENT is set and
3959      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3960      QUOTIENT is not set, it is computed using trunc rounding.
3961
3962      We try to generate special code for division and remainder when OP1 is a
3963      constant.  If |OP1| = 2**n we can use shifts and some other fast
3964      operations.  For other values of OP1, we compute a carefully selected
3965      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3966      by m.
3967
3968      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3969      half of the product.  Different strategies for generating the product are
3970      implemented in expand_mult_highpart.
3971
3972      If what we actually want is the remainder, we generate that by another
3973      by-constant multiplication and a subtraction.  */
3974
3975   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3976      code below will malfunction if we are, so check here and handle
3977      the special case if so.  */
3978   if (op1 == const1_rtx)
3979     return rem_flag ? const0_rtx : op0;
3980
3981     /* When dividing by -1, we could get an overflow.
3982      negv_optab can handle overflows.  */
3983   if (! unsignedp && op1 == constm1_rtx)
3984     {
3985       if (rem_flag)
3986         return const0_rtx;
3987       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3988                           ? negv_optab : neg_optab, op0, target, 0);
3989     }
3990
3991   if (target
3992       /* Don't use the function value register as a target
3993          since we have to read it as well as write it,
3994          and function-inlining gets confused by this.  */
3995       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3996           /* Don't clobber an operand while doing a multi-step calculation.  */
3997           || ((rem_flag || op1_is_constant)
3998               && (reg_mentioned_p (target, op0)
3999                   || (MEM_P (op0) && MEM_P (target))))
4000           || reg_mentioned_p (target, op1)
4001           || (MEM_P (op1) && MEM_P (target))))
4002     target = 0;
4003
4004   /* Get the mode in which to perform this computation.  Normally it will
4005      be MODE, but sometimes we can't do the desired operation in MODE.
4006      If so, pick a wider mode in which we can do the operation.  Convert
4007      to that mode at the start to avoid repeated conversions.
4008
4009      First see what operations we need.  These depend on the expression
4010      we are evaluating.  (We assume that divxx3 insns exist under the
4011      same conditions that modxx3 insns and that these insns don't normally
4012      fail.  If these assumptions are not correct, we may generate less
4013      efficient code in some cases.)
4014
4015      Then see if we find a mode in which we can open-code that operation
4016      (either a division, modulus, or shift).  Finally, check for the smallest
4017      mode for which we can do the operation with a library call.  */
4018
4019   /* We might want to refine this now that we have division-by-constant
4020      optimization.  Since expand_mult_highpart tries so many variants, it is
4021      not straightforward to generalize this.  Maybe we should make an array
4022      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4023
4024   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4025             ? (unsignedp ? lshr_optab : ashr_optab)
4026             : (unsignedp ? udiv_optab : sdiv_optab));
4027   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4028             ? optab1
4029             : (unsignedp ? udivmod_optab : sdivmod_optab));
4030
4031   for (compute_mode = mode; compute_mode != VOIDmode;
4032        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4033     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
4034         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
4035       break;
4036
4037   if (compute_mode == VOIDmode)
4038     for (compute_mode = mode; compute_mode != VOIDmode;
4039          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4040       if (optab_libfunc (optab1, compute_mode)
4041           || optab_libfunc (optab2, compute_mode))
4042         break;
4043
4044   /* If we still couldn't find a mode, use MODE, but expand_binop will
4045      probably die.  */
4046   if (compute_mode == VOIDmode)
4047     compute_mode = mode;
4048
4049   if (target && GET_MODE (target) == compute_mode)
4050     tquotient = target;
4051   else
4052     tquotient = gen_reg_rtx (compute_mode);
4053
4054   size = GET_MODE_BITSIZE (compute_mode);
4055 #if 0
4056   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4057      (mode), and thereby get better code when OP1 is a constant.  Do that
4058      later.  It will require going over all usages of SIZE below.  */
4059   size = GET_MODE_BITSIZE (mode);
4060 #endif
4061
4062   /* Only deduct something for a REM if the last divide done was
4063      for a different constant.   Then set the constant of the last
4064      divide.  */
4065   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4066   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4067                      && INTVAL (op1) == last_div_const))
4068     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4069
4070   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4071
4072   /* Now convert to the best mode to use.  */
4073   if (compute_mode != mode)
4074     {
4075       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4076       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4077
4078       /* convert_modes may have placed op1 into a register, so we
4079          must recompute the following.  */
4080       op1_is_constant = CONST_INT_P (op1);
4081       op1_is_pow2 = (op1_is_constant
4082                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4083                           || (! unsignedp
4084                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4085     }
4086
4087   /* If one of the operands is a volatile MEM, copy it into a register.  */
4088
4089   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4090     op0 = force_reg (compute_mode, op0);
4091   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4092     op1 = force_reg (compute_mode, op1);
4093
4094   /* If we need the remainder or if OP1 is constant, we need to
4095      put OP0 in a register in case it has any queued subexpressions.  */
4096   if (rem_flag || op1_is_constant)
4097     op0 = force_reg (compute_mode, op0);
4098
4099   last = get_last_insn ();
4100
4101   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4102   if (unsignedp)
4103     {
4104       if (code == FLOOR_DIV_EXPR)
4105         code = TRUNC_DIV_EXPR;
4106       if (code == FLOOR_MOD_EXPR)
4107         code = TRUNC_MOD_EXPR;
4108       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4109         code = TRUNC_DIV_EXPR;
4110     }
4111
4112   if (op1 != const0_rtx)
4113     switch (code)
4114       {
4115       case TRUNC_MOD_EXPR:
4116       case TRUNC_DIV_EXPR:
4117         if (op1_is_constant)
4118           {
4119             if (unsignedp)
4120               {
4121                 unsigned HOST_WIDE_INT mh;
4122                 int pre_shift, post_shift;
4123                 int dummy;
4124                 rtx ml;
4125                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4126                                             & GET_MODE_MASK (compute_mode));
4127
4128                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4129                   {
4130                     pre_shift = floor_log2 (d);
4131                     if (rem_flag)
4132                       {
4133                         remainder
4134                           = expand_binop (compute_mode, and_optab, op0,
4135                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4136                                           remainder, 1,
4137                                           OPTAB_LIB_WIDEN);
4138                         if (remainder)
4139                           return gen_lowpart (mode, remainder);
4140                       }
4141                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4142                                              build_int_cst (NULL_TREE,
4143                                                             pre_shift),
4144                                              tquotient, 1);
4145                   }
4146                 else if (size <= HOST_BITS_PER_WIDE_INT)
4147                   {
4148                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4149                       {
4150                         /* Most significant bit of divisor is set; emit an scc
4151                            insn.  */
4152                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4153                                                           compute_mode, 1, 1);
4154                       }
4155                     else
4156                       {
4157                         /* Find a suitable multiplier and right shift count
4158                            instead of multiplying with D.  */
4159
4160                         mh = choose_multiplier (d, size, size,
4161                                                 &ml, &post_shift, &dummy);
4162
4163                         /* If the suggested multiplier is more than SIZE bits,
4164                            we can do better for even divisors, using an
4165                            initial right shift.  */
4166                         if (mh != 0 && (d & 1) == 0)
4167                           {
4168                             pre_shift = floor_log2 (d & -d);
4169                             mh = choose_multiplier (d >> pre_shift, size,
4170                                                     size - pre_shift,
4171                                                     &ml, &post_shift, &dummy);
4172                             gcc_assert (!mh);
4173                           }
4174                         else
4175                           pre_shift = 0;
4176
4177                         if (mh != 0)
4178                           {
4179                             rtx t1, t2, t3, t4;
4180
4181                             if (post_shift - 1 >= BITS_PER_WORD)
4182                               goto fail1;
4183
4184                             extra_cost
4185                               = (shift_cost[speed][compute_mode][post_shift - 1]
4186                                  + shift_cost[speed][compute_mode][1]
4187                                  + 2 * add_cost[speed][compute_mode]);
4188                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4189                                                        NULL_RTX, 1,
4190                                                        max_cost - extra_cost);
4191                             if (t1 == 0)
4192                               goto fail1;
4193                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4194                                                                op0, t1),
4195                                                 NULL_RTX);
4196                             t3 = expand_shift
4197                               (RSHIFT_EXPR, compute_mode, t2,
4198                                build_int_cst (NULL_TREE, 1),
4199                                NULL_RTX,1);
4200                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4201                                                               t1, t3),
4202                                                 NULL_RTX);
4203                             quotient = expand_shift
4204                               (RSHIFT_EXPR, compute_mode, t4,
4205                                build_int_cst (NULL_TREE, post_shift - 1),
4206                                tquotient, 1);
4207                           }
4208                         else
4209                           {
4210                             rtx t1, t2;
4211
4212                             if (pre_shift >= BITS_PER_WORD
4213                                 || post_shift >= BITS_PER_WORD)
4214                               goto fail1;
4215
4216                             t1 = expand_shift
4217                               (RSHIFT_EXPR, compute_mode, op0,
4218                                build_int_cst (NULL_TREE, pre_shift),
4219                                NULL_RTX, 1);
4220                             extra_cost
4221                               = (shift_cost[speed][compute_mode][pre_shift]
4222                                  + shift_cost[speed][compute_mode][post_shift]);
4223                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4224                                                        NULL_RTX, 1,
4225                                                        max_cost - extra_cost);
4226                             if (t2 == 0)
4227                               goto fail1;
4228                             quotient = expand_shift
4229                               (RSHIFT_EXPR, compute_mode, t2,
4230                                build_int_cst (NULL_TREE, post_shift),
4231                                tquotient, 1);
4232                           }
4233                       }
4234                   }
4235                 else            /* Too wide mode to use tricky code */
4236                   break;
4237
4238                 insn = get_last_insn ();
4239                 if (insn != last
4240                     && (set = single_set (insn)) != 0
4241                     && SET_DEST (set) == quotient)
4242                   set_unique_reg_note (insn,
4243                                        REG_EQUAL,
4244                                        gen_rtx_UDIV (compute_mode, op0, op1));
4245               }
4246             else                /* TRUNC_DIV, signed */
4247               {
4248                 unsigned HOST_WIDE_INT ml;
4249                 int lgup, post_shift;
4250                 rtx mlr;
4251                 HOST_WIDE_INT d = INTVAL (op1);
4252                 unsigned HOST_WIDE_INT abs_d;
4253
4254                 /* Since d might be INT_MIN, we have to cast to
4255                    unsigned HOST_WIDE_INT before negating to avoid
4256                    undefined signed overflow.  */
4257                 abs_d = (d >= 0
4258                          ? (unsigned HOST_WIDE_INT) d
4259                          : - (unsigned HOST_WIDE_INT) d);
4260
4261                 /* n rem d = n rem -d */
4262                 if (rem_flag && d < 0)
4263                   {
4264                     d = abs_d;
4265                     op1 = gen_int_mode (abs_d, compute_mode);
4266                   }
4267
4268                 if (d == 1)
4269                   quotient = op0;
4270                 else if (d == -1)
4271                   quotient = expand_unop (compute_mode, neg_optab, op0,
4272                                           tquotient, 0);
4273                 else if (HOST_BITS_PER_WIDE_INT >= size
4274                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4275                   {
4276                     /* This case is not handled correctly below.  */
4277                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4278                                                 compute_mode, 1, 1);
4279                     if (quotient == 0)
4280                       goto fail1;
4281                   }
4282                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4283                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4284                                       : sdiv_pow2_cheap[speed][compute_mode])
4285                          /* We assume that cheap metric is true if the
4286                             optab has an expander for this mode.  */
4287                          && ((optab_handler ((rem_flag ? smod_optab
4288                                               : sdiv_optab),
4289                                               compute_mode)->insn_code
4290                               != CODE_FOR_nothing)
4291                              || (optab_handler(sdivmod_optab,
4292                                                compute_mode)
4293                                  ->insn_code != CODE_FOR_nothing)))
4294                   ;
4295                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4296                   {
4297                     if (rem_flag)
4298                       {
4299                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4300                         if (remainder)
4301                           return gen_lowpart (mode, remainder);
4302                       }
4303
4304                     if (sdiv_pow2_cheap[speed][compute_mode]
4305                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4306                              != CODE_FOR_nothing)
4307                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4308                                 != CODE_FOR_nothing)))
4309                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4310                                                 compute_mode, op0,
4311                                                 gen_int_mode (abs_d,
4312                                                               compute_mode),
4313                                                 NULL_RTX, 0);
4314                     else
4315                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4316
4317                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4318                        negate the quotient.  */
4319                     if (d < 0)
4320                       {
4321                         insn = get_last_insn ();
4322                         if (insn != last
4323                             && (set = single_set (insn)) != 0
4324                             && SET_DEST (set) == quotient
4325                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4326                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4327                           set_unique_reg_note (insn,
4328                                                REG_EQUAL,
4329                                                gen_rtx_DIV (compute_mode,
4330                                                             op0,
4331                                                             GEN_INT
4332                                                             (trunc_int_for_mode
4333                                                              (abs_d,
4334                                                               compute_mode))));
4335
4336                         quotient = expand_unop (compute_mode, neg_optab,
4337                                                 quotient, quotient, 0);
4338                       }
4339                   }
4340                 else if (size <= HOST_BITS_PER_WIDE_INT)
4341                   {
4342                     choose_multiplier (abs_d, size, size - 1,
4343                                        &mlr, &post_shift, &lgup);
4344                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4345                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4346                       {
4347                         rtx t1, t2, t3;
4348
4349                         if (post_shift >= BITS_PER_WORD
4350                             || size - 1 >= BITS_PER_WORD)
4351                           goto fail1;
4352
4353                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4354                                       + shift_cost[speed][compute_mode][size - 1]
4355                                       + add_cost[speed][compute_mode]);
4356                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4357                                                    NULL_RTX, 0,
4358                                                    max_cost - extra_cost);
4359                         if (t1 == 0)
4360                           goto fail1;
4361                         t2 = expand_shift
4362                           (RSHIFT_EXPR, compute_mode, t1,
4363                            build_int_cst (NULL_TREE, post_shift),
4364                            NULL_RTX, 0);
4365                         t3 = expand_shift
4366                           (RSHIFT_EXPR, compute_mode, op0,
4367                            build_int_cst (NULL_TREE, size - 1),
4368                            NULL_RTX, 0);
4369                         if (d < 0)
4370                           quotient
4371                             = force_operand (gen_rtx_MINUS (compute_mode,
4372                                                             t3, t2),
4373                                              tquotient);
4374                         else
4375                           quotient
4376                             = force_operand (gen_rtx_MINUS (compute_mode,
4377                                                             t2, t3),
4378                                              tquotient);
4379                       }
4380                     else
4381                       {
4382                         rtx t1, t2, t3, t4;
4383
4384                         if (post_shift >= BITS_PER_WORD
4385                             || size - 1 >= BITS_PER_WORD)
4386                           goto fail1;
4387
4388                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4389                         mlr = gen_int_mode (ml, compute_mode);
4390                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4391                                       + shift_cost[speed][compute_mode][size - 1]
4392                                       + 2 * add_cost[speed][compute_mode]);
4393                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4394                                                    NULL_RTX, 0,
4395                                                    max_cost - extra_cost);
4396                         if (t1 == 0)
4397                           goto fail1;
4398                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4399                                                           t1, op0),
4400                                             NULL_RTX);
4401                         t3 = expand_shift
4402                           (RSHIFT_EXPR, compute_mode, t2,
4403                            build_int_cst (NULL_TREE, post_shift),
4404                            NULL_RTX, 0);
4405                         t4 = expand_shift
4406                           (RSHIFT_EXPR, compute_mode, op0,
4407                            build_int_cst (NULL_TREE, size - 1),
4408                            NULL_RTX, 0);
4409                         if (d < 0)
4410                           quotient
4411                             = force_operand (gen_rtx_MINUS (compute_mode,
4412                                                             t4, t3),
4413                                              tquotient);
4414                         else
4415                           quotient
4416                             = force_operand (gen_rtx_MINUS (compute_mode,
4417                                                             t3, t4),
4418                                              tquotient);
4419                       }
4420                   }
4421                 else            /* Too wide mode to use tricky code */
4422                   break;
4423
4424                 insn = get_last_insn ();
4425                 if (insn != last
4426                     && (set = single_set (insn)) != 0
4427                     && SET_DEST (set) == quotient)
4428                   set_unique_reg_note (insn,
4429                                        REG_EQUAL,
4430                                        gen_rtx_DIV (compute_mode, op0, op1));
4431               }
4432             break;
4433           }
4434       fail1:
4435         delete_insns_since (last);
4436         break;
4437
4438       case FLOOR_DIV_EXPR:
4439       case FLOOR_MOD_EXPR:
4440       /* We will come here only for signed operations.  */
4441         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4442           {
4443             unsigned HOST_WIDE_INT mh;
4444             int pre_shift, lgup, post_shift;
4445             HOST_WIDE_INT d = INTVAL (op1);
4446             rtx ml;
4447
4448             if (d > 0)
4449               {
4450                 /* We could just as easily deal with negative constants here,
4451                    but it does not seem worth the trouble for GCC 2.6.  */
4452                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4453                   {
4454                     pre_shift = floor_log2 (d);
4455                     if (rem_flag)
4456                       {
4457                         remainder = expand_binop (compute_mode, and_optab, op0,
4458                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4459                                                   remainder, 0, OPTAB_LIB_WIDEN);
4460                         if (remainder)
4461                           return gen_lowpart (mode, remainder);
4462                       }
4463                     quotient = expand_shift
4464                       (RSHIFT_EXPR, compute_mode, op0,
4465                        build_int_cst (NULL_TREE, pre_shift),
4466                        tquotient, 0);
4467                   }
4468                 else
4469                   {
4470                     rtx t1, t2, t3, t4;
4471
4472                     mh = choose_multiplier (d, size, size - 1,
4473                                             &ml, &post_shift, &lgup);
4474                     gcc_assert (!mh);
4475
4476                     if (post_shift < BITS_PER_WORD
4477                         && size - 1 < BITS_PER_WORD)
4478                       {
4479                         t1 = expand_shift
4480                           (RSHIFT_EXPR, compute_mode, op0,
4481                            build_int_cst (NULL_TREE, size - 1),
4482                            NULL_RTX, 0);
4483                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4484                                            NULL_RTX, 0, OPTAB_WIDEN);
4485                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4486                                       + shift_cost[speed][compute_mode][size - 1]
4487                                       + 2 * add_cost[speed][compute_mode]);
4488                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4489                                                    NULL_RTX, 1,
4490                                                    max_cost - extra_cost);
4491                         if (t3 != 0)
4492                           {
4493                             t4 = expand_shift
4494                               (RSHIFT_EXPR, compute_mode, t3,
4495                                build_int_cst (NULL_TREE, post_shift),
4496                                NULL_RTX, 1);
4497                             quotient = expand_binop (compute_mode, xor_optab,
4498                                                      t4, t1, tquotient, 0,
4499                                                      OPTAB_WIDEN);
4500                           }
4501                       }
4502                   }
4503               }
4504             else
4505               {
4506                 rtx nsign, t1, t2, t3, t4;
4507                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4508                                                   op0, constm1_rtx), NULL_RTX);
4509                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4510                                    0, OPTAB_WIDEN);
4511                 nsign = expand_shift
4512                   (RSHIFT_EXPR, compute_mode, t2,
4513                    build_int_cst (NULL_TREE, size - 1),
4514                    NULL_RTX, 0);
4515                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4516                                     NULL_RTX);
4517                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4518                                     NULL_RTX, 0);
4519                 if (t4)
4520                   {
4521                     rtx t5;
4522                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4523                                       NULL_RTX, 0);
4524                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4525                                                             t4, t5),
4526                                               tquotient);
4527                   }
4528               }
4529           }
4530
4531         if (quotient != 0)
4532           break;
4533         delete_insns_since (last);
4534
4535         /* Try using an instruction that produces both the quotient and
4536            remainder, using truncation.  We can easily compensate the quotient
4537            or remainder to get floor rounding, once we have the remainder.
4538            Notice that we compute also the final remainder value here,
4539            and return the result right away.  */
4540         if (target == 0 || GET_MODE (target) != compute_mode)
4541           target = gen_reg_rtx (compute_mode);
4542
4543         if (rem_flag)
4544           {
4545             remainder
4546               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4547             quotient = gen_reg_rtx (compute_mode);
4548           }
4549         else
4550           {
4551             quotient
4552               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4553             remainder = gen_reg_rtx (compute_mode);
4554           }
4555
4556         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4557                                  quotient, remainder, 0))
4558           {
4559             /* This could be computed with a branch-less sequence.
4560                Save that for later.  */
4561             rtx tem;
4562             rtx label = gen_label_rtx ();
4563             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4564             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4565                                 NULL_RTX, 0, OPTAB_WIDEN);
4566             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4567             expand_dec (quotient, const1_rtx);
4568             expand_inc (remainder, op1);
4569             emit_label (label);
4570             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4571           }
4572
4573         /* No luck with division elimination or divmod.  Have to do it
4574            by conditionally adjusting op0 *and* the result.  */
4575         {
4576           rtx label1, label2, label3, label4, label5;
4577           rtx adjusted_op0;
4578           rtx tem;
4579
4580           quotient = gen_reg_rtx (compute_mode);
4581           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4582           label1 = gen_label_rtx ();
4583           label2 = gen_label_rtx ();
4584           label3 = gen_label_rtx ();
4585           label4 = gen_label_rtx ();
4586           label5 = gen_label_rtx ();
4587           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4588           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4589           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4590                               quotient, 0, OPTAB_LIB_WIDEN);
4591           if (tem != quotient)
4592             emit_move_insn (quotient, tem);
4593           emit_jump_insn (gen_jump (label5));
4594           emit_barrier ();
4595           emit_label (label1);
4596           expand_inc (adjusted_op0, const1_rtx);
4597           emit_jump_insn (gen_jump (label4));
4598           emit_barrier ();
4599           emit_label (label2);
4600           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4601           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4602                               quotient, 0, OPTAB_LIB_WIDEN);
4603           if (tem != quotient)
4604             emit_move_insn (quotient, tem);
4605           emit_jump_insn (gen_jump (label5));
4606           emit_barrier ();
4607           emit_label (label3);
4608           expand_dec (adjusted_op0, const1_rtx);
4609           emit_label (label4);
4610           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4611                               quotient, 0, OPTAB_LIB_WIDEN);
4612           if (tem != quotient)
4613             emit_move_insn (quotient, tem);
4614           expand_dec (quotient, const1_rtx);
4615           emit_label (label5);
4616         }
4617         break;
4618
4619       case CEIL_DIV_EXPR:
4620       case CEIL_MOD_EXPR:
4621         if (unsignedp)
4622           {
4623             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4624               {
4625                 rtx t1, t2, t3;
4626                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4627                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4628                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4629                                    tquotient, 1);
4630                 t2 = expand_binop (compute_mode, and_optab, op0,
4631                                    GEN_INT (d - 1),
4632                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4633                 t3 = gen_reg_rtx (compute_mode);
4634                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4635                                       compute_mode, 1, 1);
4636                 if (t3 == 0)
4637                   {
4638                     rtx lab;
4639                     lab = gen_label_rtx ();
4640                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4641                     expand_inc (t1, const1_rtx);
4642                     emit_label (lab);
4643                     quotient = t1;
4644                   }
4645                 else
4646                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4647                                                           t1, t3),
4648                                             tquotient);
4649                 break;
4650               }
4651
4652             /* Try using an instruction that produces both the quotient and
4653                remainder, using truncation.  We can easily compensate the
4654                quotient or remainder to get ceiling rounding, once we have the
4655                remainder.  Notice that we compute also the final remainder
4656                value here, and return the result right away.  */
4657             if (target == 0 || GET_MODE (target) != compute_mode)
4658               target = gen_reg_rtx (compute_mode);
4659
4660             if (rem_flag)
4661               {
4662                 remainder = (REG_P (target)
4663                              ? target : gen_reg_rtx (compute_mode));
4664                 quotient = gen_reg_rtx (compute_mode);
4665               }
4666             else
4667               {
4668                 quotient = (REG_P (target)
4669                             ? target : gen_reg_rtx (compute_mode));
4670                 remainder = gen_reg_rtx (compute_mode);
4671               }
4672
4673             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4674                                      remainder, 1))
4675               {
4676                 /* This could be computed with a branch-less sequence.
4677                    Save that for later.  */
4678                 rtx label = gen_label_rtx ();
4679                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4680                                  compute_mode, label);
4681                 expand_inc (quotient, const1_rtx);
4682                 expand_dec (remainder, op1);
4683                 emit_label (label);
4684                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4685               }
4686
4687             /* No luck with division elimination or divmod.  Have to do it
4688                by conditionally adjusting op0 *and* the result.  */
4689             {
4690               rtx label1, label2;
4691               rtx adjusted_op0, tem;
4692
4693               quotient = gen_reg_rtx (compute_mode);
4694               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4695               label1 = gen_label_rtx ();
4696               label2 = gen_label_rtx ();
4697               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4698                                compute_mode, label1);
4699               emit_move_insn  (quotient, const0_rtx);
4700               emit_jump_insn (gen_jump (label2));
4701               emit_barrier ();
4702               emit_label (label1);
4703               expand_dec (adjusted_op0, const1_rtx);
4704               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4705                                   quotient, 1, OPTAB_LIB_WIDEN);
4706               if (tem != quotient)
4707                 emit_move_insn (quotient, tem);
4708               expand_inc (quotient, const1_rtx);
4709               emit_label (label2);
4710             }
4711           }
4712         else /* signed */
4713           {
4714             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4715                 && INTVAL (op1) >= 0)
4716               {
4717                 /* This is extremely similar to the code for the unsigned case
4718                    above.  For 2.7 we should merge these variants, but for
4719                    2.6.1 I don't want to touch the code for unsigned since that
4720                    get used in C.  The signed case will only be used by other
4721                    languages (Ada).  */
4722
4723                 rtx t1, t2, t3;
4724                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4725                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4726                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4727                                    tquotient, 0);
4728                 t2 = expand_binop (compute_mode, and_optab, op0,
4729                                    GEN_INT (d - 1),
4730                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4731                 t3 = gen_reg_rtx (compute_mode);
4732                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4733                                       compute_mode, 1, 1);
4734                 if (t3 == 0)
4735                   {
4736                     rtx lab;
4737                     lab = gen_label_rtx ();
4738                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4739                     expand_inc (t1, const1_rtx);
4740                     emit_label (lab);
4741                     quotient = t1;
4742                   }
4743                 else
4744                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4745                                                           t1, t3),
4746                                             tquotient);
4747                 break;
4748               }
4749
4750             /* Try using an instruction that produces both the quotient and
4751                remainder, using truncation.  We can easily compensate the
4752                quotient or remainder to get ceiling rounding, once we have the
4753                remainder.  Notice that we compute also the final remainder
4754                value here, and return the result right away.  */
4755             if (target == 0 || GET_MODE (target) != compute_mode)
4756               target = gen_reg_rtx (compute_mode);
4757             if (rem_flag)
4758               {
4759                 remainder= (REG_P (target)
4760                             ? target : gen_reg_rtx (compute_mode));
4761                 quotient = gen_reg_rtx (compute_mode);
4762               }
4763             else
4764               {
4765                 quotient = (REG_P (target)
4766                             ? target : gen_reg_rtx (compute_mode));
4767                 remainder = gen_reg_rtx (compute_mode);
4768               }
4769
4770             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4771                                      remainder, 0))
4772               {
4773                 /* This could be computed with a branch-less sequence.
4774                    Save that for later.  */
4775                 rtx tem;
4776                 rtx label = gen_label_rtx ();
4777                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4778                                  compute_mode, label);
4779                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4780                                     NULL_RTX, 0, OPTAB_WIDEN);
4781                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4782                 expand_inc (quotient, const1_rtx);
4783                 expand_dec (remainder, op1);
4784                 emit_label (label);
4785                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4786               }
4787
4788             /* No luck with division elimination or divmod.  Have to do it
4789                by conditionally adjusting op0 *and* the result.  */
4790             {
4791               rtx label1, label2, label3, label4, label5;
4792               rtx adjusted_op0;
4793               rtx tem;
4794
4795               quotient = gen_reg_rtx (compute_mode);
4796               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4797               label1 = gen_label_rtx ();
4798               label2 = gen_label_rtx ();
4799               label3 = gen_label_rtx ();
4800               label4 = gen_label_rtx ();
4801               label5 = gen_label_rtx ();
4802               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4803               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4804                                compute_mode, label1);
4805               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4806                                   quotient, 0, OPTAB_LIB_WIDEN);
4807               if (tem != quotient)
4808                 emit_move_insn (quotient, tem);
4809               emit_jump_insn (gen_jump (label5));
4810               emit_barrier ();
4811               emit_label (label1);
4812               expand_dec (adjusted_op0, const1_rtx);
4813               emit_jump_insn (gen_jump (label4));
4814               emit_barrier ();
4815               emit_label (label2);
4816               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4817                                compute_mode, label3);
4818               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4819                                   quotient, 0, OPTAB_LIB_WIDEN);
4820               if (tem != quotient)
4821                 emit_move_insn (quotient, tem);
4822               emit_jump_insn (gen_jump (label5));
4823               emit_barrier ();
4824               emit_label (label3);
4825               expand_inc (adjusted_op0, const1_rtx);
4826               emit_label (label4);
4827               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4828                                   quotient, 0, OPTAB_LIB_WIDEN);
4829               if (tem != quotient)
4830                 emit_move_insn (quotient, tem);
4831               expand_inc (quotient, const1_rtx);
4832               emit_label (label5);
4833             }
4834           }
4835         break;
4836
4837       case EXACT_DIV_EXPR:
4838         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4839           {
4840             HOST_WIDE_INT d = INTVAL (op1);
4841             unsigned HOST_WIDE_INT ml;
4842             int pre_shift;
4843             rtx t1;
4844
4845             pre_shift = floor_log2 (d & -d);
4846             ml = invert_mod2n (d >> pre_shift, size);
4847             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4848                                build_int_cst (NULL_TREE, pre_shift),
4849                                NULL_RTX, unsignedp);
4850             quotient = expand_mult (compute_mode, t1,
4851                                     gen_int_mode (ml, compute_mode),
4852                                     NULL_RTX, 1);
4853
4854             insn = get_last_insn ();
4855             set_unique_reg_note (insn,
4856                                  REG_EQUAL,
4857                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4858                                                  compute_mode,
4859                                                  op0, op1));
4860           }
4861         break;
4862
4863       case ROUND_DIV_EXPR:
4864       case ROUND_MOD_EXPR:
4865         if (unsignedp)
4866           {
4867             rtx tem;
4868             rtx label;
4869             label = gen_label_rtx ();
4870             quotient = gen_reg_rtx (compute_mode);
4871             remainder = gen_reg_rtx (compute_mode);
4872             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4873               {
4874                 rtx tem;
4875                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4876                                          quotient, 1, OPTAB_LIB_WIDEN);
4877                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4878                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4879                                           remainder, 1, OPTAB_LIB_WIDEN);
4880               }
4881             tem = plus_constant (op1, -1);
4882             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4883                                 build_int_cst (NULL_TREE, 1),
4884                                 NULL_RTX, 1);
4885             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4886             expand_inc (quotient, const1_rtx);
4887             expand_dec (remainder, op1);
4888             emit_label (label);
4889           }
4890         else
4891           {
4892             rtx abs_rem, abs_op1, tem, mask;
4893             rtx label;
4894             label = gen_label_rtx ();
4895             quotient = gen_reg_rtx (compute_mode);
4896             remainder = gen_reg_rtx (compute_mode);
4897             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4898               {
4899                 rtx tem;
4900                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4901                                          quotient, 0, OPTAB_LIB_WIDEN);
4902                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4903                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4904                                           remainder, 0, OPTAB_LIB_WIDEN);
4905               }
4906             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4907             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4908             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4909                                 build_int_cst (NULL_TREE, 1),
4910                                 NULL_RTX, 1);
4911             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4912             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4913                                 NULL_RTX, 0, OPTAB_WIDEN);
4914             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4915                                  build_int_cst (NULL_TREE, size - 1),
4916                                  NULL_RTX, 0);
4917             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4918                                 NULL_RTX, 0, OPTAB_WIDEN);
4919             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4920                                 NULL_RTX, 0, OPTAB_WIDEN);
4921             expand_inc (quotient, tem);
4922             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4923                                 NULL_RTX, 0, OPTAB_WIDEN);
4924             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4925                                 NULL_RTX, 0, OPTAB_WIDEN);
4926             expand_dec (remainder, tem);
4927             emit_label (label);
4928           }
4929         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4930
4931       default:
4932         gcc_unreachable ();
4933       }
4934
4935   if (quotient == 0)
4936     {
4937       if (target && GET_MODE (target) != compute_mode)
4938         target = 0;
4939
4940       if (rem_flag)
4941         {
4942           /* Try to produce the remainder without producing the quotient.
4943              If we seem to have a divmod pattern that does not require widening,
4944              don't try widening here.  We should really have a WIDEN argument
4945              to expand_twoval_binop, since what we'd really like to do here is
4946              1) try a mod insn in compute_mode
4947              2) try a divmod insn in compute_mode
4948              3) try a div insn in compute_mode and multiply-subtract to get
4949                 remainder
4950              4) try the same things with widening allowed.  */
4951           remainder
4952             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4953                                  op0, op1, target,
4954                                  unsignedp,
4955                                  ((optab_handler (optab2, compute_mode)->insn_code
4956                                    != CODE_FOR_nothing)
4957                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4958           if (remainder == 0)
4959             {
4960               /* No luck there.  Can we do remainder and divide at once
4961                  without a library call?  */
4962               remainder = gen_reg_rtx (compute_mode);
4963               if (! expand_twoval_binop ((unsignedp
4964                                           ? udivmod_optab
4965                                           : sdivmod_optab),
4966                                          op0, op1,
4967                                          NULL_RTX, remainder, unsignedp))
4968                 remainder = 0;
4969             }
4970
4971           if (remainder)
4972             return gen_lowpart (mode, remainder);
4973         }
4974
4975       /* Produce the quotient.  Try a quotient insn, but not a library call.
4976          If we have a divmod in this mode, use it in preference to widening
4977          the div (for this test we assume it will not fail). Note that optab2
4978          is set to the one of the two optabs that the call below will use.  */
4979       quotient
4980         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4981                              op0, op1, rem_flag ? NULL_RTX : target,
4982                              unsignedp,
4983                              ((optab_handler (optab2, compute_mode)->insn_code
4984                                != CODE_FOR_nothing)
4985                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4986
4987       if (quotient == 0)
4988         {
4989           /* No luck there.  Try a quotient-and-remainder insn,
4990              keeping the quotient alone.  */
4991           quotient = gen_reg_rtx (compute_mode);
4992           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4993                                      op0, op1,
4994                                      quotient, NULL_RTX, unsignedp))
4995             {
4996               quotient = 0;
4997               if (! rem_flag)
4998                 /* Still no luck.  If we are not computing the remainder,
4999                    use a library call for the quotient.  */
5000                 quotient = sign_expand_binop (compute_mode,
5001                                               udiv_optab, sdiv_optab,
5002                                               op0, op1, target,
5003                                               unsignedp, OPTAB_LIB_WIDEN);
5004             }
5005         }
5006     }
5007
5008   if (rem_flag)
5009     {
5010       if (target && GET_MODE (target) != compute_mode)
5011         target = 0;
5012
5013       if (quotient == 0)
5014         {
5015           /* No divide instruction either.  Use library for remainder.  */
5016           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5017                                          op0, op1, target,
5018                                          unsignedp, OPTAB_LIB_WIDEN);
5019           /* No remainder function.  Try a quotient-and-remainder
5020              function, keeping the remainder.  */
5021           if (!remainder)
5022             {
5023               remainder = gen_reg_rtx (compute_mode);
5024               if (!expand_twoval_binop_libfunc
5025                   (unsignedp ? udivmod_optab : sdivmod_optab,
5026                    op0, op1,
5027                    NULL_RTX, remainder,
5028                    unsignedp ? UMOD : MOD))
5029                 remainder = NULL_RTX;
5030             }
5031         }
5032       else
5033         {
5034           /* We divided.  Now finish doing X - Y * (X / Y).  */
5035           remainder = expand_mult (compute_mode, quotient, op1,
5036                                    NULL_RTX, unsignedp);
5037           remainder = expand_binop (compute_mode, sub_optab, op0,
5038                                     remainder, target, unsignedp,
5039                                     OPTAB_LIB_WIDEN);
5040         }
5041     }
5042
5043   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5044 }
5045 \f
5046 /* Return a tree node with data type TYPE, describing the value of X.
5047    Usually this is an VAR_DECL, if there is no obvious better choice.
5048    X may be an expression, however we only support those expressions
5049    generated by loop.c.  */
5050
5051 tree
5052 make_tree (tree type, rtx x)
5053 {
5054   tree t;
5055
5056   switch (GET_CODE (x))
5057     {
5058     case CONST_INT:
5059       {
5060         HOST_WIDE_INT hi = 0;
5061
5062         if (INTVAL (x) < 0
5063             && !(TYPE_UNSIGNED (type)
5064                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5065                      < HOST_BITS_PER_WIDE_INT)))
5066           hi = -1;
5067
5068         t = build_int_cst_wide (type, INTVAL (x), hi);
5069
5070         return t;
5071       }
5072
5073     case CONST_DOUBLE:
5074       if (GET_MODE (x) == VOIDmode)
5075         t = build_int_cst_wide (type,
5076                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5077       else
5078         {
5079           REAL_VALUE_TYPE d;
5080
5081           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5082           t = build_real (type, d);
5083         }
5084
5085       return t;
5086
5087     case CONST_VECTOR:
5088       {
5089         int units = CONST_VECTOR_NUNITS (x);
5090         tree itype = TREE_TYPE (type);
5091         tree t = NULL_TREE;
5092         int i;
5093
5094
5095         /* Build a tree with vector elements.  */
5096         for (i = units - 1; i >= 0; --i)
5097           {
5098             rtx elt = CONST_VECTOR_ELT (x, i);
5099             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5100           }
5101
5102         return build_vector (type, t);
5103       }
5104
5105     case PLUS:
5106       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5107                           make_tree (type, XEXP (x, 1)));
5108
5109     case MINUS:
5110       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5111                           make_tree (type, XEXP (x, 1)));
5112
5113     case NEG:
5114       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5115
5116     case MULT:
5117       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5118                           make_tree (type, XEXP (x, 1)));
5119
5120     case ASHIFT:
5121       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5122                           make_tree (type, XEXP (x, 1)));
5123
5124     case LSHIFTRT:
5125       t = unsigned_type_for (type);
5126       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5127                                          make_tree (t, XEXP (x, 0)),
5128                                          make_tree (type, XEXP (x, 1))));
5129
5130     case ASHIFTRT:
5131       t = signed_type_for (type);
5132       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5133                                          make_tree (t, XEXP (x, 0)),
5134                                          make_tree (type, XEXP (x, 1))));
5135
5136     case DIV:
5137       if (TREE_CODE (type) != REAL_TYPE)
5138         t = signed_type_for (type);
5139       else
5140         t = type;
5141
5142       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5143                                          make_tree (t, XEXP (x, 0)),
5144                                          make_tree (t, XEXP (x, 1))));
5145     case UDIV:
5146       t = unsigned_type_for (type);
5147       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5148                                          make_tree (t, XEXP (x, 0)),
5149                                          make_tree (t, XEXP (x, 1))));
5150
5151     case SIGN_EXTEND:
5152     case ZERO_EXTEND:
5153       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5154                                           GET_CODE (x) == ZERO_EXTEND);
5155       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5156
5157     case CONST:
5158       return make_tree (type, XEXP (x, 0));
5159
5160     case SYMBOL_REF:
5161       t = SYMBOL_REF_DECL (x);
5162       if (t)
5163         return fold_convert (type, build_fold_addr_expr (t));
5164       /* else fall through.  */
5165
5166     default:
5167       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5168
5169       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5170          address mode to pointer mode.  */
5171       if (POINTER_TYPE_P (type))
5172         x = convert_memory_address_addr_space
5173               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5174
5175       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5176          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5177       t->decl_with_rtl.rtl = x;
5178
5179       return t;
5180     }
5181 }
5182 \f
5183 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5184    and returning TARGET.
5185
5186    If TARGET is 0, a pseudo-register or constant is returned.  */
5187
5188 rtx
5189 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5190 {
5191   rtx tem = 0;
5192
5193   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5194     tem = simplify_binary_operation (AND, mode, op0, op1);
5195   if (tem == 0)
5196     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5197
5198   if (target == 0)
5199     target = tem;
5200   else if (tem != target)
5201     emit_move_insn (target, tem);
5202   return target;
5203 }
5204
5205 /* Helper function for emit_store_flag.  */
5206 static rtx
5207 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5208              enum machine_mode mode, enum machine_mode compare_mode,
5209              int unsignedp, rtx x, rtx y, int normalizep,
5210              enum machine_mode target_mode)
5211 {
5212   rtx op0, last, comparison, subtarget, pattern;
5213   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5214
5215   last = get_last_insn ();
5216   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5217   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5218   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5219   if (!x || !y
5220       || !insn_data[icode].operand[2].predicate
5221           (x, insn_data[icode].operand[2].mode)
5222       || !insn_data[icode].operand[3].predicate
5223           (y, insn_data[icode].operand[3].mode)
5224       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5225     {
5226       delete_insns_since (last);
5227       return NULL_RTX;
5228     }
5229
5230   if (target_mode == VOIDmode)
5231     target_mode = result_mode;
5232   if (!target)
5233     target = gen_reg_rtx (target_mode);
5234
5235   if (optimize
5236       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5237     subtarget = gen_reg_rtx (result_mode);
5238   else
5239     subtarget = target;
5240
5241   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5242   if (!pattern)
5243     return NULL_RTX;
5244   emit_insn (pattern);
5245
5246   /* If we are converting to a wider mode, first convert to
5247      TARGET_MODE, then normalize.  This produces better combining
5248      opportunities on machines that have a SIGN_EXTRACT when we are
5249      testing a single bit.  This mostly benefits the 68k.
5250
5251      If STORE_FLAG_VALUE does not have the sign bit set when
5252      interpreted in MODE, we can do this conversion as unsigned, which
5253      is usually more efficient.  */
5254   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5255     {
5256       convert_move (target, subtarget,
5257                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5258                     && 0 == (STORE_FLAG_VALUE
5259                              & ((HOST_WIDE_INT) 1
5260                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5261       op0 = target;
5262       result_mode = target_mode;
5263     }
5264   else
5265     op0 = subtarget;
5266
5267   /* If we want to keep subexpressions around, don't reuse our last
5268      target.  */
5269   if (optimize)
5270     subtarget = 0;
5271
5272   /* Now normalize to the proper value in MODE.  Sometimes we don't
5273      have to do anything.  */
5274   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5275     ;
5276   /* STORE_FLAG_VALUE might be the most negative number, so write
5277      the comparison this way to avoid a compiler-time warning.  */
5278   else if (- normalizep == STORE_FLAG_VALUE)
5279     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5280
5281   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5282      it hard to use a value of just the sign bit due to ANSI integer
5283      constant typing rules.  */
5284   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5285            && (STORE_FLAG_VALUE
5286                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5287     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5288                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5289                         normalizep == 1);
5290   else
5291     {
5292       gcc_assert (STORE_FLAG_VALUE & 1);
5293
5294       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5295       if (normalizep == -1)
5296         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5297     }
5298
5299   /* If we were converting to a smaller mode, do the conversion now.  */
5300   if (target_mode != result_mode)
5301     {
5302       convert_move (target, op0, 0);
5303       return target;
5304     }
5305   else
5306     return op0;
5307 }
5308
5309
5310 /* A subroutine of emit_store_flag only including "tricks" that do not
5311    need a recursive call.  These are kept separate to avoid infinite
5312    loops.  */
5313
5314 static rtx
5315 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5316                    enum machine_mode mode, int unsignedp, int normalizep,
5317                    enum machine_mode target_mode)
5318 {
5319   rtx subtarget;
5320   enum insn_code icode;
5321   enum machine_mode compare_mode;
5322   enum mode_class mclass;
5323   enum rtx_code scode;
5324   rtx tem;
5325
5326   if (unsignedp)
5327     code = unsigned_condition (code);
5328   scode = swap_condition (code);
5329
5330   /* If one operand is constant, make it the second one.  Only do this
5331      if the other operand is not constant as well.  */
5332
5333   if (swap_commutative_operands_p (op0, op1))
5334     {
5335       tem = op0;
5336       op0 = op1;
5337       op1 = tem;
5338       code = swap_condition (code);
5339     }
5340
5341   if (mode == VOIDmode)
5342     mode = GET_MODE (op0);
5343
5344   /* For some comparisons with 1 and -1, we can convert this to
5345      comparisons with zero.  This will often produce more opportunities for
5346      store-flag insns.  */
5347
5348   switch (code)
5349     {
5350     case LT:
5351       if (op1 == const1_rtx)
5352         op1 = const0_rtx, code = LE;
5353       break;
5354     case LE:
5355       if (op1 == constm1_rtx)
5356         op1 = const0_rtx, code = LT;
5357       break;
5358     case GE:
5359       if (op1 == const1_rtx)
5360         op1 = const0_rtx, code = GT;
5361       break;
5362     case GT:
5363       if (op1 == constm1_rtx)
5364         op1 = const0_rtx, code = GE;
5365       break;
5366     case GEU:
5367       if (op1 == const1_rtx)
5368         op1 = const0_rtx, code = NE;
5369       break;
5370     case LTU:
5371       if (op1 == const1_rtx)
5372         op1 = const0_rtx, code = EQ;
5373       break;
5374     default:
5375       break;
5376     }
5377
5378   /* If we are comparing a double-word integer with zero or -1, we can
5379      convert the comparison into one involving a single word.  */
5380   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5381       && GET_MODE_CLASS (mode) == MODE_INT
5382       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5383     {
5384       if ((code == EQ || code == NE)
5385           && (op1 == const0_rtx || op1 == constm1_rtx))
5386         {
5387           rtx op00, op01;
5388
5389           /* Do a logical OR or AND of the two words and compare the
5390              result.  */
5391           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5392           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5393           tem = expand_binop (word_mode,
5394                               op1 == const0_rtx ? ior_optab : and_optab,
5395                               op00, op01, NULL_RTX, unsignedp,
5396                               OPTAB_DIRECT);
5397
5398           if (tem != 0)
5399             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5400                                    unsignedp, normalizep);
5401         }
5402       else if ((code == LT || code == GE) && op1 == const0_rtx)
5403         {
5404           rtx op0h;
5405
5406           /* If testing the sign bit, can just test on high word.  */
5407           op0h = simplify_gen_subreg (word_mode, op0, mode,
5408                                       subreg_highpart_offset (word_mode,
5409                                                               mode));
5410           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5411                                  unsignedp, normalizep);
5412         }
5413       else
5414         tem = NULL_RTX;
5415
5416       if (tem)
5417         {
5418           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5419             return tem;
5420           if (!target)
5421             target = gen_reg_rtx (target_mode);
5422
5423           convert_move (target, tem,
5424                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5425                               & ((HOST_WIDE_INT) 1
5426                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5427           return target;
5428         }
5429     }
5430
5431   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5432      complement of A (for GE) and shifting the sign bit to the low bit.  */
5433   if (op1 == const0_rtx && (code == LT || code == GE)
5434       && GET_MODE_CLASS (mode) == MODE_INT
5435       && (normalizep || STORE_FLAG_VALUE == 1
5436           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5437               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5438                   == ((unsigned HOST_WIDE_INT) 1
5439                       << (GET_MODE_BITSIZE (mode) - 1))))))
5440     {
5441       subtarget = target;
5442
5443       if (!target)
5444         target_mode = mode;
5445
5446       /* If the result is to be wider than OP0, it is best to convert it
5447          first.  If it is to be narrower, it is *incorrect* to convert it
5448          first.  */
5449       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5450         {
5451           op0 = convert_modes (target_mode, mode, op0, 0);
5452           mode = target_mode;
5453         }
5454
5455       if (target_mode != mode)
5456         subtarget = 0;
5457
5458       if (code == GE)
5459         op0 = expand_unop (mode, one_cmpl_optab, op0,
5460                            ((STORE_FLAG_VALUE == 1 || normalizep)
5461                             ? 0 : subtarget), 0);
5462
5463       if (STORE_FLAG_VALUE == 1 || normalizep)
5464         /* If we are supposed to produce a 0/1 value, we want to do
5465            a logical shift from the sign bit to the low-order bit; for
5466            a -1/0 value, we do an arithmetic shift.  */
5467         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5468                             size_int (GET_MODE_BITSIZE (mode) - 1),
5469                             subtarget, normalizep != -1);
5470
5471       if (mode != target_mode)
5472         op0 = convert_modes (target_mode, mode, op0, 0);
5473
5474       return op0;
5475     }
5476
5477   mclass = GET_MODE_CLASS (mode);
5478   for (compare_mode = mode; compare_mode != VOIDmode;
5479        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5480     {
5481      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5482      icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5483      if (icode != CODE_FOR_nothing)
5484         {
5485           do_pending_stack_adjust ();
5486           tem = emit_cstore (target, icode, code, mode, compare_mode,
5487                              unsignedp, op0, op1, normalizep, target_mode);
5488           if (tem)
5489             return tem;
5490
5491           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5492             {
5493               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5494                                  unsignedp, op1, op0, normalizep, target_mode);
5495               if (tem)
5496                 return tem;
5497             }
5498           break;
5499         }
5500     }
5501
5502   return 0;
5503 }
5504
5505 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5506    and storing in TARGET.  Normally return TARGET.
5507    Return 0 if that cannot be done.
5508
5509    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5510    it is VOIDmode, they cannot both be CONST_INT.
5511
5512    UNSIGNEDP is for the case where we have to widen the operands
5513    to perform the operation.  It says to use zero-extension.
5514
5515    NORMALIZEP is 1 if we should convert the result to be either zero
5516    or one.  Normalize is -1 if we should convert the result to be
5517    either zero or -1.  If NORMALIZEP is zero, the result will be left
5518    "raw" out of the scc insn.

        The scc route (emit_store_flag_1) is tried first.  If it fails and
        BRANCH_COST is nonzero, the fallbacks below are tried.  For
        floating-point modes: the reversed comparison followed by an add
        or XOR, then a split comparison combined with a conditional move.
        For integer modes: XOR (or subtract) of the operands for EQ/NE
        against a nonzero value, the reversed comparison, and finally
        sign-bit arithmetic for EQ, NE, LE and GT against zero.  */
5519
5520 rtx
5521 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5522                  enum machine_mode mode, int unsignedp, int normalizep)
5523 {
5524   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5525   enum rtx_code rcode;
5526   rtx subtarget;
5527   rtx tem, last, trueval;
5528
5529   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5530                            target_mode);
5531   if (tem)
5532     return tem;
5533
5534   /* If we reached here, we can't do this with a scc insn, however there
5535      are some comparisons that can be done in other ways.  Don't do any
5536      of these cases if branches are very cheap.  */
5537   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5538     return 0;
5539
5540   /* See what we need to return.  We can only return a 1, -1, or the
5541      sign bit.  */
5542
5543   if (normalizep == 0)
5544     {
5545       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5546         normalizep = STORE_FLAG_VALUE;
5547
            /* STORE_FLAG_VALUE is exactly the sign bit of MODE: nothing to
               adjust, NORMALIZEP stays 0.  */
5548       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5549                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5550                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5551         ;
5552       else
5553         return 0;
5554     }
5555
        /* Insns emitted after this point by an abandoned attempt are removed
           with delete_insns_since (last).  */
5556   last = get_last_insn ();
5557
5558   /* If optimizing, use different pseudo registers for each insn, instead
5559      of reusing the same pseudo.  This leads to better CSE, but slows
5560      down the compiler, since there are more pseudos.  */
5561   subtarget = (!optimize
5562                && (target_mode == mode)) ? target : NULL_RTX;
5563   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5564
5565   /* For floating-point comparisons, try the reverse comparison or try
5566      changing the "orderedness" of the comparison.  */
5567   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5568     {
5569       enum rtx_code first_code;
5570       bool and_them;
5571
5572       rcode = reverse_condition_maybe_unordered (code);
5573       if (can_compare_p (rcode, mode, ccp_store_flag)
5574           && (code == ORDERED || code == UNORDERED
5575               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5576               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5577         {
                /* WANT_ADD: the raw flag value and the wanted value have
                   opposite signs, so adding NORMALIZEP to the reversed
                   result maps STORE_FLAG_VALUE/0 onto 0/NORMALIZEP.  */
5578           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5579                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5580
5581           /* For the reverse comparison, use either an addition or a XOR.  */
5582           if (want_add
5583               && rtx_cost (GEN_INT (normalizep), PLUS,
5584                            optimize_insn_for_speed_p ()) == 0)
5585             {
5586               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5587                                        STORE_FLAG_VALUE, target_mode);
5588               if (tem)
5589                 return expand_binop (target_mode, add_optab, tem,
5590                                      GEN_INT (normalizep),
5591                                      target, 0, OPTAB_WIDEN);
5592             }
5593           else if (!want_add
5594                    && rtx_cost (trueval, XOR,
5595                                 optimize_insn_for_speed_p ()) == 0)
5596             {
5597               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5598                                        normalizep, target_mode);
5599               if (tem)
5600                 return expand_binop (target_mode, xor_optab, tem, trueval,
5601                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5602             }
5603         }
5604
5605       delete_insns_since (last);
5606
5607       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5608       if (code == ORDERED || code == UNORDERED)
5609         return 0;
5610
5611       and_them = split_comparison (code, mode, &first_code, &code);
5612
5613       /* If there are no NaNs, the first comparison should always fall through.
5614          Effectively change the comparison to the other one.  */
5615       if (!HONOR_NANS (mode))
5616         {
5617           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5618           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5619                                     target_mode);
5620         }
5621
5622 #ifdef HAVE_conditional_move
5623       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5624          conditional move.  */
5625       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5626                                normalizep, target_mode);
5627       if (tem == 0)
5628         return 0;
5629
            /* AND_THEM: the result is TEM when the second comparison holds
               and zero otherwise.  Otherwise (an OR): it is TRUEVAL when the
               second comparison holds and TEM otherwise.  */
5630       if (and_them)
5631         tem = emit_conditional_move (target, code, op0, op1, mode,
5632                                      tem, const0_rtx, GET_MODE (tem), 0);
5633       else
5634         tem = emit_conditional_move (target, code, op0, op1, mode,
5635                                      trueval, tem, GET_MODE (tem), 0);
5636
5637       if (tem == 0)
5638         delete_insns_since (last);
5639       return tem;
5640 #else
5641       return 0;
5642 #endif
5643     }
5644
5645   /* The remaining tricks only apply to integer comparisons.  */
5646
5647   if (GET_MODE_CLASS (mode) != MODE_INT)
5648     return 0;
5649
5650   /* If this is an equality comparison of integers, we can try to exclusive-or
5651      (or subtract) the two operands and use a recursive call to try the
5652      comparison with zero.  Don't do any of these cases if branches are
5653      very cheap.  */
5654
5655   if ((code == EQ || code == NE) && op1 != const0_rtx)
5656     {
5657       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5658                           OPTAB_WIDEN);
5659
5660       if (tem == 0)
5661         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5662                             OPTAB_WIDEN);
5663       if (tem != 0)
5664         tem = emit_store_flag (target, code, tem, const0_rtx,
5665                                mode, unsignedp, normalizep);
5666       if (tem != 0)
5667         return tem;
5668
5669       delete_insns_since (last);
5670     }
5671
5672   /* For integer comparisons, try the reverse comparison.  However, for
5673      small X and if we'd have anyway to extend, implementing "X != 0"
5674      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5675   rcode = reverse_condition (code);
5676   if (can_compare_p (rcode, mode, ccp_store_flag)
5677       && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5678             && code == NE
5679             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5680             && op1 == const0_rtx))
5681     {
5682       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5683                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5684
5685       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5686       if (want_add
5687           && rtx_cost (GEN_INT (normalizep), PLUS,
5688                        optimize_insn_for_speed_p ()) == 0)
5689         {
5690           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5691                                    STORE_FLAG_VALUE, target_mode);
5692           if (tem != 0)
5693             tem = expand_binop (target_mode, add_optab, tem,
5694                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5695         }
5696       else if (!want_add
5697                && rtx_cost (trueval, XOR,
5698                             optimize_insn_for_speed_p ()) == 0)
5699         {
5700           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5701                                    normalizep, target_mode);
5702           if (tem != 0)
5703             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5704                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5705         }
5706
5707       if (tem != 0)
5708         return tem;
5709       delete_insns_since (last);
5710     }
5711
5712   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5713      the constant zero.  Reject all other comparisons at this point.  Only
5714      do LE and GT if branches are expensive since they are expensive on
5715      2-operand machines.  */
5716
5717   if (op1 != const0_rtx
5718       || (code != EQ && code != NE
5719           && (BRANCH_COST (optimize_insn_for_speed_p (),
5720                            false) <= 1 || (code != LE && code != GT))))
5721     return 0;
5722
5723   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5724      do the necessary operation below.  */
5725
5726   tem = 0;
5727
5728   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5729      the sign bit set.  */
5730
5731   if (code == LE)
5732     {
5733       /* This is destructive, so SUBTARGET can't be OP0.  */
5734       if (rtx_equal_p (subtarget, op0))
5735         subtarget = 0;
5736
5737       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5738                           OPTAB_WIDEN);
5739       if (tem)
5740         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5741                             OPTAB_WIDEN);
5742     }
5743
5744   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5745      number of bits in the mode of OP0, minus one.  */
5746
5747   if (code == GT)
5748     {
            /* OP0 is read again by the subtraction below, so SUBTARGET
               can't be OP0 here either.  */
5749       if (rtx_equal_p (subtarget, op0))
5750         subtarget = 0;
5751
5752       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5753                           size_int (GET_MODE_BITSIZE (mode) - 1),
5754                           subtarget, 0);
5755       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5756                           OPTAB_WIDEN);
5757     }
5758
5759   if (code == EQ || code == NE)
5760     {
5761       /* For EQ or NE, one way to do the comparison is to apply an operation
5762          that converts the operand into a positive number if it is nonzero
5763          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5764          for NE we negate.  This puts the result in the sign bit.  Then we
5765          normalize with a shift, if needed.
5766
5767          Two operations that can do the above actions are ABS and FFS, so try
5768          them.  If that doesn't work, and MODE is smaller than a full word,
5769          we can use zero-extension to the wider mode (an unsigned conversion)
5770          as the operation.  */
5771
5772       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5773          that is compensated by the subsequent overflow when subtracting
5774          one / negating.  */
5775
5776       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5777         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5778       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5779         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5780       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5781         {
                /* From here on the computation is carried out in
                   word_mode.  */
5782           tem = convert_modes (word_mode, mode, op0, 1);
5783           mode = word_mode;
5784         }
5785
5786       if (tem != 0)
5787         {
5788           if (code == EQ)
5789             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5790                                 0, OPTAB_WIDEN);
5791           else
5792             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5793         }
5794
5795       /* If we couldn't do it that way, for NE we can "or" the two's complement
5796          of the value with itself.  For EQ, we take the one's complement of
5797          that "or", which is an extra insn, so we only handle EQ if branches
5798          are expensive.  */
5799
5800       if (tem == 0
5801           && (code == NE
5802               || BRANCH_COST (optimize_insn_for_speed_p (),
5803                               false) > 1))
5804         {
5805           if (rtx_equal_p (subtarget, op0))
5806             subtarget = 0;
5807
5808           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5809           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5810                               OPTAB_WIDEN);
5811
5812           if (tem && code == EQ)
5813             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5814         }
5815     }
5816
        /* The result now sits in the sign bit of TEM.  If a 0/1 or 0/-1 value
           was requested, shift it down: logically for 1, arithmetically
           for -1.  */
5817   if (tem && normalizep)
5818     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5819                         size_int (GET_MODE_BITSIZE (mode) - 1),
5820                         subtarget, normalizep == 1);
5821
5822   if (tem)
5823     {
5824       if (!target)
5825         ;
5826       else if (GET_MODE (tem) != target_mode)
5827         {
5828           convert_move (target, tem, 0);
5829           tem = target;
5830         }
            /* No SUBTARGET was in use, so TEM is presumably not TARGET;
               copy it there.  */
5831       else if (!subtarget)
5832         {
5833           emit_move_insn (target, tem);
5834           tem = target;
5835         }
5836     }
5837   else
5838     delete_insns_since (last);
5839
5840   return tem;
5841 }
5842
5843 /* Like emit_store_flag, but always succeeds.  */
5844
5845 rtx
5846 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5847                        enum machine_mode mode, int unsignedp, int normalizep)
5848 {
5849   rtx tem, label;
5850   rtx trueval, falseval;
5851
5852   /* First see if emit_store_flag can do the job.  */
5853   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5854   if (tem != 0)
5855     return tem;
5856
5857   if (!target)
5858     target = gen_reg_rtx (word_mode);
5859
5860   /* If this failed, we have to do this with set/compare/jump/set code.
5861      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5862   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5863   if (code == NE
5864       && GET_MODE_CLASS (mode) == MODE_INT
5865       && REG_P (target)
5866       && op0 == target
5867       && op1 == const0_rtx)
5868     {
5869       label = gen_label_rtx ();
5870       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5871                                mode, NULL_RTX, NULL_RTX, label, -1);
5872       emit_move_insn (target, trueval);
5873       emit_label (label);
5874       return target;
5875     }
5876
5877   if (!REG_P (target)
5878       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5879     target = gen_reg_rtx (GET_MODE (target));
5880
5881   /* Jump in the right direction if the target cannot implement CODE
5882      but can jump on its reverse condition.  */
5883   falseval = const0_rtx;
5884   if (! can_compare_p (code, mode, ccp_jump)
5885       && (! FLOAT_MODE_P (mode)
5886           || code == ORDERED || code == UNORDERED
5887           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5888           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5889     {
5890       enum rtx_code rcode;
5891       if (FLOAT_MODE_P (mode))
5892         rcode = reverse_condition_maybe_unordered (code);
5893       else
5894         rcode = reverse_condition (code);
5895
5896       /* Canonicalize to UNORDERED for the libcall.  */
5897       if (can_compare_p (rcode, mode, ccp_jump)
5898           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5899         {
5900           falseval = trueval;
5901           trueval = const0_rtx;
5902           code = rcode;
5903         }
5904     }
5905
5906   emit_move_insn (target, trueval);
5907   label = gen_label_rtx ();
5908   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5909                            NULL_RTX, label, -1);
5910
5911   emit_move_insn (target, falseval);
5912   emit_label (label);
5913
5914   return target;
5915 }
5916 \f
5917 /* Perform possibly multi-word comparison and conditional jump to LABEL
5918    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5919    now a thin wrapper around do_compare_rtx_and_jump.  */
5920
5921 static void
5922 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5923                  rtx label)
5924 {
5925   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5926   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5927                            NULL_RTX, NULL_RTX, label, -1);
5928 }